Add suggest_cache command and tests

This commit is contained in:
Jordan Wages 2025-07-19 02:07:23 -05:00
commit 7f996fe123
2 changed files with 169 additions and 0 deletions

View file

@@ -143,5 +143,59 @@ def check_missing_domains(json_output: bool = typer.Option(False, "--json", help
typer.echo(d)
@app.command("suggest-cache")
def suggest_cache(
    threshold: int = typer.Option(
        10, help="Minimum number of MISS entries to report"
    ),
    json_output: bool = typer.Option(False, "--json", help="Output results as JSON"),
) -> None:
    """Suggest domain/path pairs that could benefit from caching.

    Paths with at least ``threshold`` ``MISS`` entries are shown for domains
    whose server blocks lack a ``proxy_cache`` directive.
    """
    # Discover domains whose server blocks have no explicit proxy_cache.
    paths = nginx_config.discover_configs()
    servers = nginx_config.parse_servers(paths)
    no_cache: Set[str] = set()
    for server in servers:
        if "proxy_cache" in server:
            continue
        for name in server.get("server_name", "").split():
            if name:
                no_cache.add(name)

    conn = _connect()
    try:
        cur = conn.cursor()
        # Extract the request path from "METHOD /path HTTP/x.y".  Guard
        # against malformed requests without a " HTTP" suffix: instr()
        # returns 0 there, which would otherwise produce a negative
        # substr() length and a garbage path.
        cur.execute(
            """
            SELECT host,
                   CASE
                       WHEN instr(request, ' HTTP') > 0 THEN
                           substr(request, instr(request, ' ')+1,
                                  instr(request, ' HTTP') - instr(request, ' ') - 1)
                       ELSE substr(request, instr(request, ' ')+1)
                   END AS path,
                   COUNT(*) AS miss_count
              FROM logs
             WHERE cache_status = 'MISS'
             GROUP BY host, path
            HAVING miss_count >= ?
             ORDER BY miss_count DESC
            """,
            (threshold,),
        )
        # Only report hosts that lack a proxy_cache directive.
        rows = [r for r in cur.fetchall() if r[0] in no_cache]
    finally:
        # Close even if the query raises, so the connection never leaks.
        conn.close()

    if json_output:
        result = [
            {"host": host, "path": path, "misses": count} for host, path, count in rows
        ]
        typer.echo(json.dumps(result))
    else:
        for host, path, count in rows:
            typer.echo(f"{host} {path} {count}")
# Allow running this module directly as a CLI script.
if __name__ == "__main__":
    app()

View file

@@ -88,3 +88,118 @@ server {
analyze.check_missing_domains(json_output=True)
out_json = json.loads(capsys.readouterr().out.strip())
assert out_json == ["missing.com"]
def test_suggest_cache(tmp_path, monkeypatch, capsys):
    """suggest_cache reports hot MISS paths only for hosts lacking proxy_cache."""

    def miss_row(host, ts, path):
        # Every fixture row differs only in host, timestamp and path.
        return ("127.0.0.1", host, ts, f"GET {path} HTTP/1.1", 200, 100, "-", "curl", "MISS")

    # Build a throwaway database with the logs schema.
    db_path = tmp_path / "database" / "ngxstat.db"
    db_path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(db_path)
    cur = conn.cursor()
    cur.execute(
        """
        CREATE TABLE logs (
            id INTEGER PRIMARY KEY,
            ip TEXT,
            host TEXT,
            time TEXT,
            request TEXT,
            status INTEGER,
            bytes_sent INTEGER,
            referer TEXT,
            user_agent TEXT,
            cache_status TEXT
        )
        """
    )
    # Three misses for example.com (uncached) and two for cached.com (cached).
    entries = [
        miss_row("example.com", "2024-01-01 10:00:00", "/foo"),
        miss_row("example.com", "2024-01-01 10:01:00", "/foo"),
        miss_row("example.com", "2024-01-01 10:02:00", "/foo"),
        miss_row("cached.com", "2024-01-01 10:00:00", "/bar"),
        miss_row("cached.com", "2024-01-01 10:01:00", "/bar"),
    ]
    cur.executemany(
        "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status)"
        " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
        entries,
    )
    conn.commit()
    conn.close()

    # nginx config: cached.com has proxy_cache, example.com does not.
    conf = tmp_path / "nginx.conf"
    conf.write_text(
        """
server {
    listen 80;
    server_name example.com;
}
server {
    listen 80;
    server_name cached.com;
    proxy_cache cache1;
}
"""
    )

    # Point the command at the fixture database and config.
    monkeypatch.setattr(analyze, "DB_PATH", db_path)
    monkeypatch.setattr(gr, "DB_PATH", db_path)
    monkeypatch.setattr(analyze.nginx_config, "DEFAULT_PATHS", [str(conf)])

    # Plain-text output: only the uncached host crosses the threshold.
    analyze.suggest_cache(threshold=2, json_output=False)
    out = capsys.readouterr().out.strip().splitlines()
    assert out == ["example.com /foo 3"]

    # JSON output mirrors the same single result.
    analyze.suggest_cache(threshold=2, json_output=True)
    out_json = json.loads(capsys.readouterr().out.strip())
    assert out_json == [{"host": "example.com", "path": "/foo", "misses": 3}]