diff --git a/scripts/analyze.py b/scripts/analyze.py
index 14634e0..ded224d 100644
--- a/scripts/analyze.py
+++ b/scripts/analyze.py
@@ -143,5 +143,59 @@ def check_missing_domains(json_output: bool = typer.Option(False, "--json", help
         typer.echo(d)
 
 
+@app.command("suggest-cache")
+def suggest_cache(
+    threshold: int = typer.Option(
+        10, help="Minimum number of MISS entries to report"
+    ),
+    json_output: bool = typer.Option(False, "--json", help="Output results as JSON"),
+) -> None:
+    """Suggest domain/path pairs that could benefit from caching.
+
+    Paths with at least ``threshold`` ``MISS`` entries are shown for domains
+    whose server blocks lack a ``proxy_cache`` directive.
+    """
+
+    # Discover domains without explicit proxy_cache
+    paths = nginx_config.discover_configs()
+    servers = nginx_config.parse_servers(paths)
+    no_cache: Set[str] = set()
+    for server in servers:
+        if "proxy_cache" in server:
+            continue
+        for name in server.get("server_name", "").split():
+            if name:
+                no_cache.add(name)
+
+    conn = _connect()
+    cur = conn.cursor()
+    cur.execute(
+        """
+        SELECT host,
+               substr(request, instr(request, ' ')+1,
+                      instr(request, ' HTTP') - instr(request, ' ') - 1) AS path,
+               COUNT(*) AS miss_count
+        FROM logs
+        WHERE cache_status = 'MISS'
+        GROUP BY host, path
+        HAVING miss_count >= ?
+        ORDER BY miss_count DESC
+        """,
+        (threshold,),
+    )
+
+    rows = [r for r in cur.fetchall() if r[0] in no_cache]
+    conn.close()
+
+    if json_output:
+        result = [
+            {"host": host, "path": path, "misses": count} for host, path, count in rows
+        ]
+        typer.echo(json.dumps(result))
+    else:
+        for host, path, count in rows:
+            typer.echo(f"{host} {path} {count}")
+
+
 if __name__ == "__main__":
     app()
diff --git a/tests/test_analyze.py b/tests/test_analyze.py
index 5f2b4d9..40bdc40 100644
--- a/tests/test_analyze.py
+++ b/tests/test_analyze.py
@@ -88,3 +88,118 @@ server {
     analyze.check_missing_domains(json_output=True)
     out_json = json.loads(capsys.readouterr().out.strip())
     assert out_json == ["missing.com"]
+
+
+def test_suggest_cache(tmp_path, monkeypatch, capsys):
+    db_path = tmp_path / "database" / "ngxstat.db"
+    db_path.parent.mkdir(parents=True, exist_ok=True)
+    conn = sqlite3.connect(db_path)
+    cur = conn.cursor()
+    cur.execute(
+        """
+        CREATE TABLE logs (
+            id INTEGER PRIMARY KEY,
+            ip TEXT,
+            host TEXT,
+            time TEXT,
+            request TEXT,
+            status INTEGER,
+            bytes_sent INTEGER,
+            referer TEXT,
+            user_agent TEXT,
+            cache_status TEXT
+        )
+        """
+    )
+    entries = [
+        (
+            "127.0.0.1",
+            "example.com",
+            "2024-01-01 10:00:00",
+            "GET /foo HTTP/1.1",
+            200,
+            100,
+            "-",
+            "curl",
+            "MISS",
+        ),
+        (
+            "127.0.0.1",
+            "example.com",
+            "2024-01-01 10:01:00",
+            "GET /foo HTTP/1.1",
+            200,
+            100,
+            "-",
+            "curl",
+            "MISS",
+        ),
+        (
+            "127.0.0.1",
+            "example.com",
+            "2024-01-01 10:02:00",
+            "GET /foo HTTP/1.1",
+            200,
+            100,
+            "-",
+            "curl",
+            "MISS",
+        ),
+        (
+            "127.0.0.1",
+            "cached.com",
+            "2024-01-01 10:00:00",
+            "GET /bar HTTP/1.1",
+            200,
+            100,
+            "-",
+            "curl",
+            "MISS",
+        ),
+        (
+            "127.0.0.1",
+            "cached.com",
+            "2024-01-01 10:01:00",
+            "GET /bar HTTP/1.1",
+            200,
+            100,
+            "-",
+            "curl",
+            "MISS",
+        ),
+    ]
+    cur.executemany(
+        "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status)"
+        " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
+        entries,
+    )
+    conn.commit()
+    conn.close()
+
+    conf = tmp_path / "nginx.conf"
+    conf.write_text(
+        """
+server {
+    listen 80;
+    server_name example.com;
+}
+
+server {
+    listen 80;
+    server_name cached.com;
+    proxy_cache cache1;
+}
+"""
+    )
+
+    monkeypatch.setattr(analyze, "DB_PATH", db_path)
+    monkeypatch.setattr(gr, "DB_PATH", db_path)
+    monkeypatch.setattr(analyze.nginx_config, "DEFAULT_PATHS", [str(conf)])
+
+    analyze.suggest_cache(threshold=2, json_output=False)
+    out = capsys.readouterr().out.strip().splitlines()
+    assert out == ["example.com /foo 3"]
+
+    analyze.suggest_cache(threshold=2, json_output=True)
+    out_json = json.loads(capsys.readouterr().out.strip())
+    assert out_json == [{"host": "example.com", "path": "/foo", "misses": 3}]