From 37fc8985564bd50170d96deb41a7f0ff15cf42d5 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 01:53:36 -0500 Subject: [PATCH] Add analyze utility for DB queries --- scripts/analyze.py | 114 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 scripts/analyze.py diff --git a/scripts/analyze.py b/scripts/analyze.py new file mode 100644 index 0000000..528af37 --- /dev/null +++ b/scripts/analyze.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +"""Utility helpers for ad-hoc log analysis. + +This module exposes small helper functions to inspect the ``ngxstat`` SQLite +database. The intent is to allow quick queries from the command line or other +scripts without rewriting SQL each time. + +Examples +-------- +To list all domains present in the database:: + + python scripts/analyze.py domains + +The CLI is powered by :mod:`typer` and currently only offers a couple of +commands. More analysis routines can be added over time. +""" +from __future__ import annotations + +import sqlite3 +from pathlib import Path +from typing import Dict, List, Optional + +import typer + +from scripts import nginx_config # noqa: F401 # imported for side effects/usage + +DB_PATH = Path("database/ngxstat.db") + +app = typer.Typer(help="Ad-hoc statistics queries") + + +def _connect() -> sqlite3.Connection: + """Return a new SQLite connection to :data:`DB_PATH`.""" + return sqlite3.connect(DB_PATH) + + +def load_domains_from_db() -> List[str]: + """Return a sorted list of distinct domains from the ``logs`` table.""" + conn = _connect() + cur = conn.cursor() + cur.execute("SELECT DISTINCT host FROM logs ORDER BY host") + domains = [row[0] for row in cur.fetchall()] + conn.close() + return domains + + +def get_hit_count(domain: Optional[str] = None) -> int: + """Return total request count. + + Parameters + ---------- + domain: + Optional domain to filter on. If ``None`` the count includes all logs. + """ + conn = _connect() + cur = conn.cursor() + if domain: + cur.execute("SELECT COUNT(*) FROM logs WHERE host = ?", (domain,)) + else: + cur.execute("SELECT COUNT(*) FROM logs") + count = cur.fetchone()[0] or 0 + conn.close() + return count + + +def get_cache_ratio(domain: Optional[str] = None) -> float: + """Return the percentage of requests served from cache.""" + conn = _connect() + cur = conn.cursor() + if domain: + cur.execute( + "SELECT SUM(CASE WHEN cache_status = 'HIT' THEN 1 ELSE 0 END) * 1.0 / " + "COUNT(*) FROM logs WHERE host = ?", + (domain,), + ) + else: + cur.execute( + "SELECT SUM(CASE WHEN cache_status = 'HIT' THEN 1 ELSE 0 END) * 1.0 / " + "COUNT(*) FROM logs" + ) + result = cur.fetchone()[0] + conn.close() + return float(result or 0.0) + + +@app.command() +def domains() -> None: + """Print the list of domains discovered in the database.""" + for d in load_domains_from_db(): + typer.echo(d) + + +@app.command() +def hits(domain: Optional[str] = typer.Option(None, help="Filter by domain")) -> None: + """Show request count.""" + count = get_hit_count(domain) + if domain: + typer.echo(f"{domain}: {count} hits") + else: + typer.echo(f"Total hits: {count}") + + +@app.command("cache-ratio") +def cache_ratio_cmd(domain: Optional[str] = typer.Option(None, help="Filter by domain")) -> None: + """Display cache hit ratio as a percentage.""" + ratio = get_cache_ratio(domain) * 100 + if domain: + typer.echo(f"{domain}: {ratio:.2f}% cached") + else: + typer.echo(f"Cache hit ratio: {ratio:.2f}%") + + +if __name__ == "__main__": + app()