Add analyze script for simple DB queries #32
					 1 changed files with 114 additions and 0 deletions
				
			
		Add analyze utility for DB queries
				commit
				
					
					
						37fc898556
					
				
			
		
							
								
								
									
										114
									
								
								scripts/analyze.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								scripts/analyze.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,114 @@ | ||||||
|  | #!/usr/bin/env python3 | ||||||
|  | """Utility helpers for ad-hoc log analysis. | ||||||
|  | 
 | ||||||
|  | This module exposes small helper functions to inspect the ``ngxstat`` SQLite | ||||||
|  | database.  The intent is to allow quick queries from the command line or other | ||||||
|  | scripts without rewriting SQL each time. | ||||||
|  | 
 | ||||||
|  | Examples | ||||||
|  | -------- | ||||||
|  | To list all domains present in the database:: | ||||||
|  | 
 | ||||||
|  |     python scripts/analyze.py domains | ||||||
|  | 
 | ||||||
|  | The CLI is powered by :mod:`typer` and currently only offers a couple of | ||||||
|  | commands.  More analysis routines can be added over time. | ||||||
|  | """ | ||||||
|  | from __future__ import annotations | ||||||
|  | 
 | ||||||
|  | import sqlite3 | ||||||
|  | from pathlib import Path | ||||||
|  | from typing import Dict, List, Optional | ||||||
|  | 
 | ||||||
|  | import typer | ||||||
|  | 
 | ||||||
|  | from scripts import nginx_config  # noqa: F401  # imported for side effects/usage | ||||||
|  | 
 | ||||||
# SQLite database file opened by ``_connect``. The path is relative, so it is
# resolved against the current working directory of the process.
DB_PATH = Path("database/ngxstat.db")

# Typer application object; the ``@app.command`` functions in this module
# register themselves on it.
app = typer.Typer(help="Ad-hoc statistics queries")
|  | 
 | ||||||
|  | 
 | ||||||
def _connect() -> sqlite3.Connection:
    """Open a fresh SQLite connection to :data:`DB_PATH` and return it.

    Every call creates a new connection; the caller is responsible for
    closing it once finished.
    """
    connection = sqlite3.connect(DB_PATH)
    return connection
|  | 
 | ||||||
|  | 
 | ||||||
def load_domains_from_db() -> List[str]:
    """Return the distinct ``host`` values from the ``logs`` table.

    Returns
    -------
    list of str
        One entry per distinct ``host``, in the order produced by the
        query's ``ORDER BY host`` clause.

    Raises
    ------
    sqlite3.Error
        Propagated unchanged if the query fails (for example when the
        ``logs`` table does not exist).
    """
    conn = _connect()
    try:
        rows = conn.execute("SELECT DISTINCT host FROM logs ORDER BY host")
        # Materialise the result before the connection is closed below.
        return [host for (host,) in rows]
    finally:
        # Close the connection on the error path too, so a failing query
        # cannot leak the database handle.
        conn.close()
|  | 
 | ||||||
|  | 
 | ||||||
def get_hit_count(domain: Optional[str] = None) -> int:
    """Return total request count.

    Parameters
    ----------
    domain:
        Optional domain to filter on. If ``None`` (or an empty string) the
        count includes all logs.

    Returns
    -------
    int
        Number of rows in ``logs``, restricted to rows whose ``host`` equals
        *domain* when a domain is given.

    Raises
    ------
    sqlite3.Error
        Propagated unchanged if the query fails.
    """
    conn = _connect()
    try:
        if domain:
            # The domain is passed as a bound parameter, never interpolated.
            cur = conn.execute(
                "SELECT COUNT(*) FROM logs WHERE host = ?", (domain,)
            )
        else:
            cur = conn.execute("SELECT COUNT(*) FROM logs")
        return cur.fetchone()[0] or 0
    finally:
        # Close the connection even when the query raises.
        conn.close()
|  | 
 | ||||||
|  | 
 | ||||||
def get_cache_ratio(domain: Optional[str] = None) -> float:
    """Return the fraction of requests served from cache.

    A request counts as cached when its ``cache_status`` column equals
    ``'HIT'``.  The value is a ratio between ``0.0`` and ``1.0``, not a
    percentage; multiply by 100 for display.

    Parameters
    ----------
    domain:
        Optional domain to filter on. If ``None`` (or an empty string) the
        ratio is computed over all logs.

    Returns
    -------
    float
        Cache hits divided by total requests, or ``0.0`` when the query
        yields no value (no matching rows).

    Raises
    ------
    sqlite3.Error
        Propagated unchanged if the query fails.
    """
    # ``* 1.0`` forces floating-point division inside SQLite.
    query = (
        "SELECT SUM(CASE WHEN cache_status = 'HIT' THEN 1 ELSE 0 END) * 1.0 / "
        "COUNT(*) FROM logs"
    )
    params: tuple = ()
    if domain:
        # Only a constant clause is appended; the value itself stays bound.
        query += " WHERE host = ?"
        params = (domain,)

    conn = _connect()
    try:
        result = conn.execute(query, params).fetchone()[0]
    finally:
        # Close the connection even when the query raises.
        conn.close()
    # A NULL result (no rows to aggregate) is reported as a ratio of zero.
    return float(result or 0.0)
|  | 
 | ||||||
|  | 
 | ||||||
@app.command()
def domains() -> None:
    """Print the list of domains discovered in the database."""
    # One echo call per domain, so each name lands on its own output line.
    known_domains = load_domains_from_db()
    for name in known_domains:
        typer.echo(name)
|  | 
 | ||||||
|  | 
 | ||||||
@app.command()
def hits(domain: Optional[str] = typer.Option(None, help="Filter by domain")) -> None:
    """Show request count."""
    total = get_hit_count(domain)
    # Without a domain filter the figure covers every host in the database.
    message = f"{domain}: {total} hits" if domain else f"Total hits: {total}"
    typer.echo(message)
|  | 
 | ||||||
|  | 
 | ||||||
@app.command("cache-ratio")
def cache_ratio_cmd(domain: Optional[str] = typer.Option(None, help="Filter by domain")) -> None:
    """Display cache hit ratio as a percentage."""
    # get_cache_ratio returns a 0-1 fraction; scale it for display.
    percent = get_cache_ratio(domain) * 100
    if not domain:
        typer.echo(f"Cache hit ratio: {percent:.2f}%")
        return
    typer.echo(f"{domain}: {percent:.2f}% cached")
|  | 
 | ||||||
|  | 
 | ||||||
# Run the Typer CLI only when this file is executed as a script, not when it
# is imported for its helper functions.
if __name__ == "__main__":
    app()
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue