From 6241fd268538f885f683a03ff96dcaa46daf96da Mon Sep 17 00:00:00 2001
From: Jordan Wages
Date: Fri, 18 Jul 2025 01:24:26 -0500
Subject: [PATCH] Add YAML-driven report generation

---
 README.md                   | 23 ++++
 reports.yml                 | 21 +++++
 requirements.txt            |  1 +
 scripts/generate_reports.py | 86 +++++++++++++++++++++----------------
 templates/report.html       | 56 ++++++++++++++----------
 tests/test_reports.py       | 87 +++++++++++++++++++++++++++++++++++++
 6 files changed, 214 insertions(+), 60 deletions(-)
 create mode 100644 reports.yml
 create mode 100644 tests/test_reports.py

diff --git a/README.md b/README.md
index 1a8a307..7779d4d 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,30 @@ python scripts/generate_reports.py monthly
 Reports are written under the `output/` directory. Each command updates the
 corresponding `.json` file and produces an HTML dashboard using Chart.js.
+
+### Configuring Reports
+
+Report queries are defined in `reports.yml`. Each entry specifies the `name`,
+`interval`, optional `label` and `chart` type, and a SQL `query` that must return
+`bucket` and `value` columns. When `generate_reports.py` runs, every matching
+definition creates `output/<interval>/<name>.json` and an `index.html` dashboard.
+
+Example snippet:
+
+```yaml
+- name: hits
+  interval: hourly
+  chart: bar
+  query: |
+    SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
+           COUNT(*) AS value
+    FROM logs
+    GROUP BY bucket
+    ORDER BY bucket
+```
+
+Add or modify entries in `reports.yml` to tailor the generated metrics.
 
 ## Importing Logs
 
 Use the `run-import.sh` script to set up the Python environment if needed and
 import the latest Nginx log entries into `database/ngxstat.db`.
diff --git a/reports.yml b/reports.yml
new file mode 100644
index 0000000..f08dc34
--- /dev/null
+++ b/reports.yml
@@ -0,0 +1,21 @@
+- name: hits
+  interval: hourly
+  label: Hits
+  chart: bar
+  query: |
+    SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
+           COUNT(*) AS value
+    FROM logs
+    GROUP BY bucket
+    ORDER BY bucket
+
+- name: error_rate
+  interval: hourly
+  label: Error Rate (%)
+  chart: line
+  query: |
+    SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
+           SUM(CASE WHEN status >= 500 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value
+    FROM logs
+    GROUP BY bucket
+    ORDER BY bucket
diff --git a/requirements.txt b/requirements.txt
index 221e3c8..2678f7b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ Flask # For optional lightweight API server
 # Linting / formatting (optional but recommended)
 black
 flake8
+PyYAML
diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py
index b244075..8dacf68 100644
--- a/scripts/generate_reports.py
+++ b/scripts/generate_reports.py
@@ -3,77 +3,91 @@ import sqlite3
 from pathlib import Path
 from typing import List, Dict
+import yaml
+
 import typer
 from jinja2 import Environment, FileSystemLoader
 
 DB_PATH = Path("database/ngxstat.db")
 OUTPUT_DIR = Path("output")
 TEMPLATE_DIR = Path("templates")
+REPORT_CONFIG = Path("reports.yml")
 
 app = typer.Typer(help="Generate aggregated log reports")
 
 
-def _load_existing(path: Path) -> List[Dict]:
-    if path.exists():
-        try:
-            return json.loads(path.read_text())
-        except Exception:
-            return []
-    return []
+def _load_config() -> List[Dict]:
+    if not REPORT_CONFIG.exists():
+        typer.echo(f"Config file not found: {REPORT_CONFIG}")
+        raise typer.Exit(1)
+    with REPORT_CONFIG.open("r") as fh:
+        data = yaml.safe_load(fh) or []
+    if not isinstance(data, list):
+        typer.echo("reports.yml must contain a list of report definitions")
+        raise typer.Exit(1)
+    return data
 
 
 def _save_json(path: Path, data: List[Dict]) -> None:
     path.parent.mkdir(parents=True, exist_ok=True)
     path.write_text(json.dumps(data, indent=2))
 
 
-def _render_html(interval: str, json_name: str, out_path: Path) -> None:
+def _render_html(interval: str, reports: List[Dict], out_path: Path) -> None:
     env = Environment(loader=FileSystemLoader(TEMPLATE_DIR))
     template = env.get_template("report.html")
-    out_path.write_text(template.render(interval=interval, json_path=json_name))
+    out_path.write_text(template.render(interval=interval, reports=reports))
 
 
-def _aggregate(interval: str, fmt: str) -> None:
-    json_path = OUTPUT_DIR / f"{interval}.json"
-    html_path = OUTPUT_DIR / f"{interval}.html"
-
-    existing = _load_existing(json_path)
-    last_bucket = existing[-1]["bucket"] if existing else None
+def _generate_interval(interval: str) -> None:
+    cfg = _load_config()
+    defs = [d for d in cfg if d.get("interval") == interval]
+    if not defs:
+        typer.echo(f"No reports defined for {interval}")
+        return
 
     conn = sqlite3.connect(DB_PATH)
     cur = conn.cursor()
-    query = f"SELECT strftime('{fmt}', datetime(time)) as bucket, COUNT(*) as hits FROM logs"
-    params = []
-    if last_bucket:
-        query += " WHERE datetime(time) > datetime(?)"
-        params.append(last_bucket)
-    query += " GROUP BY bucket ORDER BY bucket"
+    out_dir = OUTPUT_DIR / interval
+    out_dir.mkdir(parents=True, exist_ok=True)
 
-    rows = cur.execute(query, params).fetchall()
-    for bucket, hits in rows:
-        existing.append({"bucket": bucket, "hits": hits})
+    report_list = []
+    for definition in defs:
+        name = definition["name"]
+        query = definition["query"]
+        cur.execute(query)
+        rows = cur.fetchall()
+        headers = [c[0] for c in cur.description]
+        data = [dict(zip(headers, row)) for row in rows]
+        json_path = out_dir / f"{name}.json"
+        _save_json(json_path, data)
+        report_list.append({
+            "name": name,
+            "label": definition.get("label", name.title()),
+            "chart": definition.get("chart", "line"),
+            "json": f"{name}.json",
+        })
 
-    existing.sort(key=lambda x: x["bucket"])
-    _save_json(json_path, existing)
-    _render_html(interval, json_path.name, html_path)
-    typer.echo(f"Generated {json_path} and {html_path}")
+    _save_json(out_dir / "reports.json", report_list)
+    _render_html(interval, report_list, out_dir / "index.html")
+    typer.echo(f"Generated {interval} reports")
 
 
 @app.command()
 def hourly() -> None:
-    """Aggregate logs into hourly buckets."""
-    _aggregate("hourly", "%Y-%m-%d %H:00:00")
+    """Generate hourly reports."""
+    _generate_interval("hourly")
 
 
 @app.command()
 def daily() -> None:
-    """Aggregate logs into daily buckets."""
-    _aggregate("daily", "%Y-%m-%d")
+    """Generate daily reports."""
+    _generate_interval("daily")
 
 
 @app.command()
 def weekly() -> None:
-    """Aggregate logs into weekly buckets."""
-    _aggregate("weekly", "%Y-%W")
+    """Generate weekly reports."""
+    _generate_interval("weekly")
 
 
 @app.command()
 def monthly() -> None:
-    """Aggregate logs into monthly buckets."""
-    _aggregate("monthly", "%Y-%m")
+    """Generate monthly reports."""
+    _generate_interval("monthly")
 
 
 if __name__ == "__main__":
     app()
diff --git a/templates/report.html b/templates/report.html
index e6dfee0..3288269 100644
--- a/templates/report.html
+++ b/templates/report.html
@@ -9,17 +9,24 @@
   <h1>{{ interval.title() }} Report</h1>
-  <canvas id="chart"></canvas>
-  <script>
-    fetch("{{ json_path }}")
-      .then((resp) => resp.json())
-      .then((rows) => {
-        new Chart(document.getElementById("chart"), {
-          type: "bar",
-          data: {
-            labels: rows.map((r) => r.bucket),
-            datasets: [{ label: "Hits", data: rows.map((r) => r.hits) }],
-          },
-        });
-      });
-  </script>
+  {% for report in reports %}
+  <section>
+    <h2>{{ report.label }}</h2>
+    <canvas id="chart-{{ report.name }}"></canvas>
+  </section>
+  {% endfor %}
+  <script>
+    {% for report in reports %}
+    fetch("{{ report.json }}")
+      .then((resp) => resp.json())
+      .then((rows) => {
+        new Chart(document.getElementById("chart-{{ report.name }}"), {
+          type: "{{ report.chart }}",
+          data: {
+            labels: rows.map((r) => r.bucket),
+            datasets: [{ label: "{{ report.label }}", data: rows.map((r) => r.value) }],
+          },
+        });
+      });
+    {% endfor %}
+  </script>
 </body>
 </html>
diff --git a/tests/test_reports.py b/tests/test_reports.py new file mode 100644 index 0000000..0bf7483 --- /dev/null +++ b/tests/test_reports.py @@ -0,0 +1,87 @@ +import sqlite3 +from pathlib import Path +import json +import sys + +import pytest + +REPO_ROOT = Path(__file__).resolve().parents[1] +sys.path.append(str(REPO_ROOT)) +from scripts import generate_reports as gr + + +def setup_db(path: Path): + path.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(path) + cur = conn.cursor() + cur.execute( + """ + CREATE TABLE logs ( + id INTEGER PRIMARY KEY, + ip TEXT, + host TEXT, + time TEXT, + request TEXT, + status INTEGER, + bytes_sent INTEGER, + referer TEXT, + user_agent TEXT, + cache_status TEXT + ) + """ + ) + cur.execute( + "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", + ("127.0.0.1", "example.com", "2024-01-01 10:00:00", "GET / HTTP/1.1", 200, 100, "-", "curl", "MISS"), + ) + cur.execute( + "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", + ("127.0.0.1", "example.com", "2024-01-01 10:05:00", "GET /err HTTP/1.1", 500, 100, "-", "curl", "MISS"), + ) + conn.commit() + conn.close() + + +@pytest.fixture() +def sample_reports(tmp_path): + cfg = tmp_path / "reports.yml" + cfg.write_text( + """ +- name: hits + interval: hourly + query: | + SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket, COUNT(*) AS value + FROM logs + GROUP BY bucket + ORDER BY bucket +- name: error_rate + interval: hourly + query: | + SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket, + SUM(CASE WHEN status >= 400 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value + FROM logs + GROUP BY bucket + ORDER BY bucket +""" + ) + return cfg + + +def test_generate_interval(tmp_path, sample_reports, monkeypatch): + db_path = tmp_path / "database" / "ngxstat.db" + setup_db(db_path) + + monkeypatch.setattr(gr, "DB_PATH", db_path) + monkeypatch.setattr(gr, "OUTPUT_DIR", tmp_path / "output") + monkeypatch.setattr(gr, "REPORT_CONFIG", sample_reports) + monkeypatch.setattr(gr, "TEMPLATE_DIR", Path(__file__).resolve().parents[1] / "templates") + + gr._generate_interval("hourly") + + hits = json.loads((tmp_path / "output" / "hourly" / "hits.json").read_text()) + assert hits[0]["value"] == 2 + error_rate = json.loads((tmp_path / "output" / "hourly" / "error_rate.json").read_text()) + assert error_rate[0]["value"] == pytest.approx(50.0) + reports = json.loads((tmp_path / "output" / "hourly" / "reports.json").read_text()) + assert {r["name"] for r in reports} == {"hits", "error_rate"} +
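
Reviewer note, not part of the diff: the config contract above (required `name`, `interval`, and `query` keys, optional `label` and `chart`) is easy to sanity-check before running the CLI. Below is a minimal sketch, assuming PyYAML from `requirements.txt`; the `check_reports_config` helper, its file name, and its rules are illustrative, not something this patch ships:

```python
# check_config.py -- illustrative sketch, not included in this patch.
# Validates reports.yml against the schema generate_reports.py assumes:
# each entry needs name/interval/query; label and chart are optional.
from pathlib import Path
from typing import List

import yaml

REQUIRED_KEYS = {"name", "interval", "query"}
KNOWN_INTERVALS = {"hourly", "daily", "weekly", "monthly"}  # the four CLI commands


def check_reports_config(path: Path = Path("reports.yml")) -> List[str]:
    """Return human-readable problems; an empty list means the config looks sane."""
    data = yaml.safe_load(path.read_text()) or []
    if not isinstance(data, list):
        return ["top level must be a list of report definitions"]
    problems: List[str] = []
    for idx, entry in enumerate(data):
        if not isinstance(entry, dict):
            problems.append(f"entry {idx}: expected a mapping, got {type(entry).__name__}")
            continue
        missing = REQUIRED_KEYS - set(entry)
        if missing:
            problems.append(f"entry {idx}: missing keys {sorted(missing)}")
            continue
        if entry["interval"] not in KNOWN_INTERVALS:
            problems.append(f"{entry['name']}: unknown interval {entry['interval']!r}")
        if not entry["query"].lstrip().upper().startswith("SELECT"):
            problems.append(f"{entry['name']}: query should be a single SELECT")
    return problems


if __name__ == "__main__":
    for problem in check_reports_config():
        print(problem)
```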
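A companion sketch for the output side, assuming the layout `_generate_interval` writes (`output/<interval>/reports.json` as a manifest plus one `<name>.json` per report); the script name and the choice of the hourly interval are arbitrary:

```python
# latest_values.py -- illustrative sketch, not included in this patch.
# Reads the manifest written by _generate_interval and prints the newest
# bucket of every hourly report.
import json
from pathlib import Path

interval_dir = Path("output/hourly")

# reports.json is the manifest the dashboard template iterates over.
manifest = json.loads((interval_dir / "reports.json").read_text())

for report in manifest:
    rows = json.loads((interval_dir / report["json"]).read_text())
    if rows:
        # Every bundled query ends with ORDER BY bucket, so the last row is newest.
        latest = rows[-1]
        print(f"{report['label']}: {latest['value']} ({latest['bucket']})")
```

Both sketches lean only on the `bucket`/`value` contract the README section establishes, so they should keep working as new report definitions are added to `reports.yml`.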