Merge pull request #11 from wagesj45/codex/add-reports.yml-and-update-report-generation

Enable YAML-driven reporting
This commit is contained in:
Jordan Wages 2025-07-18 01:29:54 -05:00 committed by GitHub
commit 99a6b45d4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 214 additions and 60 deletions

View file

@ -24,6 +24,29 @@ python scripts/generate_reports.py monthly
Reports are written under the `output/` directory. Each command updates the corresponding `<interval>.json` file and produces an HTML dashboard using Chart.js.
### Configuring Reports
Report queries are defined in `reports.yml`. Each entry specifies the `name`,
`interval`, optional `label` and `chart` type, and a SQL `query` that must return
`bucket` and `value` columns. When `generate_reports.py` runs, every matching
definition creates `output/<interval>/<name>.json` and an interval dashboard.
Example snippet:
```yaml
- name: hits
interval: hourly
chart: bar
query: |
SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
COUNT(*) AS value
FROM logs
GROUP BY bucket
ORDER BY bucket
```
Add or modify entries in `reports.yml` to tailor the generated metrics.
## Importing Logs
Use the `run-import.sh` script to set up the Python environment if needed and import the latest Nginx log entries into `database/ngxstat.db`.

21
reports.yml Normal file
View file

@ -0,0 +1,21 @@
- name: hits
interval: hourly
label: Hits
chart: bar
query: |
SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
COUNT(*) AS value
FROM logs
GROUP BY bucket
ORDER BY bucket
- name: error_rate
interval: hourly
label: Error Rate (%)
chart: line
query: |
SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
SUM(CASE WHEN status >= 500 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value
FROM logs
GROUP BY bucket
ORDER BY bucket

View file

@ -7,3 +7,4 @@ Flask # For optional lightweight API server
# Linting / formatting (optional but recommended)
black
flake8
PyYAML

View file

@ -3,77 +3,91 @@ import sqlite3
from pathlib import Path from pathlib import Path
from typing import List, Dict from typing import List, Dict
import yaml
import typer import typer
from jinja2 import Environment, FileSystemLoader from jinja2 import Environment, FileSystemLoader
DB_PATH = Path("database/ngxstat.db") DB_PATH = Path("database/ngxstat.db")
OUTPUT_DIR = Path("output") OUTPUT_DIR = Path("output")
TEMPLATE_DIR = Path("templates") TEMPLATE_DIR = Path("templates")
REPORT_CONFIG = Path("reports.yml")
app = typer.Typer(help="Generate aggregated log reports") app = typer.Typer(help="Generate aggregated log reports")
def _load_existing(path: Path) -> List[Dict]: def _load_config() -> List[Dict]:
if path.exists(): if not REPORT_CONFIG.exists():
try: typer.echo(f"Config file not found: {REPORT_CONFIG}")
return json.loads(path.read_text()) raise typer.Exit(1)
except Exception: with REPORT_CONFIG.open("r") as fh:
return [] data = yaml.safe_load(fh) or []
return [] if not isinstance(data, list):
typer.echo("reports.yml must contain a list of report definitions")
raise typer.Exit(1)
return data
def _save_json(path: Path, data: List[Dict]) -> None: def _save_json(path: Path, data: List[Dict]) -> None:
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(data, indent=2)) path.write_text(json.dumps(data, indent=2))
def _render_html(interval: str, json_name: str, out_path: Path) -> None: def _render_html(interval: str, reports: List[Dict], out_path: Path) -> None:
env = Environment(loader=FileSystemLoader(TEMPLATE_DIR)) env = Environment(loader=FileSystemLoader(TEMPLATE_DIR))
template = env.get_template("report.html") template = env.get_template("report.html")
out_path.write_text(template.render(interval=interval, json_path=json_name)) out_path.write_text(template.render(interval=interval, reports=reports))
def _aggregate(interval: str, fmt: str) -> None: def _generate_interval(interval: str) -> None:
json_path = OUTPUT_DIR / f"{interval}.json" cfg = _load_config()
html_path = OUTPUT_DIR / f"{interval}.html" defs = [d for d in cfg if d.get("interval") == interval]
if not defs:
existing = _load_existing(json_path) typer.echo(f"No reports defined for {interval}")
last_bucket = existing[-1]["bucket"] if existing else None return
conn = sqlite3.connect(DB_PATH) conn = sqlite3.connect(DB_PATH)
cur = conn.cursor() cur = conn.cursor()
query = f"SELECT strftime('{fmt}', datetime(time)) as bucket, COUNT(*) as hits FROM logs" out_dir = OUTPUT_DIR / interval
params = [] out_dir.mkdir(parents=True, exist_ok=True)
if last_bucket:
query += " WHERE datetime(time) > datetime(?)"
params.append(last_bucket)
query += " GROUP BY bucket ORDER BY bucket"
rows = cur.execute(query, params).fetchall() report_list = []
for bucket, hits in rows: for definition in defs:
existing.append({"bucket": bucket, "hits": hits}) name = definition["name"]
query = definition["query"]
cur.execute(query)
rows = cur.fetchall()
headers = [c[0] for c in cur.description]
data = [dict(zip(headers, row)) for row in rows]
json_path = out_dir / f"{name}.json"
_save_json(json_path, data)
report_list.append({
"name": name,
"label": definition.get("label", name.title()),
"chart": definition.get("chart", "line"),
"json": f"{name}.json",
})
existing.sort(key=lambda x: x["bucket"]) _save_json(out_dir / "reports.json", report_list)
_save_json(json_path, existing) _render_html(interval, report_list, out_dir / "index.html")
_render_html(interval, json_path.name, html_path) typer.echo(f"Generated {interval} reports")
typer.echo(f"Generated {json_path} and {html_path}")
@app.command() @app.command()
def hourly() -> None: def hourly() -> None:
"""Aggregate logs into hourly buckets.""" """Generate hourly reports."""
_aggregate("hourly", "%Y-%m-%d %H:00:00") _generate_interval("hourly")
@app.command() @app.command()
def daily() -> None: def daily() -> None:
"""Aggregate logs into daily buckets.""" """Generate daily reports."""
_aggregate("daily", "%Y-%m-%d") _generate_interval("daily")
@app.command() @app.command()
def weekly() -> None: def weekly() -> None:
"""Aggregate logs into weekly buckets.""" """Generate weekly reports."""
_aggregate("weekly", "%Y-%W") _generate_interval("weekly")
@app.command() @app.command()
def monthly() -> None: def monthly() -> None:
"""Aggregate logs into monthly buckets.""" """Generate monthly reports."""
_aggregate("monthly", "%Y-%m") _generate_interval("monthly")
if __name__ == "__main__": if __name__ == "__main__":
app() app()

View file

@ -9,34 +9,42 @@
<body class="section"> <body class="section">
<div class="container"> <div class="container">
<h1 class="title">{{ interval.title() }} Report</h1> <h1 class="title">{{ interval.title() }} Report</h1>
<canvas id="chart"></canvas> {% for report in reports %}
<div class="box">
<h2 class="subtitle">{{ report.label }}</h2>
<canvas id="chart-{{ report.name }}"></canvas>
</div>
{% endfor %}
</div> </div>
<script> <script>
fetch('{{ json_path }}') const reports = {{ reports | tojson }};
.then(r => r.json()) reports.forEach(rep => {
.then(data => { fetch(rep.json)
const labels = data.map(x => x.bucket); .then(r => r.json())
const hits = data.map(x => x.hits); .then(data => {
new Chart(document.getElementById('chart'), { const labels = data.map(x => x.bucket);
type: '{{ 'bar' if interval == 'hourly' else 'line' }}', const values = data.map(x => x.value);
data: { new Chart(document.getElementById('chart-' + rep.name), {
labels: labels, type: rep.chart,
datasets: [{ data: {
label: 'Hits', labels: labels,
data: hits, datasets: [{
backgroundColor: 'rgba(54, 162, 235, 0.5)', label: rep.label,
borderColor: 'rgba(54, 162, 235, 1)', data: values,
borderWidth: 1, backgroundColor: 'rgba(54, 162, 235, 0.5)',
fill: true, borderColor: 'rgba(54, 162, 235, 1)',
}] borderWidth: 1,
}, fill: rep.chart !== 'bar',
options: { }]
scales: { },
y: { beginAtZero: true } options: {
scales: {
y: { beginAtZero: true }
}
} }
} });
}); });
}); });
</script> </script>
</body> </body>
</html> </html>

87
tests/test_reports.py Normal file
View file

@ -0,0 +1,87 @@
import sqlite3
from pathlib import Path
import json
import sys
import pytest
REPO_ROOT = Path(__file__).resolve().parents[1]
sys.path.append(str(REPO_ROOT))
from scripts import generate_reports as gr
def setup_db(path: Path):
path.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(path)
cur = conn.cursor()
cur.execute(
"""
CREATE TABLE logs (
id INTEGER PRIMARY KEY,
ip TEXT,
host TEXT,
time TEXT,
request TEXT,
status INTEGER,
bytes_sent INTEGER,
referer TEXT,
user_agent TEXT,
cache_status TEXT
)
"""
)
cur.execute(
"INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
("127.0.0.1", "example.com", "2024-01-01 10:00:00", "GET / HTTP/1.1", 200, 100, "-", "curl", "MISS"),
)
cur.execute(
"INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
("127.0.0.1", "example.com", "2024-01-01 10:05:00", "GET /err HTTP/1.1", 500, 100, "-", "curl", "MISS"),
)
conn.commit()
conn.close()
@pytest.fixture()
def sample_reports(tmp_path):
cfg = tmp_path / "reports.yml"
cfg.write_text(
"""
- name: hits
interval: hourly
query: |
SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket, COUNT(*) AS value
FROM logs
GROUP BY bucket
ORDER BY bucket
- name: error_rate
interval: hourly
query: |
SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
SUM(CASE WHEN status >= 400 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value
FROM logs
GROUP BY bucket
ORDER BY bucket
"""
)
return cfg
def test_generate_interval(tmp_path, sample_reports, monkeypatch):
db_path = tmp_path / "database" / "ngxstat.db"
setup_db(db_path)
monkeypatch.setattr(gr, "DB_PATH", db_path)
monkeypatch.setattr(gr, "OUTPUT_DIR", tmp_path / "output")
monkeypatch.setattr(gr, "REPORT_CONFIG", sample_reports)
monkeypatch.setattr(gr, "TEMPLATE_DIR", Path(__file__).resolve().parents[1] / "templates")
gr._generate_interval("hourly")
hits = json.loads((tmp_path / "output" / "hourly" / "hits.json").read_text())
assert hits[0]["value"] == 2
error_rate = json.loads((tmp_path / "output" / "hourly" / "error_rate.json").read_text())
assert error_rate[0]["value"] == pytest.approx(50.0)
reports = json.loads((tmp_path / "output" / "hourly" / "reports.json").read_text())
assert {r["name"] for r in reports} == {"hits", "error_rate"}