Add YAML-driven report generation

Jordan Wages · 2025-07-18 01:24:26 -05:00 · commit 6241fd2685
6 changed files with 214 additions and 60 deletions


@@ -24,6 +24,29 @@ python scripts/generate_reports.py monthly
Reports are written under the `output/` directory. Each command writes each report's JSON data under `output/<interval>/` and renders an HTML dashboard using Chart.js.

### Configuring Reports
Report queries are defined in `reports.yml`. Each entry specifies the `name`,
`interval`, optional `label` and `chart` type, and a SQL `query` that must return
`bucket` and `value` columns. When `generate_reports.py` runs, every matching
definition creates `output/<interval>/<name>.json` and an interval dashboard.
Example snippet:
```yaml
- name: hits
  interval: hourly
  chart: bar
  query: |
    SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
           COUNT(*) AS value
    FROM logs
    GROUP BY bucket
    ORDER BY bucket
```
Add or modify entries in `reports.yml` to tailor the generated metrics.
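
For instance, to chart outbound traffic per day you could add an entry like the sketch below. It is illustrative rather than part of the shipped `reports.yml`, and it assumes the `bytes_sent` column recorded by the log importer:

```yaml
- name: bandwidth
  interval: daily
  label: Bytes Sent
  chart: line
  query: |
    SELECT strftime('%Y-%m-%d', datetime(time)) AS bucket,
           SUM(bytes_sent) AS value
    FROM logs
    GROUP BY bucket
    ORDER BY bucket
```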
## Importing Logs
Use the `run-import.sh` script to set up the Python environment if needed and import the latest Nginx log entries into `database/ngxstat.db`.

reports.yml (new file)

@@ -0,0 +1,21 @@
```yaml
- name: hits
  interval: hourly
  label: Hits
  chart: bar
  query: |
    SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
           COUNT(*) AS value
    FROM logs
    GROUP BY bucket
    ORDER BY bucket

- name: error_rate
  interval: hourly
  label: Error Rate (%)
  chart: line
  query: |
    SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
           SUM(CASE WHEN status >= 500 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value
    FROM logs
    GROUP BY bucket
    ORDER BY bucket
```
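
With these two definitions, a run of `python scripts/generate_reports.py hourly` should leave roughly the following layout under `output/` (a sketch based on the generator changes below):

```
output/
└── hourly/
    ├── hits.json        # [{"bucket": ..., "value": ...}, ...]
    ├── error_rate.json
    ├── reports.json     # manifest consumed by the dashboard template
    └── index.html
```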


```diff
@@ -7,3 +7,4 @@ Flask  # For optional lightweight API server
 # Linting / formatting (optional but recommended)
 black
 flake8
+PyYAML
```

scripts/generate_reports.py

```diff
@@ -3,77 +3,91 @@ import sqlite3
 from pathlib import Path
 from typing import List, Dict
 
+import yaml
 import typer
 from jinja2 import Environment, FileSystemLoader
 
 DB_PATH = Path("database/ngxstat.db")
 OUTPUT_DIR = Path("output")
 TEMPLATE_DIR = Path("templates")
+REPORT_CONFIG = Path("reports.yml")
 
 app = typer.Typer(help="Generate aggregated log reports")
 
 
 def _load_existing(path: Path) -> List[Dict]:
     if path.exists():
         try:
             return json.loads(path.read_text())
         except Exception:
             return []
     return []
 
 
+def _load_config() -> List[Dict]:
+    if not REPORT_CONFIG.exists():
+        typer.echo(f"Config file not found: {REPORT_CONFIG}")
+        raise typer.Exit(1)
+    with REPORT_CONFIG.open("r") as fh:
+        data = yaml.safe_load(fh) or []
+    if not isinstance(data, list):
+        typer.echo("reports.yml must contain a list of report definitions")
+        raise typer.Exit(1)
+    return data
+
+
 def _save_json(path: Path, data: List[Dict]) -> None:
     path.parent.mkdir(parents=True, exist_ok=True)
     path.write_text(json.dumps(data, indent=2))
 
 
-def _render_html(interval: str, json_name: str, out_path: Path) -> None:
+def _render_html(interval: str, reports: List[Dict], out_path: Path) -> None:
     env = Environment(loader=FileSystemLoader(TEMPLATE_DIR))
     template = env.get_template("report.html")
-    out_path.write_text(template.render(interval=interval, json_path=json_name))
+    out_path.write_text(template.render(interval=interval, reports=reports))
 
 
-def _aggregate(interval: str, fmt: str) -> None:
-    json_path = OUTPUT_DIR / f"{interval}.json"
-    html_path = OUTPUT_DIR / f"{interval}.html"
-    existing = _load_existing(json_path)
-    last_bucket = existing[-1]["bucket"] if existing else None
+def _generate_interval(interval: str) -> None:
+    cfg = _load_config()
+    defs = [d for d in cfg if d.get("interval") == interval]
+    if not defs:
+        typer.echo(f"No reports defined for {interval}")
+        return
 
     conn = sqlite3.connect(DB_PATH)
     cur = conn.cursor()
 
-    query = f"SELECT strftime('{fmt}', datetime(time)) as bucket, COUNT(*) as hits FROM logs"
-    params = []
-    if last_bucket:
-        query += " WHERE datetime(time) > datetime(?)"
-        params.append(last_bucket)
-    query += " GROUP BY bucket ORDER BY bucket"
-
-    rows = cur.execute(query, params).fetchall()
-    for bucket, hits in rows:
-        existing.append({"bucket": bucket, "hits": hits})
-
-    existing.sort(key=lambda x: x["bucket"])
-    _save_json(json_path, existing)
-    _render_html(interval, json_path.name, html_path)
-    typer.echo(f"Generated {json_path} and {html_path}")
+    out_dir = OUTPUT_DIR / interval
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    report_list = []
+    for definition in defs:
+        name = definition["name"]
+        query = definition["query"]
+        cur.execute(query)
+        rows = cur.fetchall()
+        headers = [c[0] for c in cur.description]
+        data = [dict(zip(headers, row)) for row in rows]
+        json_path = out_dir / f"{name}.json"
+        _save_json(json_path, data)
+        report_list.append({
+            "name": name,
+            "label": definition.get("label", name.title()),
+            "chart": definition.get("chart", "line"),
+            "json": f"{name}.json",
+        })
+
+    _save_json(out_dir / "reports.json", report_list)
+    _render_html(interval, report_list, out_dir / "index.html")
+    typer.echo(f"Generated {interval} reports")
 
 
 @app.command()
 def hourly() -> None:
-    """Aggregate logs into hourly buckets."""
-    _aggregate("hourly", "%Y-%m-%d %H:00:00")
+    """Generate hourly reports."""
+    _generate_interval("hourly")
 
 
 @app.command()
 def daily() -> None:
-    """Aggregate logs into daily buckets."""
-    _aggregate("daily", "%Y-%m-%d")
+    """Generate daily reports."""
+    _generate_interval("daily")
 
 
 @app.command()
 def weekly() -> None:
-    """Aggregate logs into weekly buckets."""
-    _aggregate("weekly", "%Y-%W")
+    """Generate weekly reports."""
+    _generate_interval("weekly")
 
 
 @app.command()
 def monthly() -> None:
-    """Aggregate logs into monthly buckets."""
-    _aggregate("monthly", "%Y-%m")
+    """Generate monthly reports."""
+    _generate_interval("monthly")
 
 
 if __name__ == "__main__":
     app()
```
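
For the two definitions shipped in `reports.yml`, the `reports.json` manifest that `_generate_interval` writes would contain something like this (illustrative, following the `report_list` fields above):

```json
[
  {"name": "hits", "label": "Hits", "chart": "bar", "json": "hits.json"},
  {"name": "error_rate", "label": "Error Rate (%)", "chart": "line", "json": "error_rate.json"}
]
```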

templates/report.html

```diff
@@ -9,34 +9,42 @@
 <body class="section">
   <div class="container">
     <h1 class="title">{{ interval.title() }} Report</h1>
-    <canvas id="chart"></canvas>
+    {% for report in reports %}
+    <div class="box">
+      <h2 class="subtitle">{{ report.label }}</h2>
+      <canvas id="chart-{{ report.name }}"></canvas>
+    </div>
+    {% endfor %}
   </div>
   <script>
-    fetch('{{ json_path }}')
-      .then(r => r.json())
-      .then(data => {
-        const labels = data.map(x => x.bucket);
-        const hits = data.map(x => x.hits);
-        new Chart(document.getElementById('chart'), {
-          type: '{{ 'bar' if interval == 'hourly' else 'line' }}',
-          data: {
-            labels: labels,
-            datasets: [{
-              label: 'Hits',
-              data: hits,
-              backgroundColor: 'rgba(54, 162, 235, 0.5)',
-              borderColor: 'rgba(54, 162, 235, 1)',
-              borderWidth: 1,
-              fill: true,
-            }]
-          },
-          options: {
-            scales: {
-              y: { beginAtZero: true }
-            }
-          }
-        });
-      });
+    const reports = {{ reports | tojson }};
+    reports.forEach(rep => {
+      fetch(rep.json)
+        .then(r => r.json())
+        .then(data => {
+          const labels = data.map(x => x.bucket);
+          const values = data.map(x => x.value);
+          new Chart(document.getElementById('chart-' + rep.name), {
+            type: rep.chart,
+            data: {
+              labels: labels,
+              datasets: [{
+                label: rep.label,
+                data: values,
+                backgroundColor: 'rgba(54, 162, 235, 0.5)',
+                borderColor: 'rgba(54, 162, 235, 1)',
+                borderWidth: 1,
+                fill: rep.chart !== 'bar',
+              }]
+            },
+            options: {
+              scales: {
+                y: { beginAtZero: true }
+              }
+            }
+          });
+        });
+    });
   </script>
 </body>
 </html>
```

tests/test_reports.py (new file)

@@ -0,0 +1,87 @@
```python
import sqlite3
from pathlib import Path
import json
import sys

import pytest

REPO_ROOT = Path(__file__).resolve().parents[1]
sys.path.append(str(REPO_ROOT))

from scripts import generate_reports as gr


def setup_db(path: Path):
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(path)
    cur = conn.cursor()
    cur.execute(
        """
        CREATE TABLE logs (
            id INTEGER PRIMARY KEY,
            ip TEXT,
            host TEXT,
            time TEXT,
            request TEXT,
            status INTEGER,
            bytes_sent INTEGER,
            referer TEXT,
            user_agent TEXT,
            cache_status TEXT
        )
        """
    )
    cur.execute(
        "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
        ("127.0.0.1", "example.com", "2024-01-01 10:00:00", "GET / HTTP/1.1", 200, 100, "-", "curl", "MISS"),
    )
    cur.execute(
        "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
        ("127.0.0.1", "example.com", "2024-01-01 10:05:00", "GET /err HTTP/1.1", 500, 100, "-", "curl", "MISS"),
    )
    conn.commit()
    conn.close()


@pytest.fixture()
def sample_reports(tmp_path):
    cfg = tmp_path / "reports.yml"
    cfg.write_text(
        """
- name: hits
  interval: hourly
  query: |
    SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket, COUNT(*) AS value
    FROM logs
    GROUP BY bucket
    ORDER BY bucket
- name: error_rate
  interval: hourly
  query: |
    SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
           SUM(CASE WHEN status >= 400 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value
    FROM logs
    GROUP BY bucket
    ORDER BY bucket
"""
    )
    return cfg


def test_generate_interval(tmp_path, sample_reports, monkeypatch):
    db_path = tmp_path / "database" / "ngxstat.db"
    setup_db(db_path)
    monkeypatch.setattr(gr, "DB_PATH", db_path)
    monkeypatch.setattr(gr, "OUTPUT_DIR", tmp_path / "output")
    monkeypatch.setattr(gr, "REPORT_CONFIG", sample_reports)
    monkeypatch.setattr(gr, "TEMPLATE_DIR", Path(__file__).resolve().parents[1] / "templates")

    gr._generate_interval("hourly")

    hits = json.loads((tmp_path / "output" / "hourly" / "hits.json").read_text())
    assert hits[0]["value"] == 2
    error_rate = json.loads((tmp_path / "output" / "hourly" / "error_rate.json").read_text())
    assert error_rate[0]["value"] == pytest.approx(50.0)
    reports = json.loads((tmp_path / "output" / "hourly" / "reports.json").read_text())
    assert {r["name"] for r in reports} == {"hits", "error_rate"}
```
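
Assuming `pytest` is installed in the project environment, the new test can be run on its own:

```sh
python -m pytest tests/test_reports.py -q
```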