Merge pull request #11 from wagesj45/codex/add-reports.yml-and-update-report-generation
Enable YAML-driven reporting
This commit is contained in:
commit
99a6b45d4c
6 changed files with 214 additions and 60 deletions
23
README.md
23
README.md
|
@ -24,6 +24,29 @@ python scripts/generate_reports.py monthly
|
|||
|
||||
Reports are written under the `output/` directory. Each command writes one JSON file per report to `output/<interval>/` and produces an HTML dashboard at `output/<interval>/index.html` using Chart.js.
|
||||
|
||||
### Configuring Reports
|
||||
|
||||
Report queries are defined in `reports.yml`. Each entry specifies the `name`,
|
||||
`interval`, optional `label` and `chart` type, and a SQL `query` that must return
|
||||
`bucket` and `value` columns. When `generate_reports.py` runs, every matching
|
||||
definition creates `output/<interval>/<name>.json` and an interval dashboard.
|
||||
|
||||
Example snippet:
|
||||
|
||||
```yaml
|
||||
- name: hits
|
||||
interval: hourly
|
||||
chart: bar
|
||||
query: |
|
||||
SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
|
||||
COUNT(*) AS value
|
||||
FROM logs
|
||||
GROUP BY bucket
|
||||
ORDER BY bucket
|
||||
```
|
||||
|
||||
Add or modify entries in `reports.yml` to tailor the generated metrics.
|
||||
|
||||
## Importing Logs
|
||||
|
||||
Use the `run-import.sh` script to set up the Python environment if needed and import the latest Nginx log entries into `database/ngxstat.db`.
|
||||
|
|
21
reports.yml
Normal file
21
reports.yml
Normal file
|
@ -0,0 +1,21 @@
|
|||
- name: hits
|
||||
interval: hourly
|
||||
label: Hits
|
||||
chart: bar
|
||||
query: |
|
||||
SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
|
||||
COUNT(*) AS value
|
||||
FROM logs
|
||||
GROUP BY bucket
|
||||
ORDER BY bucket
|
||||
|
||||
- name: error_rate
|
||||
interval: hourly
|
||||
label: Error Rate (%)
|
||||
chart: line
|
||||
query: |
|
||||
SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
|
||||
SUM(CASE WHEN status >= 500 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value
|
||||
FROM logs
|
||||
GROUP BY bucket
|
||||
ORDER BY bucket
|
|
@ -7,3 +7,4 @@ Flask # For optional lightweight API server
|
|||
# Linting / formatting (optional but recommended)
|
||||
black
|
||||
flake8
|
||||
PyYAML
|
||||
|
|
|
@ -3,77 +3,91 @@ import sqlite3
|
|||
from pathlib import Path
|
||||
from typing import List, Dict
|
||||
|
||||
import yaml
|
||||
|
||||
import typer
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
DB_PATH = Path("database/ngxstat.db")
|
||||
OUTPUT_DIR = Path("output")
|
||||
TEMPLATE_DIR = Path("templates")
|
||||
REPORT_CONFIG = Path("reports.yml")
|
||||
|
||||
app = typer.Typer(help="Generate aggregated log reports")
|
||||
|
||||
def _load_existing(path: Path) -> List[Dict]:
    """Return the previously saved report rows stored at *path*.

    Loading is best-effort: a missing, unreadable, or malformed file, or one
    whose top-level JSON value is not a list, yields an empty list.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded list of row dictionaries, or ``[]`` when nothing usable
        could be loaded.
    """
    try:
        # Read directly rather than checking exists() first; a missing file
        # surfaces as FileNotFoundError (an OSError) and is handled below.
        loaded = json.loads(path.read_text())
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return []
    # The result is indexed and appended to by its caller, so any payload
    # that is not a list is treated as "no usable data".
    return loaded if isinstance(loaded, list) else []
|
||||
def _load_config() -> List[Dict]:
    """Load the report definitions from ``REPORT_CONFIG``.

    Returns:
        The list of report definition mappings parsed from the YAML file. A
        file that parses to an empty/false value yields an empty list.

    Raises:
        typer.Exit: With exit code 1 when the file does not exist, when its
            top-level value is not a list, or when an entry is not a mapping.
    """
    if not REPORT_CONFIG.exists():
        typer.echo(f"Config file not found: {REPORT_CONFIG}")
        raise typer.Exit(1)

    # Decode explicitly as UTF-8 so parsing does not depend on the locale's
    # default encoding (labels and queries may contain non-ASCII text).
    with REPORT_CONFIG.open("r", encoding="utf-8") as fh:
        data = yaml.safe_load(fh) or []

    if not isinstance(data, list):
        typer.echo("reports.yml must contain a list of report definitions")
        raise typer.Exit(1)

    # Entries are filtered with .get("interval") during generation, so reject
    # non-mapping entries here with a clear message instead of failing later
    # with an AttributeError.
    if not all(isinstance(entry, dict) for entry in data):
        typer.echo("Each report definition in reports.yml must be a mapping")
        raise typer.Exit(1)

    return data
|
||||
|
||||
def _save_json(path: Path, data: List[Dict]) -> None:
    """Write *data* to *path* as 2-space-indented JSON.

    Missing parent directories of *path* are created first.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, indent=2)
    path.write_text(serialized)
|
||||
|
||||
def _render_html(interval: str, json_name: str, out_path: Path) -> None:
|
||||
def _render_html(interval: str, reports: List[Dict], out_path: Path) -> None:
|
||||
env = Environment(loader=FileSystemLoader(TEMPLATE_DIR))
|
||||
template = env.get_template("report.html")
|
||||
out_path.write_text(template.render(interval=interval, json_path=json_name))
|
||||
out_path.write_text(template.render(interval=interval, reports=reports))
|
||||
|
||||
def _aggregate(interval: str, fmt: str) -> None:
|
||||
json_path = OUTPUT_DIR / f"{interval}.json"
|
||||
html_path = OUTPUT_DIR / f"{interval}.html"
|
||||
|
||||
existing = _load_existing(json_path)
|
||||
last_bucket = existing[-1]["bucket"] if existing else None
|
||||
def _generate_interval(interval: str) -> None:
|
||||
cfg = _load_config()
|
||||
defs = [d for d in cfg if d.get("interval") == interval]
|
||||
if not defs:
|
||||
typer.echo(f"No reports defined for {interval}")
|
||||
return
|
||||
|
||||
conn = sqlite3.connect(DB_PATH)
|
||||
cur = conn.cursor()
|
||||
|
||||
query = f"SELECT strftime('{fmt}', datetime(time)) as bucket, COUNT(*) as hits FROM logs"
|
||||
params = []
|
||||
if last_bucket:
|
||||
query += " WHERE datetime(time) > datetime(?)"
|
||||
params.append(last_bucket)
|
||||
query += " GROUP BY bucket ORDER BY bucket"
|
||||
out_dir = OUTPUT_DIR / interval
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
rows = cur.execute(query, params).fetchall()
|
||||
for bucket, hits in rows:
|
||||
existing.append({"bucket": bucket, "hits": hits})
|
||||
report_list = []
|
||||
for definition in defs:
|
||||
name = definition["name"]
|
||||
query = definition["query"]
|
||||
cur.execute(query)
|
||||
rows = cur.fetchall()
|
||||
headers = [c[0] for c in cur.description]
|
||||
data = [dict(zip(headers, row)) for row in rows]
|
||||
json_path = out_dir / f"{name}.json"
|
||||
_save_json(json_path, data)
|
||||
report_list.append({
|
||||
"name": name,
|
||||
"label": definition.get("label", name.title()),
|
||||
"chart": definition.get("chart", "line"),
|
||||
"json": f"{name}.json",
|
||||
})
|
||||
|
||||
existing.sort(key=lambda x: x["bucket"])
|
||||
_save_json(json_path, existing)
|
||||
_render_html(interval, json_path.name, html_path)
|
||||
typer.echo(f"Generated {json_path} and {html_path}")
|
||||
_save_json(out_dir / "reports.json", report_list)
|
||||
_render_html(interval, report_list, out_dir / "index.html")
|
||||
typer.echo(f"Generated {interval} reports")
|
||||
|
||||
@app.command()
|
||||
def hourly() -> None:
|
||||
"""Aggregate logs into hourly buckets."""
|
||||
_aggregate("hourly", "%Y-%m-%d %H:00:00")
|
||||
"""Generate hourly reports."""
|
||||
_generate_interval("hourly")
|
||||
|
||||
@app.command()
|
||||
def daily() -> None:
|
||||
"""Aggregate logs into daily buckets."""
|
||||
_aggregate("daily", "%Y-%m-%d")
|
||||
"""Generate daily reports."""
|
||||
_generate_interval("daily")
|
||||
|
||||
@app.command()
|
||||
def weekly() -> None:
|
||||
"""Aggregate logs into weekly buckets."""
|
||||
_aggregate("weekly", "%Y-%W")
|
||||
"""Generate weekly reports."""
|
||||
_generate_interval("weekly")
|
||||
|
||||
@app.command()
|
||||
def monthly() -> None:
|
||||
"""Aggregate logs into monthly buckets."""
|
||||
_aggregate("monthly", "%Y-%m")
|
||||
"""Generate monthly reports."""
|
||||
_generate_interval("monthly")
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
|
|
|
@ -9,34 +9,42 @@
|
|||
<body class="section">
|
||||
<div class="container">
|
||||
<h1 class="title">{{ interval.title() }} Report</h1>
|
||||
<canvas id="chart"></canvas>
|
||||
{% for report in reports %}
|
||||
<div class="box">
|
||||
<h2 class="subtitle">{{ report.label }}</h2>
|
||||
<canvas id="chart-{{ report.name }}"></canvas>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
<script>
|
||||
fetch('{{ json_path }}')
|
||||
.then(r => r.json())
|
||||
.then(data => {
|
||||
const labels = data.map(x => x.bucket);
|
||||
const hits = data.map(x => x.hits);
|
||||
new Chart(document.getElementById('chart'), {
|
||||
type: '{{ 'bar' if interval == 'hourly' else 'line' }}',
|
||||
data: {
|
||||
labels: labels,
|
||||
datasets: [{
|
||||
label: 'Hits',
|
||||
data: hits,
|
||||
backgroundColor: 'rgba(54, 162, 235, 0.5)',
|
||||
borderColor: 'rgba(54, 162, 235, 1)',
|
||||
borderWidth: 1,
|
||||
fill: true,
|
||||
}]
|
||||
},
|
||||
options: {
|
||||
scales: {
|
||||
y: { beginAtZero: true }
|
||||
const reports = {{ reports | tojson }};
|
||||
reports.forEach(rep => {
|
||||
fetch(rep.json)
|
||||
.then(r => r.json())
|
||||
.then(data => {
|
||||
const labels = data.map(x => x.bucket);
|
||||
const values = data.map(x => x.value);
|
||||
new Chart(document.getElementById('chart-' + rep.name), {
|
||||
type: rep.chart,
|
||||
data: {
|
||||
labels: labels,
|
||||
datasets: [{
|
||||
label: rep.label,
|
||||
data: values,
|
||||
backgroundColor: 'rgba(54, 162, 235, 0.5)',
|
||||
borderColor: 'rgba(54, 162, 235, 1)',
|
||||
borderWidth: 1,
|
||||
fill: rep.chart !== 'bar',
|
||||
}]
|
||||
},
|
||||
options: {
|
||||
scales: {
|
||||
y: { beginAtZero: true }
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
|
87
tests/test_reports.py
Normal file
87
tests/test_reports.py
Normal file
|
@ -0,0 +1,87 @@
|
|||
import sqlite3
|
||||
from pathlib import Path
|
||||
import json
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
sys.path.append(str(REPO_ROOT))
|
||||
from scripts import generate_reports as gr
|
||||
|
||||
|
||||
def setup_db(path: Path):
    """Create a SQLite database at *path* with a ``logs`` table and two rows.

    Both sample rows fall within the 10:00 hour of 2024-01-01; the first has
    status 200 and the second status 500. Parent directories of *path* are
    created if they do not exist.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    insert_sql = (
        "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )
    sample_rows = [
        ("127.0.0.1", "example.com", "2024-01-01 10:00:00", "GET / HTTP/1.1", 200, 100, "-", "curl", "MISS"),
        ("127.0.0.1", "example.com", "2024-01-01 10:05:00", "GET /err HTTP/1.1", 500, 100, "-", "curl", "MISS"),
    ]

    conn = sqlite3.connect(path)
    try:
        conn.execute(
            """
            CREATE TABLE logs (
                id INTEGER PRIMARY KEY,
                ip TEXT,
                host TEXT,
                time TEXT,
                request TEXT,
                status INTEGER,
                bytes_sent INTEGER,
                referer TEXT,
                user_agent TEXT,
                cache_status TEXT
            )
            """
        )
        conn.executemany(insert_sql, sample_rows)
        conn.commit()
    finally:
        # Always release the connection, even if a statement above raises
        # (for example when the table already exists).
        conn.close()
|
||||
|
||||
|
||||
@pytest.fixture()
def sample_reports(tmp_path):
    """Write a two-report ``reports.yml`` into *tmp_path* and return its path.

    The file defines hourly ``hits`` and ``error_rate`` reports. The error
    rate here counts statuses >= 400.
    """
    cfg = tmp_path / "reports.yml"
    # The YAML is kept flush-left inside the literal: its nesting (list item
    # keys and the block-scalar queries) is significant to the parser.
    cfg.write_text(
        """
- name: hits
  interval: hourly
  query: |
    SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket, COUNT(*) AS value
    FROM logs
    GROUP BY bucket
    ORDER BY bucket
- name: error_rate
  interval: hourly
  query: |
    SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
           SUM(CASE WHEN status >= 400 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value
    FROM logs
    GROUP BY bucket
    ORDER BY bucket
"""
    )
    return cfg
|
||||
|
||||
|
||||
def test_generate_interval(tmp_path, sample_reports, monkeypatch):
    """Hourly generation writes each report's JSON and the reports index."""
    db_path = tmp_path / "database" / "ngxstat.db"
    setup_db(db_path)

    # Redirect every module-level path of the generator into the sandbox.
    overrides = {
        "DB_PATH": db_path,
        "OUTPUT_DIR": tmp_path / "output",
        "REPORT_CONFIG": sample_reports,
        "TEMPLATE_DIR": Path(__file__).resolve().parents[1] / "templates",
    }
    for attr_name, attr_value in overrides.items():
        monkeypatch.setattr(gr, attr_name, attr_value)

    gr._generate_interval("hourly")

    hourly_dir = tmp_path / "output" / "hourly"

    def read_json(file_name):
        return json.loads((hourly_dir / file_name).read_text())

    hits = read_json("hits.json")
    assert hits[0]["value"] == 2

    error_rate = read_json("error_rate.json")
    assert error_rate[0]["value"] == pytest.approx(50.0)

    reports = read_json("reports.json")
    generated_names = {entry["name"] for entry in reports}
    assert generated_names == {"hits", "error_rate"}
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue