Merge pull request #11 from wagesj45/codex/add-reports.yml-and-update-report-generation
Enable YAML-driven reporting
This commit is contained in:
		
				commit
				
					
						99a6b45d4c
					
				
			
		
					 6 changed files with 214 additions and 60 deletions
				
			
		
							
								
								
									
										23
									
								
								README.md
									
										
									
									
									
								
							
							
						
						
									
										23
									
								
								README.md
									
										
									
									
									
								
							|  | @ -24,6 +24,29 @@ python scripts/generate_reports.py monthly | |||
| 
 | ||||
| Reports are written under the `output/` directory. Each command writes one JSON file per report to `output/<interval>/` and renders an HTML dashboard (`output/<interval>/index.html`) using Chart.js. | ||||
| 
 | ||||
| ### Configuring Reports | ||||
| 
 | ||||
| Report queries are defined in `reports.yml`. Each entry specifies the `name`, | ||||
| `interval`, optional `label` and `chart` type, and a SQL `query` that must return | ||||
| `bucket` and `value` columns. When `generate_reports.py` runs, every matching | ||||
| definition creates `output/<interval>/<name>.json`, and the interval dashboard is written to `output/<interval>/index.html`. | ||||
| 
 | ||||
| Example snippet: | ||||
| 
 | ||||
| ```yaml | ||||
| - name: hits | ||||
|   interval: hourly | ||||
|   chart: bar | ||||
|   query: | | ||||
|     SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket, | ||||
|            COUNT(*) AS value | ||||
|     FROM logs | ||||
|     GROUP BY bucket | ||||
|     ORDER BY bucket | ||||
| ``` | ||||
| 
 | ||||
| Add or modify entries in `reports.yml` to tailor the generated metrics. | ||||
| 
 | ||||
| ## Importing Logs | ||||
| 
 | ||||
| Use the `run-import.sh` script to set up the Python environment if needed and import the latest Nginx log entries into `database/ngxstat.db`. | ||||
|  |  | |||
							
								
								
									
										21
									
								
								reports.yml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								reports.yml
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,21 @@ | |||
| - name: hits | ||||
|   interval: hourly | ||||
|   label: Hits | ||||
|   chart: bar | ||||
|   query: | | ||||
|     SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket, | ||||
|            COUNT(*) AS value | ||||
|     FROM logs | ||||
|     GROUP BY bucket | ||||
|     ORDER BY bucket | ||||
| 
 | ||||
| - name: error_rate | ||||
|   interval: hourly | ||||
|   label: Error Rate (%) | ||||
|   chart: line | ||||
|   query: | | ||||
|     SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket, | ||||
|            SUM(CASE WHEN status >= 500 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value | ||||
|     FROM logs | ||||
|     GROUP BY bucket | ||||
|     ORDER BY bucket | ||||
|  | @ -7,3 +7,4 @@ Flask                # For optional lightweight API server | |||
| # Linting / formatting (optional but recommended) | ||||
| black | ||||
| flake8 | ||||
| PyYAML | ||||
|  |  | |||
|  | @ -3,77 +3,91 @@ import sqlite3 | |||
| from pathlib import Path | ||||
| from typing import List, Dict | ||||
| 
 | ||||
| import yaml | ||||
| 
 | ||||
| import typer | ||||
| from jinja2 import Environment, FileSystemLoader | ||||
| 
 | ||||
| DB_PATH = Path("database/ngxstat.db") | ||||
| OUTPUT_DIR = Path("output") | ||||
| TEMPLATE_DIR = Path("templates") | ||||
| REPORT_CONFIG = Path("reports.yml") | ||||
| 
 | ||||
| app = typer.Typer(help="Generate aggregated log reports") | ||||
| 
 | ||||
| def _load_existing(path: Path) -> List[Dict]: | ||||
|     if path.exists(): | ||||
|         try: | ||||
|             return json.loads(path.read_text()) | ||||
|         except Exception: | ||||
|             return [] | ||||
|     return [] | ||||
def _load_config() -> List[Dict]:
    """Load and validate the report definitions stored in ``REPORT_CONFIG``.

    The file must contain a YAML list of mappings, each providing at least
    ``name``, ``interval`` and ``query``. An empty file is treated as an
    empty list of definitions.

    Returns:
        The list of report definition dicts.

    Raises:
        typer.Exit: With exit code 1 when the file is missing, is not a
            list, or contains an entry that is not a mapping or lacks a
            required key. A message describing the problem is echoed first.
    """
    if not REPORT_CONFIG.exists():
        typer.echo(f"Config file not found: {REPORT_CONFIG}")
        raise typer.Exit(1)
    with REPORT_CONFIG.open("r", encoding="utf-8") as fh:
        data = yaml.safe_load(fh)
    # Only an empty document (None) means "no reports"; other falsy values
    # such as ``{}`` are malformed and must hit the type check below.
    if data is None:
        data = []
    if not isinstance(data, list):
        typer.echo("reports.yml must contain a list of report definitions")
        raise typer.Exit(1)
    # Validate up front so mistakes are reported against the config file
    # instead of surfacing later as AttributeError/KeyError.
    required = ("name", "interval", "query")
    for index, entry in enumerate(data):
        if not isinstance(entry, dict):
            typer.echo(f"reports.yml entry {index} must be a mapping")
            raise typer.Exit(1)
        missing = [key for key in required if key not in entry]
        if missing:
            typer.echo(
                f"reports.yml entry {index} is missing: {', '.join(missing)}"
            )
            raise typer.Exit(1)
    return data
| 
 | ||||
| def _save_json(path: Path, data: List[Dict]) -> None: | ||||
|     path.parent.mkdir(parents=True, exist_ok=True) | ||||
|     path.write_text(json.dumps(data, indent=2)) | ||||
| 
 | ||||
| def _render_html(interval: str, json_name: str, out_path: Path) -> None: | ||||
| def _render_html(interval: str, reports: List[Dict], out_path: Path) -> None: | ||||
|     env = Environment(loader=FileSystemLoader(TEMPLATE_DIR)) | ||||
|     template = env.get_template("report.html") | ||||
|     out_path.write_text(template.render(interval=interval, json_path=json_name)) | ||||
|     out_path.write_text(template.render(interval=interval, reports=reports)) | ||||
| 
 | ||||
| def _aggregate(interval: str, fmt: str) -> None: | ||||
|     json_path = OUTPUT_DIR / f"{interval}.json" | ||||
|     html_path = OUTPUT_DIR / f"{interval}.html" | ||||
| 
 | ||||
|     existing = _load_existing(json_path) | ||||
|     last_bucket = existing[-1]["bucket"] if existing else None | ||||
| def _generate_interval(interval: str) -> None: | ||||
|     cfg = _load_config() | ||||
|     defs = [d for d in cfg if d.get("interval") == interval] | ||||
|     if not defs: | ||||
|         typer.echo(f"No reports defined for {interval}") | ||||
|         return | ||||
| 
 | ||||
|     conn = sqlite3.connect(DB_PATH) | ||||
|     cur = conn.cursor() | ||||
| 
 | ||||
|     query = f"SELECT strftime('{fmt}', datetime(time)) as bucket, COUNT(*) as hits FROM logs" | ||||
|     params = [] | ||||
|     if last_bucket: | ||||
|         query += " WHERE datetime(time) > datetime(?)" | ||||
|         params.append(last_bucket) | ||||
|     query += " GROUP BY bucket ORDER BY bucket" | ||||
|     out_dir = OUTPUT_DIR / interval | ||||
|     out_dir.mkdir(parents=True, exist_ok=True) | ||||
| 
 | ||||
|     rows = cur.execute(query, params).fetchall() | ||||
|     for bucket, hits in rows: | ||||
|         existing.append({"bucket": bucket, "hits": hits}) | ||||
|     report_list = [] | ||||
|     for definition in defs: | ||||
|         name = definition["name"] | ||||
|         query = definition["query"] | ||||
|         cur.execute(query) | ||||
|         rows = cur.fetchall() | ||||
|         headers = [c[0] for c in cur.description] | ||||
|         data = [dict(zip(headers, row)) for row in rows] | ||||
|         json_path = out_dir / f"{name}.json" | ||||
|         _save_json(json_path, data) | ||||
|         report_list.append({ | ||||
|             "name": name, | ||||
|             "label": definition.get("label", name.title()), | ||||
|             "chart": definition.get("chart", "line"), | ||||
|             "json": f"{name}.json", | ||||
|         }) | ||||
| 
 | ||||
|     existing.sort(key=lambda x: x["bucket"]) | ||||
|     _save_json(json_path, existing) | ||||
|     _render_html(interval, json_path.name, html_path) | ||||
|     typer.echo(f"Generated {json_path} and {html_path}") | ||||
|     _save_json(out_dir / "reports.json", report_list) | ||||
|     _render_html(interval, report_list, out_dir / "index.html") | ||||
|     typer.echo(f"Generated {interval} reports") | ||||
| 
 | ||||
| @app.command() | ||||
| def hourly() -> None: | ||||
|     """Aggregate logs into hourly buckets.""" | ||||
|     _aggregate("hourly", "%Y-%m-%d %H:00:00") | ||||
|     """Generate hourly reports.""" | ||||
|     _generate_interval("hourly") | ||||
| 
 | ||||
| @app.command() | ||||
| def daily() -> None: | ||||
|     """Aggregate logs into daily buckets.""" | ||||
|     _aggregate("daily", "%Y-%m-%d") | ||||
|     """Generate daily reports.""" | ||||
|     _generate_interval("daily") | ||||
| 
 | ||||
| @app.command() | ||||
| def weekly() -> None: | ||||
|     """Aggregate logs into weekly buckets.""" | ||||
|     _aggregate("weekly", "%Y-%W") | ||||
|     """Generate weekly reports.""" | ||||
|     _generate_interval("weekly") | ||||
| 
 | ||||
| @app.command() | ||||
| def monthly() -> None: | ||||
|     """Aggregate logs into monthly buckets.""" | ||||
|     _aggregate("monthly", "%Y-%m") | ||||
|     """Generate monthly reports.""" | ||||
|     _generate_interval("monthly") | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     app() | ||||
|  |  | |||
|  | @ -9,34 +9,42 @@ | |||
| <body class="section"> | ||||
|   <div class="container"> | ||||
|     <h1 class="title">{{ interval.title() }} Report</h1> | ||||
|     <canvas id="chart"></canvas> | ||||
|     {% for report in reports %} | ||||
|     <div class="box"> | ||||
|       <h2 class="subtitle">{{ report.label }}</h2> | ||||
|       <canvas id="chart-{{ report.name }}"></canvas> | ||||
|     </div> | ||||
|     {% endfor %} | ||||
|   </div> | ||||
|   <script> | ||||
|     fetch('{{ json_path }}') | ||||
|       .then(r => r.json()) | ||||
|       .then(data => { | ||||
|         const labels = data.map(x => x.bucket); | ||||
|         const hits = data.map(x => x.hits); | ||||
|         new Chart(document.getElementById('chart'), { | ||||
|           type: '{{ 'bar' if interval == 'hourly' else 'line' }}', | ||||
|           data: { | ||||
|             labels: labels, | ||||
|             datasets: [{ | ||||
|               label: 'Hits', | ||||
|               data: hits, | ||||
|               backgroundColor: 'rgba(54, 162, 235, 0.5)', | ||||
|               borderColor: 'rgba(54, 162, 235, 1)', | ||||
|               borderWidth: 1, | ||||
|               fill: true, | ||||
|             }] | ||||
|           }, | ||||
|           options: { | ||||
|             scales: { | ||||
|               y: { beginAtZero: true } | ||||
|     const reports = {{ reports | tojson }}; | ||||
|     reports.forEach(rep => { | ||||
|       fetch(rep.json) | ||||
|         .then(r => r.json()) | ||||
|         .then(data => { | ||||
|           const labels = data.map(x => x.bucket); | ||||
|           const values = data.map(x => x.value); | ||||
|           new Chart(document.getElementById('chart-' + rep.name), { | ||||
|             type: rep.chart, | ||||
|             data: { | ||||
|               labels: labels, | ||||
|               datasets: [{ | ||||
|                 label: rep.label, | ||||
|                 data: values, | ||||
|                 backgroundColor: 'rgba(54, 162, 235, 0.5)', | ||||
|                 borderColor: 'rgba(54, 162, 235, 1)', | ||||
|                 borderWidth: 1, | ||||
|                 fill: rep.chart !== 'bar', | ||||
|               }] | ||||
|             }, | ||||
|             options: { | ||||
|               scales: { | ||||
|                 y: { beginAtZero: true } | ||||
|               } | ||||
|             } | ||||
|           } | ||||
|           }); | ||||
|         }); | ||||
|       }); | ||||
|     }); | ||||
|   </script> | ||||
| </body> | ||||
| </html> | ||||
|  |  | |||
							
								
								
									
										87
									
								
								tests/test_reports.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								tests/test_reports.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,87 @@ | |||
| import sqlite3 | ||||
| from pathlib import Path | ||||
| import json | ||||
| import sys | ||||
| 
 | ||||
| import pytest | ||||
| 
 | ||||
| REPO_ROOT = Path(__file__).resolve().parents[1] | ||||
| sys.path.append(str(REPO_ROOT)) | ||||
| from scripts import generate_reports as gr | ||||
| 
 | ||||
| 
 | ||||
def setup_db(path: Path) -> None:
    """Create a SQLite database at *path* with a ``logs`` table and two rows.

    Both rows fall in the 2024-01-01 10:00 hour: one request with status
    200 and one with status 500. Parent directories of *path* are created
    when missing.

    Args:
        path: Location of the database file to create.
    """
    insert_sql = (
        "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )
    rows = [
        ("127.0.0.1", "example.com", "2024-01-01 10:00:00", "GET / HTTP/1.1", 200, 100, "-", "curl", "MISS"),
        ("127.0.0.1", "example.com", "2024-01-01 10:05:00", "GET /err HTTP/1.1", 500, 100, "-", "curl", "MISS"),
    ]

    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(path)
    try:
        conn.execute(
            """
            CREATE TABLE logs (
                id INTEGER PRIMARY KEY,
                ip TEXT,
                host TEXT,
                time TEXT,
                request TEXT,
                status INTEGER,
                bytes_sent INTEGER,
                referer TEXT,
                user_agent TEXT,
                cache_status TEXT
            )
            """
        )
        conn.executemany(insert_sql, rows)
        conn.commit()
    finally:
        # Always release the file handle, even if a statement fails, so the
        # temporary directory can be cleaned up afterwards.
        conn.close()
| 
 | ||||
| 
 | ||||
@pytest.fixture()
def sample_reports(tmp_path):
    """Write a ``reports.yml`` with two hourly reports and return its path.

    The file defines ``hits`` (row count per hour) and ``error_rate``
    (percentage of rows with status >= 400 per hour).
    """
    yaml_text = """
- name: hits
  interval: hourly
  query: |
    SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket, COUNT(*) AS value
    FROM logs
    GROUP BY bucket
    ORDER BY bucket
- name: error_rate
  interval: hourly
  query: |
    SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket,
           SUM(CASE WHEN status >= 400 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value
    FROM logs
    GROUP BY bucket
    ORDER BY bucket
"""
    config_file = tmp_path / "reports.yml"
    config_file.write_text(yaml_text)
    return config_file
| 
 | ||||
| 
 | ||||
def test_generate_interval(tmp_path, sample_reports, monkeypatch):
    """Run the hourly generation against a temp database and check its output.

    The module-level paths of ``gr`` are redirected into *tmp_path* so the
    run reads the seeded database and the fixture config, and writes under
    ``tmp_path / "output"``.
    """
    database = tmp_path / "database" / "ngxstat.db"
    output_root = tmp_path / "output"
    setup_db(database)

    overrides = {
        "DB_PATH": database,
        "OUTPUT_DIR": output_root,
        "REPORT_CONFIG": sample_reports,
        "TEMPLATE_DIR": Path(__file__).resolve().parents[1] / "templates",
    }
    for attr, value in overrides.items():
        monkeypatch.setattr(gr, attr, value)

    gr._generate_interval("hourly")

    def read_report(filename):
        # Every artefact of the run lands in the per-interval directory.
        return json.loads((output_root / "hourly" / filename).read_text())

    assert read_report("hits.json")[0]["value"] == 2
    assert read_report("error_rate.json")[0]["value"] == pytest.approx(50.0)
    names = {entry["name"] for entry in read_report("reports.json")}
    assert names == {"hits", "error_rate"}
| 
 | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue