Merge pull request #11 from wagesj45/codex/add-reports.yml-and-update-report-generation
Enable YAML-driven reporting
This commit is contained in:
		
				commit
				
					
						99a6b45d4c
					
				
			
		
					 6 changed files with 214 additions and 60 deletions
				
			
		
							
								
								
									
										23
									
								
								README.md
									
										
									
									
									
								
							
							
						
						
									
										23
									
								
								README.md
									
										
									
									
									
								
							|  | @ -24,6 +24,29 @@ python scripts/generate_reports.py monthly | ||||||
| 
 | 
 | ||||||
| Reports are written under the `output/` directory. Each command updates the corresponding `<interval>.json` file and produces an HTML dashboard using Chart.js. | Reports are written under the `output/` directory. Each command updates the corresponding `<interval>.json` file and produces an HTML dashboard using Chart.js. | ||||||
| 
 | 
 | ||||||
|  | ### Configuring Reports | ||||||
|  | 
 | ||||||
|  | Report queries are defined in `reports.yml`. Each entry specifies the `name`, | ||||||
|  | `interval`, optional `label` and `chart` type, and a SQL `query` that must return | ||||||
|  | `bucket` and `value` columns. When `generate_reports.py` runs for an interval, | ||||||
|  | every definition with that `interval` creates `output/<interval>/<name>.json`, | ||||||
|  | and the interval dashboard is written to `output/<interval>/index.html`. | ||||||
|  | 
 | ||||||
|  | Example snippet: | ||||||
|  | 
 | ||||||
|  | ```yaml | ||||||
|  | - name: hits | ||||||
|  |   interval: hourly | ||||||
|  |   chart: bar | ||||||
|  |   query: | | ||||||
|  |     SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket, | ||||||
|  |            COUNT(*) AS value | ||||||
|  |     FROM logs | ||||||
|  |     GROUP BY bucket | ||||||
|  |     ORDER BY bucket | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | Add or modify entries in `reports.yml` to tailor the generated metrics. | ||||||
|  | 
 | ||||||
| ## Importing Logs | ## Importing Logs | ||||||
| 
 | 
 | ||||||
| Use the `run-import.sh` script to set up the Python environment if needed and import the latest Nginx log entries into `database/ngxstat.db`. | Use the `run-import.sh` script to set up the Python environment if needed and import the latest Nginx log entries into `database/ngxstat.db`. | ||||||
|  |  | ||||||
							
								
								
									
										21
									
								
								reports.yml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								reports.yml
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,21 @@ | ||||||
|  | - name: hits | ||||||
|  |   interval: hourly | ||||||
|  |   label: Hits | ||||||
|  |   chart: bar | ||||||
|  |   query: | | ||||||
|  |     SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket, | ||||||
|  |            COUNT(*) AS value | ||||||
|  |     FROM logs | ||||||
|  |     GROUP BY bucket | ||||||
|  |     ORDER BY bucket | ||||||
|  | 
 | ||||||
|  | - name: error_rate | ||||||
|  |   interval: hourly | ||||||
|  |   label: Error Rate (%) | ||||||
|  |   chart: line | ||||||
|  |   query: | | ||||||
|  |     SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket, | ||||||
|  |            SUM(CASE WHEN status >= 500 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value | ||||||
|  |     FROM logs | ||||||
|  |     GROUP BY bucket | ||||||
|  |     ORDER BY bucket | ||||||
|  | @ -7,3 +7,4 @@ Flask                # For optional lightweight API server | ||||||
| # Linting / formatting (optional but recommended) | # Linting / formatting (optional but recommended) | ||||||
| black | black | ||||||
| flake8 | flake8 | ||||||
|  | PyYAML | ||||||
|  |  | ||||||
|  | @ -3,77 +3,91 @@ import sqlite3 | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
| from typing import List, Dict | from typing import List, Dict | ||||||
| 
 | 
 | ||||||
|  | import yaml | ||||||
|  | 
 | ||||||
| import typer | import typer | ||||||
| from jinja2 import Environment, FileSystemLoader | from jinja2 import Environment, FileSystemLoader | ||||||
| 
 | 
 | ||||||
| DB_PATH = Path("database/ngxstat.db") | DB_PATH = Path("database/ngxstat.db") | ||||||
| OUTPUT_DIR = Path("output") | OUTPUT_DIR = Path("output") | ||||||
| TEMPLATE_DIR = Path("templates") | TEMPLATE_DIR = Path("templates") | ||||||
|  | REPORT_CONFIG = Path("reports.yml") | ||||||
| 
 | 
 | ||||||
| app = typer.Typer(help="Generate aggregated log reports") | app = typer.Typer(help="Generate aggregated log reports") | ||||||
| 
 | 
 | ||||||
| def _load_existing(path: Path) -> List[Dict]: | def _load_config() -> List[Dict]: | ||||||
|     if path.exists(): |     if not REPORT_CONFIG.exists(): | ||||||
|         try: |         typer.echo(f"Config file not found: {REPORT_CONFIG}") | ||||||
|             return json.loads(path.read_text()) |         raise typer.Exit(1) | ||||||
|         except Exception: |     with REPORT_CONFIG.open("r") as fh: | ||||||
|             return [] |         data = yaml.safe_load(fh) or [] | ||||||
|     return [] |     if not isinstance(data, list): | ||||||
|  |         typer.echo("reports.yml must contain a list of report definitions") | ||||||
|  |         raise typer.Exit(1) | ||||||
|  |     return data | ||||||
| 
 | 
 | ||||||
| def _save_json(path: Path, data: List[Dict]) -> None: | def _save_json(path: Path, data: List[Dict]) -> None: | ||||||
|     path.parent.mkdir(parents=True, exist_ok=True) |     path.parent.mkdir(parents=True, exist_ok=True) | ||||||
|     path.write_text(json.dumps(data, indent=2)) |     path.write_text(json.dumps(data, indent=2)) | ||||||
| 
 | 
 | ||||||
| def _render_html(interval: str, json_name: str, out_path: Path) -> None: | def _render_html(interval: str, reports: List[Dict], out_path: Path) -> None: | ||||||
|     env = Environment(loader=FileSystemLoader(TEMPLATE_DIR)) |     env = Environment(loader=FileSystemLoader(TEMPLATE_DIR)) | ||||||
|     template = env.get_template("report.html") |     template = env.get_template("report.html") | ||||||
|     out_path.write_text(template.render(interval=interval, json_path=json_name)) |     out_path.write_text(template.render(interval=interval, reports=reports)) | ||||||
| 
 | 
 | ||||||
| def _aggregate(interval: str, fmt: str) -> None: | def _generate_interval(interval: str) -> None: | ||||||
|     json_path = OUTPUT_DIR / f"{interval}.json" |     cfg = _load_config() | ||||||
|     html_path = OUTPUT_DIR / f"{interval}.html" |     defs = [d for d in cfg if d.get("interval") == interval] | ||||||
| 
 |     if not defs: | ||||||
|     existing = _load_existing(json_path) |         typer.echo(f"No reports defined for {interval}") | ||||||
|     last_bucket = existing[-1]["bucket"] if existing else None |         return | ||||||
| 
 | 
 | ||||||
|     conn = sqlite3.connect(DB_PATH) |     conn = sqlite3.connect(DB_PATH) | ||||||
|     cur = conn.cursor() |     cur = conn.cursor() | ||||||
| 
 | 
 | ||||||
|     query = f"SELECT strftime('{fmt}', datetime(time)) as bucket, COUNT(*) as hits FROM logs" |     out_dir = OUTPUT_DIR / interval | ||||||
|     params = [] |     out_dir.mkdir(parents=True, exist_ok=True) | ||||||
|     if last_bucket: |  | ||||||
|         query += " WHERE datetime(time) > datetime(?)" |  | ||||||
|         params.append(last_bucket) |  | ||||||
|     query += " GROUP BY bucket ORDER BY bucket" |  | ||||||
| 
 | 
 | ||||||
|     rows = cur.execute(query, params).fetchall() |     report_list = [] | ||||||
|     for bucket, hits in rows: |     for definition in defs: | ||||||
|         existing.append({"bucket": bucket, "hits": hits}) |         name = definition["name"] | ||||||
|  |         query = definition["query"] | ||||||
|  |         cur.execute(query) | ||||||
|  |         rows = cur.fetchall() | ||||||
|  |         headers = [c[0] for c in cur.description] | ||||||
|  |         data = [dict(zip(headers, row)) for row in rows] | ||||||
|  |         json_path = out_dir / f"{name}.json" | ||||||
|  |         _save_json(json_path, data) | ||||||
|  |         report_list.append({ | ||||||
|  |             "name": name, | ||||||
|  |             "label": definition.get("label", name.title()), | ||||||
|  |             "chart": definition.get("chart", "line"), | ||||||
|  |             "json": f"{name}.json", | ||||||
|  |         }) | ||||||
| 
 | 
 | ||||||
|     existing.sort(key=lambda x: x["bucket"]) |     _save_json(out_dir / "reports.json", report_list) | ||||||
|     _save_json(json_path, existing) |     _render_html(interval, report_list, out_dir / "index.html") | ||||||
|     _render_html(interval, json_path.name, html_path) |     typer.echo(f"Generated {interval} reports") | ||||||
|     typer.echo(f"Generated {json_path} and {html_path}") |  | ||||||
| 
 | 
 | ||||||
| @app.command() | @app.command() | ||||||
| def hourly() -> None: | def hourly() -> None: | ||||||
|     """Aggregate logs into hourly buckets.""" |     """Generate hourly reports.""" | ||||||
|     _aggregate("hourly", "%Y-%m-%d %H:00:00") |     _generate_interval("hourly") | ||||||
| 
 | 
 | ||||||
| @app.command() | @app.command() | ||||||
| def daily() -> None: | def daily() -> None: | ||||||
|     """Aggregate logs into daily buckets.""" |     """Generate daily reports.""" | ||||||
|     _aggregate("daily", "%Y-%m-%d") |     _generate_interval("daily") | ||||||
| 
 | 
 | ||||||
| @app.command() | @app.command() | ||||||
| def weekly() -> None: | def weekly() -> None: | ||||||
|     """Aggregate logs into weekly buckets.""" |     """Generate weekly reports.""" | ||||||
|     _aggregate("weekly", "%Y-%W") |     _generate_interval("weekly") | ||||||
| 
 | 
 | ||||||
| @app.command() | @app.command() | ||||||
| def monthly() -> None: | def monthly() -> None: | ||||||
|     """Aggregate logs into monthly buckets.""" |     """Generate monthly reports.""" | ||||||
|     _aggregate("monthly", "%Y-%m") |     _generate_interval("monthly") | ||||||
| 
 | 
 | ||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|     app() |     app() | ||||||
|  |  | ||||||
|  | @ -9,25 +9,32 @@ | ||||||
| <body class="section"> | <body class="section"> | ||||||
|   <div class="container"> |   <div class="container"> | ||||||
|     <h1 class="title">{{ interval.title() }} Report</h1> |     <h1 class="title">{{ interval.title() }} Report</h1> | ||||||
|     <canvas id="chart"></canvas> |     {% for report in reports %} | ||||||
|  |     <div class="box"> | ||||||
|  |       <h2 class="subtitle">{{ report.label }}</h2> | ||||||
|  |       <canvas id="chart-{{ report.name }}"></canvas> | ||||||
|  |     </div> | ||||||
|  |     {% endfor %} | ||||||
|   </div> |   </div> | ||||||
|   <script> |   <script> | ||||||
|     fetch('{{ json_path }}') |     const reports = {{ reports | tojson }}; | ||||||
|  |     reports.forEach(rep => { | ||||||
|  |       fetch(rep.json) | ||||||
|         .then(r => r.json()) |         .then(r => r.json()) | ||||||
|         .then(data => { |         .then(data => { | ||||||
|           const labels = data.map(x => x.bucket); |           const labels = data.map(x => x.bucket); | ||||||
|         const hits = data.map(x => x.hits); |           const values = data.map(x => x.value); | ||||||
|         new Chart(document.getElementById('chart'), { |           new Chart(document.getElementById('chart-' + rep.name), { | ||||||
|           type: '{{ 'bar' if interval == 'hourly' else 'line' }}', |             type: rep.chart, | ||||||
|             data: { |             data: { | ||||||
|               labels: labels, |               labels: labels, | ||||||
|               datasets: [{ |               datasets: [{ | ||||||
|               label: 'Hits', |                 label: rep.label, | ||||||
|               data: hits, |                 data: values, | ||||||
|                 backgroundColor: 'rgba(54, 162, 235, 0.5)', |                 backgroundColor: 'rgba(54, 162, 235, 0.5)', | ||||||
|                 borderColor: 'rgba(54, 162, 235, 1)', |                 borderColor: 'rgba(54, 162, 235, 1)', | ||||||
|                 borderWidth: 1, |                 borderWidth: 1, | ||||||
|               fill: true, |                 fill: rep.chart !== 'bar', | ||||||
|               }] |               }] | ||||||
|             }, |             }, | ||||||
|             options: { |             options: { | ||||||
|  | @ -37,6 +44,7 @@ | ||||||
|             } |             } | ||||||
|           }); |           }); | ||||||
|         }); |         }); | ||||||
|  |     }); | ||||||
|   </script> |   </script> | ||||||
| </body> | </body> | ||||||
| </html> | </html> | ||||||
|  |  | ||||||
							
								
								
									
										87
									
								
								tests/test_reports.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								tests/test_reports.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,87 @@ | ||||||
|  | import sqlite3 | ||||||
|  | from pathlib import Path | ||||||
|  | import json | ||||||
|  | import sys | ||||||
|  | 
 | ||||||
|  | import pytest | ||||||
|  | 
 | ||||||
|  | REPO_ROOT = Path(__file__).resolve().parents[1] | ||||||
|  | sys.path.append(str(REPO_ROOT)) | ||||||
|  | from scripts import generate_reports as gr | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def setup_db(path: Path): | ||||||
|  |     path.parent.mkdir(parents=True, exist_ok=True) | ||||||
|  |     conn = sqlite3.connect(path) | ||||||
|  |     cur = conn.cursor() | ||||||
|  |     cur.execute( | ||||||
|  |         """ | ||||||
|  |         CREATE TABLE logs ( | ||||||
|  |             id INTEGER PRIMARY KEY, | ||||||
|  |             ip TEXT, | ||||||
|  |             host TEXT, | ||||||
|  |             time TEXT, | ||||||
|  |             request TEXT, | ||||||
|  |             status INTEGER, | ||||||
|  |             bytes_sent INTEGER, | ||||||
|  |             referer TEXT, | ||||||
|  |             user_agent TEXT, | ||||||
|  |             cache_status TEXT | ||||||
|  |         ) | ||||||
|  |         """ | ||||||
|  |     ) | ||||||
|  |     cur.execute( | ||||||
|  |         "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", | ||||||
|  |         ("127.0.0.1", "example.com", "2024-01-01 10:00:00", "GET / HTTP/1.1", 200, 100, "-", "curl", "MISS"), | ||||||
|  |     ) | ||||||
|  |     cur.execute( | ||||||
|  |         "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", | ||||||
|  |         ("127.0.0.1", "example.com", "2024-01-01 10:05:00", "GET /err HTTP/1.1", 500, 100, "-", "curl", "MISS"), | ||||||
|  |     ) | ||||||
|  |     conn.commit() | ||||||
|  |     conn.close() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @pytest.fixture() | ||||||
|  | def sample_reports(tmp_path): | ||||||
|  |     cfg = tmp_path / "reports.yml" | ||||||
|  |     cfg.write_text( | ||||||
|  |         """ | ||||||
|  | - name: hits | ||||||
|  |   interval: hourly | ||||||
|  |   query: | | ||||||
|  |     SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket, COUNT(*) AS value | ||||||
|  |     FROM logs | ||||||
|  |     GROUP BY bucket | ||||||
|  |     ORDER BY bucket | ||||||
|  | - name: error_rate | ||||||
|  |   interval: hourly | ||||||
|  |   query: | | ||||||
|  |     SELECT strftime('%Y-%m-%d %H:00:00', datetime(time)) AS bucket, | ||||||
|  |            SUM(CASE WHEN status >= 400 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value | ||||||
|  |     FROM logs | ||||||
|  |     GROUP BY bucket | ||||||
|  |     ORDER BY bucket | ||||||
|  | """ | ||||||
|  |     ) | ||||||
|  |     return cfg | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_generate_interval(tmp_path, sample_reports, monkeypatch): | ||||||
|  |     db_path = tmp_path / "database" / "ngxstat.db" | ||||||
|  |     setup_db(db_path) | ||||||
|  | 
 | ||||||
|  |     monkeypatch.setattr(gr, "DB_PATH", db_path) | ||||||
|  |     monkeypatch.setattr(gr, "OUTPUT_DIR", tmp_path / "output") | ||||||
|  |     monkeypatch.setattr(gr, "REPORT_CONFIG", sample_reports) | ||||||
|  |     monkeypatch.setattr(gr, "TEMPLATE_DIR", Path(__file__).resolve().parents[1] / "templates") | ||||||
|  | 
 | ||||||
|  |     gr._generate_interval("hourly") | ||||||
|  | 
 | ||||||
|  |     hits = json.loads((tmp_path / "output" / "hourly" / "hits.json").read_text()) | ||||||
|  |     assert hits[0]["value"] == 2 | ||||||
|  |     error_rate = json.loads((tmp_path / "output" / "hourly" / "error_rate.json").read_text()) | ||||||
|  |     assert error_rate[0]["value"] == pytest.approx(50.0) | ||||||
|  |     reports = json.loads((tmp_path / "output" / "hourly" / "reports.json").read_text()) | ||||||
|  |     assert {r["name"] for r in reports} == {"hits", "error_rate"} | ||||||
|  | 
 | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue