Add report generation timing stats #46

Merged
wagesj45 merged 1 commit from codex/add-report-generation-time-statistics into main 2025-07-19 04:50:22 -05:00
3 changed files with 33 additions and 17 deletions
Showing only changes of commit 669b51e03c - Show all commits

Add report generation timing stats

Jordan Wages 2025-07-19 04:50:09 -05:00

View file

@ -3,6 +3,8 @@ import sqlite3
from pathlib import Path from pathlib import Path
import shutil import shutil
from typing import List, Dict, Optional from typing import List, Dict, Optional
from datetime import datetime
import time
import yaml import yaml
@ -74,7 +76,9 @@ def _render_snippet(report: Dict, out_dir: Path) -> None:
snippet_path.write_text(template.render(report=report)) snippet_path.write_text(template.render(report=report))
def _write_stats() -> None: def _write_stats(
generated_at: Optional[str] = None, generation_seconds: Optional[float] = None
) -> None:
"""Query basic dataset stats and write them to ``output/global/stats.json``.""" """Query basic dataset stats and write them to ``output/global/stats.json``."""
conn = sqlite3.connect(DB_PATH) conn = sqlite3.connect(DB_PATH)
cur = conn.cursor() cur = conn.cursor()
@ -98,6 +102,10 @@ def _write_stats() -> None:
"end_date": end_date, "end_date": end_date,
"unique_domains": unique_domains, "unique_domains": unique_domains,
} }
if generated_at:
stats["generated_at"] = generated_at
if generation_seconds is not None:
stats["generation_seconds"] = generation_seconds
out_path = OUTPUT_DIR / "global" / "stats.json" out_path = OUTPUT_DIR / "global" / "stats.json"
_save_json(out_path, stats) _save_json(out_path, stats)
@ -197,14 +205,9 @@ def _generate_root_index() -> None:
"""Render the top-level index listing all intervals and domains.""" """Render the top-level index listing all intervals and domains."""
_copy_icons() _copy_icons()
intervals = sorted( intervals = sorted(
[ [name for name in INTERVAL_FORMATS if (OUTPUT_DIR / name).is_dir()]
name
for name in INTERVAL_FORMATS
if (OUTPUT_DIR / name).is_dir()
]
) )
domains_dir = OUTPUT_DIR / "domains" domains_dir = OUTPUT_DIR / "domains"
domains: List[str] = [] domains: List[str] = []
if domains_dir.is_dir(): if domains_dir.is_dir():
@ -227,6 +230,9 @@ def _generate_global() -> None:
typer.echo("No report definitions found") typer.echo("No report definitions found")
return return
start_time = time.time()
generated_at = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
_copy_icons() _copy_icons()
conn = sqlite3.connect(DB_PATH) conn = sqlite3.connect(DB_PATH)
@ -269,7 +275,8 @@ def _generate_global() -> None:
report_list.append(entry) report_list.append(entry)
_save_json(out_dir / "reports.json", report_list) _save_json(out_dir / "reports.json", report_list)
_write_stats() elapsed = round(time.time() - start_time, 2)
_write_stats(generated_at, elapsed)
typer.echo("Generated global reports") typer.echo("Generated global reports")

View file

@ -49,6 +49,8 @@
<p>Total logs: <span id="stat-total">-</span></p> <p>Total logs: <span id="stat-total">-</span></p>
<p>Date range: <span id="stat-start">-</span> to <span id="stat-end">-</span></p> <p>Date range: <span id="stat-start">-</span> to <span id="stat-end">-</span></p>
<p>Unique domains: <span id="stat-domains">-</span></p> <p>Unique domains: <span id="stat-domains">-</span></p>
<p>Last generated: <span id="stat-generated">-</span></p>
<p>Generation time: <span id="stat-elapsed">-</span> seconds</p>
</div> </div>
<div id="overview-reports"></div> <div id="overview-reports"></div>
</div> </div>
@ -96,6 +98,8 @@
const startElem = document.getElementById('stat-start'); const startElem = document.getElementById('stat-start');
const endElem = document.getElementById('stat-end'); const endElem = document.getElementById('stat-end');
const domainsElem = document.getElementById('stat-domains'); const domainsElem = document.getElementById('stat-domains');
const generatedElem = document.getElementById('stat-generated');
const elapsedElem = document.getElementById('stat-elapsed');
let currentInterval = intervalSelect.value; let currentInterval = intervalSelect.value;
let currentDomain = domainSelect.value; let currentDomain = domainSelect.value;
@ -161,6 +165,9 @@
startElem.textContent = stats.start_date; startElem.textContent = stats.start_date;
endElem.textContent = stats.end_date; endElem.textContent = stats.end_date;
domainsElem.textContent = stats.unique_domains; domainsElem.textContent = stats.unique_domains;
generatedElem.textContent = stats.generated_at || '-';
elapsedElem.textContent =
stats.generation_seconds !== undefined ? stats.generation_seconds : '-';
}); });
} }

View file

@ -148,20 +148,19 @@ def test_generate_interval_domain_filter(tmp_path, sample_reports, monkeypatch):
gr._generate_interval("hourly", "example.com") gr._generate_interval("hourly", "example.com")
hits = json.loads( hits = json.loads(
(tmp_path / "output" / "domains" / "example.com" / "hourly" / "hits.json").read_text() (
tmp_path / "output" / "domains" / "example.com" / "hourly" / "hits.json"
).read_text()
) )
assert hits[0]["value"] == 2 assert hits[0]["value"] == 2
reports = json.loads( reports = json.loads(
(tmp_path / "output" / "domains" / "example.com" / "hourly" / "reports.json").read_text() (
tmp_path / "output" / "domains" / "example.com" / "hourly" / "reports.json"
).read_text()
) )
assert {r["name"] for r in reports} == {"hits", "error_rate"} assert {r["name"] for r in reports} == {"hits", "error_rate"}
assert not ( assert not (
tmp_path tmp_path / "output" / "domains" / "example.com" / "hourly" / "skip_report.json"
/ "output"
/ "domains"
/ "example.com"
/ "hourly"
/ "skip_report.json"
).exists() ).exists()
@ -245,9 +244,12 @@ def test_global_stats_file(tmp_path, sample_reports, monkeypatch):
"start_date", "start_date",
"end_date", "end_date",
"unique_domains", "unique_domains",
"generated_at",
"generation_seconds",
} }
assert stats["total_logs"] == 2 assert stats["total_logs"] == 2
assert stats["start_date"] == "2024-01-01 10:00:00" assert stats["start_date"] == "2024-01-01 10:00:00"
assert stats["end_date"] == "2024-01-01 10:05:00" assert stats["end_date"] == "2024-01-01 10:05:00"
assert stats["unique_domains"] == 1 assert stats["unique_domains"] == 1
assert isinstance(stats["generated_at"], str)
assert stats["generation_seconds"] >= 0