Merge pull request #46 from wagesj45/codex/add-report-generation-time-statistics

Add report generation timing stats
Jordan Wages 2025-07-19 04:50:22 -05:00 committed by GitHub
commit 7271da95ac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 33 additions and 17 deletions

View file

@@ -3,6 +3,8 @@ import sqlite3
from pathlib import Path
import shutil
from typing import List, Dict, Optional
from datetime import datetime
import time
import yaml
@@ -74,7 +76,9 @@ def _render_snippet(report: Dict, out_dir: Path) -> None:
snippet_path.write_text(template.render(report=report))
def _write_stats() -> None:
def _write_stats(
generated_at: Optional[str] = None, generation_seconds: Optional[float] = None
) -> None:
"""Query basic dataset stats and write them to ``output/global/stats.json``."""
conn = sqlite3.connect(DB_PATH)
cur = conn.cursor()
@@ -98,6 +102,10 @@ def _write_stats() -> None:
"end_date": end_date,
"unique_domains": unique_domains,
}
if generated_at:
stats["generated_at"] = generated_at
if generation_seconds is not None:
stats["generation_seconds"] = generation_seconds
out_path = OUTPUT_DIR / "global" / "stats.json"
_save_json(out_path, stats)
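Note: with these optional arguments, output/global/stats.json gains two extra keys whenever timing data is passed in. An illustrative payload (values are examples only, mirroring the test fixture further down; the timing keys are simply omitted when _write_stats() is called with no arguments):

stats = {
    "total_logs": 2,
    "start_date": "2024-01-01 10:00:00",
    "end_date": "2024-01-01 10:05:00",
    "unique_domains": 1,
    "generated_at": "2025-07-19 09:50:22",   # UTC, formatted "%Y-%m-%d %H:%M:%S"
    "generation_seconds": 0.42,              # wall-clock seconds, rounded to 2 decimals
}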
@@ -197,14 +205,9 @@ def _generate_root_index() -> None:
"""Render the top-level index listing all intervals and domains."""
_copy_icons()
intervals = sorted(
[
name
for name in INTERVAL_FORMATS
if (OUTPUT_DIR / name).is_dir()
]
[name for name in INTERVAL_FORMATS if (OUTPUT_DIR / name).is_dir()]
)
domains_dir = OUTPUT_DIR / "domains"
domains: List[str] = []
if domains_dir.is_dir():
@@ -227,6 +230,9 @@ def _generate_global() -> None:
typer.echo("No report definitions found")
return
start_time = time.time()
generated_at = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
_copy_icons()
conn = sqlite3.connect(DB_PATH)
@@ -269,7 +275,8 @@ def _generate_global() -> None:
report_list.append(entry)
_save_json(out_dir / "reports.json", report_list)
_write_stats()
elapsed = round(time.time() - start_time, 2)
_write_stats(generated_at, elapsed)
typer.echo("Generated global reports")

View file

@@ -49,6 +49,8 @@
<p>Total logs: <span id="stat-total">-</span></p>
<p>Date range: <span id="stat-start">-</span> to <span id="stat-end">-</span></p>
<p>Unique domains: <span id="stat-domains">-</span></p>
<p>Last generated: <span id="stat-generated">-</span></p>
<p>Generation time: <span id="stat-elapsed">-</span> seconds</p>
</div>
<div id="overview-reports"></div>
</div>
@@ -96,6 +98,8 @@
const startElem = document.getElementById('stat-start');
const endElem = document.getElementById('stat-end');
const domainsElem = document.getElementById('stat-domains');
const generatedElem = document.getElementById('stat-generated');
const elapsedElem = document.getElementById('stat-elapsed');
let currentInterval = intervalSelect.value;
let currentDomain = domainSelect.value;
@@ -161,6 +165,9 @@
startElem.textContent = stats.start_date;
endElem.textContent = stats.end_date;
domainsElem.textContent = stats.unique_domains;
generatedElem.textContent = stats.generated_at || '-';
elapsedElem.textContent =
stats.generation_seconds !== undefined ? stats.generation_seconds : '-';
});
}

View file

@@ -148,20 +148,19 @@ def test_generate_interval_domain_filter(tmp_path, sample_reports, monkeypatch):
gr._generate_interval("hourly", "example.com")
hits = json.loads(
(tmp_path / "output" / "domains" / "example.com" / "hourly" / "hits.json").read_text()
(
tmp_path / "output" / "domains" / "example.com" / "hourly" / "hits.json"
).read_text()
)
assert hits[0]["value"] == 2
reports = json.loads(
(tmp_path / "output" / "domains" / "example.com" / "hourly" / "reports.json").read_text()
(
tmp_path / "output" / "domains" / "example.com" / "hourly" / "reports.json"
).read_text()
)
assert {r["name"] for r in reports} == {"hits", "error_rate"}
assert not (
tmp_path
/ "output"
/ "domains"
/ "example.com"
/ "hourly"
/ "skip_report.json"
tmp_path / "output" / "domains" / "example.com" / "hourly" / "skip_report.json"
).exists()
@@ -245,9 +244,12 @@ def test_global_stats_file(tmp_path, sample_reports, monkeypatch):
"start_date",
"end_date",
"unique_domains",
"generated_at",
"generation_seconds",
}
assert stats["total_logs"] == 2
assert stats["start_date"] == "2024-01-01 10:00:00"
assert stats["end_date"] == "2024-01-01 10:05:00"
assert stats["unique_domains"] == 1
assert isinstance(stats["generated_at"], str)
assert stats["generation_seconds"] >= 0