From 8abd5dedacde708b60354638919d6f1c130df002 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Fri, 18 Jul 2025 22:25:25 -0500 Subject: [PATCH 01/49] Refactor UI with dropdown selectors --- templates/index.html | 97 ++++++++++++++++++------------------------- tests/test_reports.py | 12 +++--- 2 files changed, 46 insertions(+), 63 deletions(-) diff --git a/templates/index.html b/templates/index.html index c4f63d1..222e347 100644 --- a/templates/index.html +++ b/templates/index.html @@ -8,81 +8,64 @@

ngxstat Reports

-
-
-
- +
+ + + diff --git a/templates/report.html b/templates/report.html deleted file mode 100644 index 6e1dd37..0000000 --- a/templates/report.html +++ /dev/null @@ -1,84 +0,0 @@ - - - - - {{ interval.title() }} Report - - - - - - - -
-

{{ interval.title() }} Report

- {% for report in reports %} -
-

{{ report.label }}

- {% if report.chart == 'table' %} -
- {% else %} - - {% endif %} -
- {% endfor %} -
- - - diff --git a/templates/report_snippet.html b/templates/report_snippet.html new file mode 100644 index 0000000..c0a69d6 --- /dev/null +++ b/templates/report_snippet.html @@ -0,0 +1,8 @@ +
+

{{ report.label }}

+ {% if report.chart == 'table' %} +
+ {% else %} + + {% endif %} +
diff --git a/tests/test_reports.py b/tests/test_reports.py index dfd2c6d..f1be1d0 100644 --- a/tests/test_reports.py +++ b/tests/test_reports.py @@ -106,6 +106,9 @@ def test_generate_interval(tmp_path, sample_reports, monkeypatch): assert error_rate[0]["value"] == pytest.approx(50.0) reports = json.loads((tmp_path / "output" / "hourly" / "reports.json").read_text()) assert {r["name"] for r in reports} == {"hits", "error_rate"} + for r in reports: + snippet = tmp_path / "output" / "hourly" / r["html"] + assert snippet.exists() def test_generate_interval_domain_filter(tmp_path, sample_reports, monkeypatch): From a3f06fd9e2b979699f13ba97fa00d108d07e3b08 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 00:09:26 -0500 Subject: [PATCH 05/49] Add global report generation --- run-reports.sh | 1 + scripts/generate_reports.py | 54 +++++++++++++++++++++++++++++++++++++ tests/test_reports.py | 28 +++++++++++++++++++ 3 files changed, 83 insertions(+) diff --git a/run-reports.sh b/run-reports.sh index ade36c6..a0d718f 100755 --- a/run-reports.sh +++ b/run-reports.sh @@ -24,6 +24,7 @@ python scripts/generate_reports.py hourly python scripts/generate_reports.py daily python scripts/generate_reports.py weekly python scripts/generate_reports.py monthly +python scripts/generate_reports.py global # Generate reports for each individual domain echo "[INFO] Generating per-domain reports..." diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index 8b9fad6..e7c42cb 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -101,6 +101,10 @@ def _generate_interval(interval: str, domain: Optional[str] = None) -> None: report_list = [] for definition in cfg: + if "{bucket}" not in definition["query"] or definition.get("global"): + # Global reports are generated separately + continue + name = definition["name"] query = definition["query"].replace("{bucket}", bucket) query = query.replace("FROM logs", "FROM logs_view") @@ -154,6 +158,50 @@ def _generate_root_index() -> None: typer.echo(f"Generated root index at {out_path}") +def _generate_global() -> None: + """Generate reports that do not depend on an interval.""" + cfg = _load_config() + if not cfg: + typer.echo("No report definitions found") + return + + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + out_dir = OUTPUT_DIR / "global" + out_dir.mkdir(parents=True, exist_ok=True) + + report_list = [] + for definition in cfg: + if "{bucket}" in definition["query"] and not definition.get("global"): + continue + + name = definition["name"] + query = definition["query"] + cur.execute(query) + rows = cur.fetchall() + headers = [c[0] for c in cur.description] + data = [dict(zip(headers, row)) for row in rows] + json_path = out_dir / f"{name}.json" + _save_json(json_path, data) + entry = { + "name": name, + "label": definition.get("label", name.title()), + "chart": definition.get("chart", "line"), + "json": f"{name}.json", + "html": f"{name}.html", + } + if "color" in definition: + entry["color"] = definition["color"] + if "colors" in definition: + entry["colors"] = definition["colors"] + _render_snippet(entry, out_dir) + report_list.append(entry) + + _save_json(out_dir / "reports.json", report_list) + typer.echo("Generated global reports") + + @app.command() def hourly( domain: Optional[str] = typer.Option( @@ -218,6 +266,12 @@ def monthly( _generate_interval("monthly", domain) +@app.command("global") +def global_reports() -> None: + """Generate global reports.""" + _generate_global() + + @app.command() def index() -> None: """Generate the root index page linking all reports.""" diff --git a/tests/test_reports.py b/tests/test_reports.py index f1be1d0..d8259f2 100644 --- a/tests/test_reports.py +++ b/tests/test_reports.py @@ -80,6 +80,14 @@ def sample_reports(tmp_path): FROM logs GROUP BY bucket ORDER BY bucket +- name: domain_totals + global: true + query: | + SELECT host AS bucket, + COUNT(*) AS value + FROM logs + GROUP BY host + ORDER BY value DESC """ ) return cfg @@ -161,3 +169,23 @@ def test_generate_root_index(tmp_path, sample_reports, monkeypatch): # check for domain options assert '
+
+

Overview

+

Total logs: -

+

Date range: - to -

+

Unique domains: -

+
@@ -42,6 +48,10 @@ const intervalSelect = document.getElementById('interval-select'); const domainSelect = document.getElementById('domain-select'); const container = document.getElementById('reports-container'); + const totalElem = document.getElementById('stat-total'); + const startElem = document.getElementById('stat-start'); + const endElem = document.getElementById('stat-end'); + const domainsElem = document.getElementById('stat-domains'); let currentInterval = intervalSelect.value; let currentDomain = domainSelect.value; @@ -97,6 +107,17 @@ }); } + function loadStats() { + fetch('global/stats.json') + .then(r => r.json()) + .then(stats => { + totalElem.textContent = stats.total_logs; + startElem.textContent = stats.start_date; + endElem.textContent = stats.end_date; + domainsElem.textContent = stats.unique_domains; + }); + } + function loadReports() { let path = currentInterval; if (currentDomain) { @@ -128,6 +149,7 @@ }); loadReports(); + loadStats(); feather.replace(); diff --git a/tests/test_reports.py b/tests/test_reports.py index d8259f2..8b4bad1 100644 --- a/tests/test_reports.py +++ b/tests/test_reports.py @@ -189,3 +189,31 @@ def test_global_reports_once(tmp_path, sample_reports, monkeypatch): assert global_snippet.exists() assert not (tmp_path / "output" / "hourly" / "domain_totals.html").exists() + +def test_global_stats_file(tmp_path, sample_reports, monkeypatch): + db_path = tmp_path / "database" / "ngxstat.db" + setup_db(db_path) + + monkeypatch.setattr(gr, "DB_PATH", db_path) + monkeypatch.setattr(gr, "OUTPUT_DIR", tmp_path / "output") + monkeypatch.setattr(gr, "REPORT_CONFIG", sample_reports) + monkeypatch.setattr( + gr, "TEMPLATE_DIR", Path(__file__).resolve().parents[1] / "templates" + ) + + gr._generate_global() + + stats_path = tmp_path / "output" / "global" / "stats.json" + assert stats_path.exists() + stats = json.loads(stats_path.read_text()) + assert set(stats.keys()) == { + "total_logs", + "start_date", + "end_date", + "unique_domains", + } + assert stats["total_logs"] == 2 + assert stats["start_date"] == "2024-01-01 10:00:00" + assert stats["end_date"] == "2024-01-01 10:05:00" + assert stats["unique_domains"] == 1 + From e03c7bc434a7319c2607df6ba926626c0267ce09 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 00:21:42 -0500 Subject: [PATCH 07/49] Add per_domain flag to report config --- reports.yml | 1 + scripts/generate_reports.py | 3 +++ tests/test_reports.py | 29 ++++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/reports.yml b/reports.yml index 0df9913..1a6f5ef 100644 --- a/reports.yml +++ b/reports.yml @@ -38,6 +38,7 @@ - name: domain_traffic label: Top Domains chart: table + per_domain: false query: | SELECT host AS bucket, COUNT(*) AS value diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index b92a54c..d934b8e 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -133,6 +133,9 @@ def _generate_interval(interval: str, domain: Optional[str] = None) -> None: if "{bucket}" not in definition["query"] or definition.get("global"): # Global reports are generated separately continue + if domain and not definition.get("per_domain", True): + # Skip reports marked as not applicable to per-domain runs + continue name = definition["name"] query = definition["query"].replace("{bucket}", bucket) diff --git a/tests/test_reports.py b/tests/test_reports.py index 8b4bad1..4a537cb 100644 --- a/tests/test_reports.py +++ b/tests/test_reports.py @@ -80,6 +80,21 @@ def sample_reports(tmp_path): FROM logs GROUP BY bucket ORDER BY bucket +- name: domain_traffic + per_domain: false + query: | + SELECT host AS bucket, + COUNT(*) AS value + FROM logs + GROUP BY host + ORDER BY value DESC +- name: skip_report + per_domain: false + query: | + SELECT {bucket} AS bucket, COUNT(*) AS value + FROM logs + GROUP BY bucket + ORDER BY bucket - name: domain_totals global: true query: | @@ -113,7 +128,7 @@ def test_generate_interval(tmp_path, sample_reports, monkeypatch): ) assert error_rate[0]["value"] == pytest.approx(50.0) reports = json.loads((tmp_path / "output" / "hourly" / "reports.json").read_text()) - assert {r["name"] for r in reports} == {"hits", "error_rate"} + assert {r["name"] for r in reports} == {"hits", "error_rate", "skip_report"} for r in reports: snippet = tmp_path / "output" / "hourly" / r["html"] assert snippet.exists() @@ -136,6 +151,18 @@ def test_generate_interval_domain_filter(tmp_path, sample_reports, monkeypatch): (tmp_path / "output" / "domains" / "example.com" / "hourly" / "hits.json").read_text() ) assert hits[0]["value"] == 2 + reports = json.loads( + (tmp_path / "output" / "domains" / "example.com" / "hourly" / "reports.json").read_text() + ) + assert {r["name"] for r in reports} == {"hits", "error_rate"} + assert not ( + tmp_path + / "output" + / "domains" + / "example.com" + / "hourly" + / "skip_report.json" + ).exists() def test_generate_root_index(tmp_path, sample_reports, monkeypatch): From b67266d3b323fe4748e4be0a2c91b7f282c0772a Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 00:26:00 -0500 Subject: [PATCH 08/49] Add tabbed interface to dashboard --- templates/index.html | 94 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 80 insertions(+), 14 deletions(-) diff --git a/templates/index.html b/templates/index.html index 6294602..98e76ac 100644 --- a/templates/index.html +++ b/templates/index.html @@ -9,8 +9,17 @@

ngxstat Reports

-
-
+ + + +
+ -
-

Overview

-

Total logs: -

-

Date range: - to -

-

Unique domains: -

+ +
+
+

Overview

+

Total logs: -

+

Date range: - to -

+

Unique domains: -

+
+
+
+ + + + -
@@ -47,7 +67,19 @@ From f6b04e7e2e4decfc4e65e8728117c703b6150459 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 00:43:25 -0500 Subject: [PATCH 09/49] Exclude global directory from interval dropdown --- scripts/generate_reports.py | 6 +++++- tests/test_reports.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index d934b8e..cd9395b 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -172,7 +172,11 @@ def _generate_all_domains(interval: str) -> None: def _generate_root_index() -> None: """Render the top-level index listing all intervals and domains.""" - intervals = [p.name for p in OUTPUT_DIR.iterdir() if p.is_dir() and p.name != "domains"] + intervals = [ + p.name + for p in OUTPUT_DIR.iterdir() + if p.is_dir() and p.name not in {"domains", "global"} + ] intervals.sort() domains_dir = OUTPUT_DIR / "domains" diff --git a/tests/test_reports.py b/tests/test_reports.py index 4a537cb..64fcc2d 100644 --- a/tests/test_reports.py +++ b/tests/test_reports.py @@ -192,6 +192,7 @@ def test_generate_root_index(tmp_path, sample_reports, monkeypatch): # check for interval options assert '
@@ -56,9 +57,15 @@
- + + +
@@ -73,13 +80,19 @@ const sections = { overview: document.getElementById('overview-section'), all: document.getElementById('all-section'), - domain: document.getElementById('domain-section') + domain: document.getElementById('domain-section'), + analysis: document.getElementById('analysis-section') }; const containers = { overview: document.getElementById('overview-reports'), all: document.getElementById('reports-all'), domain: document.getElementById('reports-domain') }; + const analysisElems = { + missing: document.getElementById('analysis-missing'), + cache: document.getElementById('analysis-cache'), + threats: document.getElementById('analysis-threats') + }; const totalElem = document.getElementById('stat-total'); const startElem = document.getElementById('stat-start'); const endElem = document.getElementById('stat-end'); @@ -169,19 +182,99 @@ path = 'domains/' + encodeURIComponent(currentDomain) + '/' + currentInterval; } - fetch(path + '/reports.json') - .then(r => r.json()) - .then(reports => { - container.innerHTML = ''; - reports.forEach(rep => { - fetch(path + '/' + rep.html) - .then(r => r.text()) - .then(html => { - container.insertAdjacentHTML('beforeend', html); - initReport(rep, path); - }); + fetch(path + '/reports.json') + .then(r => r.json()) + .then(reports => { + container.innerHTML = ''; + reports.forEach(rep => { + fetch(path + '/' + rep.html) + .then(r => r.text()) + .then(html => { + container.insertAdjacentHTML('beforeend', html); + initReport(rep, path); + }); + }); + feather.replace(); }); - feather.replace(); + } + + function loadAnalysis() { + analysisElems.missing.innerHTML = '

Missing Domains

'; + analysisElems.cache.innerHTML = '

Cache Suggestions

'; + analysisElems.threats.innerHTML = '

Threat Report

'; + + fetch('analysis/missing_domains.json') + .then(r => r.json()) + .then(list => { + if (list.length === 0) { + analysisElems.missing.insertAdjacentHTML('beforeend', '

None

'); + return; + } + const items = list.map(d => `
  • ${d}
  • `).join(''); + analysisElems.missing.insertAdjacentHTML('beforeend', `
      ${items}
    `); + }); + + fetch('analysis/cache_suggestions.json') + .then(r => r.json()) + .then(data => { + if (data.length === 0) { + analysisElems.cache.insertAdjacentHTML('beforeend', '

    No suggestions

    '); + return; + } + analysisElems.cache.insertAdjacentHTML('beforeend', '
    '); + const rows = data.map(x => [x.host, x.path, x.misses]); + new DataTable('#table-cache', { + data: rows, + columns: [ + { title: 'Domain' }, + { title: 'Path' }, + { title: 'Misses' } + ] + }); + }); + + fetch('analysis/threat_report.json') + .then(r => r.json()) + .then(rep => { + const hasData = rep.error_spikes?.length || rep.suspicious_agents?.length || rep.high_ip_requests?.length; + if (!hasData) { + analysisElems.threats.insertAdjacentHTML('beforeend', '

    No threats detected

    '); + return; + } + if (rep.error_spikes && rep.error_spikes.length) { + analysisElems.threats.insertAdjacentHTML('beforeend', '

    Error Spikes

    '); + const rows = rep.error_spikes.map(x => [x.host, x.recent_error_rate, x.previous_error_rate]); + new DataTable('#table-errors', { + data: rows, + columns: [ + { title: 'Domain' }, + { title: 'Recent %' }, + { title: 'Previous %' } + ] + }); + } + if (rep.suspicious_agents && rep.suspicious_agents.length) { + analysisElems.threats.insertAdjacentHTML('beforeend', '

    Suspicious User Agents

    '); + const rows = rep.suspicious_agents.map(x => [x.user_agent, x.requests]); + new DataTable('#table-agents', { + data: rows, + columns: [ + { title: 'User Agent' }, + { title: 'Requests' } + ] + }); + } + if (rep.high_ip_requests && rep.high_ip_requests.length) { + analysisElems.threats.insertAdjacentHTML('beforeend', '

    High IP Requests

    '); + const rows = rep.high_ip_requests.map(x => [x.ip, x.requests]); + new DataTable('#table-ips', { + data: rows, + columns: [ + { title: 'IP' }, + { title: 'Requests' } + ] + }); + } }); } @@ -198,7 +291,11 @@ if (name === 'overview') { loadStats(); } - loadReports(); + if (name === 'analysis') { + loadAnalysis(); + } else { + loadReports(); + } } intervalSelect.addEventListener('change', () => { From 0a05f0c010d791c8fcf9f33a9787f224bbac7ae3 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 02:55:15 -0500 Subject: [PATCH 19/49] Fix module imports in run-analysis --- run-analysis.sh | 6 +++--- scripts/__init__.py | 1 + tests/test_run_analysis.py | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 scripts/__init__.py diff --git a/run-analysis.sh b/run-analysis.sh index 1f5c213..e487224 100755 --- a/run-analysis.sh +++ b/run-analysis.sh @@ -20,13 +20,13 @@ fi # Run analysis helpers echo "[INFO] Checking for missing domains..." -python scripts/analyze.py check-missing-domains +python -m scripts.analyze check-missing-domains echo "[INFO] Suggesting cache improvements..." -python scripts/analyze.py suggest-cache +python -m scripts.analyze suggest-cache echo "[INFO] Detecting threats..." -python scripts/analyze.py detect-threats +python -m scripts.analyze detect-threats # Deactivate to keep cron environment clean if type deactivate >/dev/null 2>&1; then diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..f4c57a1 --- /dev/null +++ b/scripts/__init__.py @@ -0,0 +1 @@ +"Utility package for ngxstat scripts" diff --git a/tests/test_run_analysis.py b/tests/test_run_analysis.py index 60d2810..66d00df 100644 --- a/tests/test_run_analysis.py +++ b/tests/test_run_analysis.py @@ -26,7 +26,7 @@ def test_script_invokes_commands(tmp_path): lines = calls.read_text().strip().splitlines() assert lines == [ - "scripts/analyze.py check-missing-domains", - "scripts/analyze.py suggest-cache", - "scripts/analyze.py detect-threats", + "-m scripts.analyze check-missing-domains", + "-m scripts.analyze suggest-cache", + "-m scripts.analyze detect-threats", ] From 2443aecaf65c4bbd8b66a4cf7ee34dc8ece69ab7 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 03:17:07 -0500 Subject: [PATCH 20/49] fix nginx config discovery for absolute include patterns --- scripts/nginx_config.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/nginx_config.py b/scripts/nginx_config.py index e2dc447..dbd635d 100644 --- a/scripts/nginx_config.py +++ b/scripts/nginx_config.py @@ -49,7 +49,15 @@ def discover_configs() -> Set[Path]: found.add(path) for pattern in INCLUDE_RE.findall(text): pattern = os.path.expanduser(pattern.strip()) - for included in path.parent.glob(pattern): + if os.path.isabs(pattern): + # ``Path.glob`` does not allow absolute patterns, so we + # anchor at the filesystem root and remove the leading + # separator. + base = Path(os.sep) + glob_iter = base.glob(pattern.lstrip(os.sep)) + else: + glob_iter = path.parent.glob(pattern) + for included in glob_iter: if included.is_file() and included not in found: queue.append(included) return found From d1f3c5a9ae0fa66e56828743c7103eb2f967753d Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 03:30:08 -0500 Subject: [PATCH 21/49] Hide analysis duration --- scripts/generate_reports.py | 2 +- templates/index.html | 2 +- tests/test_reports.py | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index ae7a3a7..4b5692a 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -175,7 +175,7 @@ def _generate_root_index() -> None: intervals = [ p.name for p in OUTPUT_DIR.iterdir() - if p.is_dir() and p.name.lower() not in {"domains", "global"} + if p.is_dir() and p.name.lower() not in {"domains", "global", "analysis"} ] intervals.sort() diff --git a/templates/index.html b/templates/index.html index 7b0b98f..0a8abed 100644 --- a/templates/index.html +++ b/templates/index.html @@ -286,7 +286,7 @@ Object.entries(sections).forEach(([key, section]) => { section.classList.toggle('is-hidden', key !== name); }); - intervalControl.classList.toggle('is-hidden', name === 'overview'); + intervalControl.classList.toggle('is-hidden', name === 'overview' || name === 'analysis'); domainControl.classList.toggle('is-hidden', name !== 'domain'); if (name === 'overview') { loadStats(); diff --git a/tests/test_reports.py b/tests/test_reports.py index fec898c..dbe71c2 100644 --- a/tests/test_reports.py +++ b/tests/test_reports.py @@ -184,6 +184,8 @@ def test_generate_root_index(tmp_path, sample_reports, monkeypatch): (tmp_path / "output" / "domains" / "bar.com").mkdir(parents=True) # add an extra directory with capitalized name to ensure it's ignored (tmp_path / "output" / "Global").mkdir(parents=True) + # add an analysis directory to ensure it's excluded + (tmp_path / "output" / "analysis").mkdir(parents=True) gr._generate_root_index() @@ -196,6 +198,7 @@ def test_generate_root_index(tmp_path, sample_reports, monkeypatch): assert '
    - + Interval @@ -67,7 +67,6 @@
    - @@ -194,7 +193,6 @@ initReport(rep, path); }); }); - feather.replace(); }); } @@ -313,7 +311,6 @@ }); switchTab('overview'); - feather.replace(); diff --git a/templates/report_snippet.html b/templates/report_snippet.html index c0a69d6..2c0ff62 100644 --- a/templates/report_snippet.html +++ b/templates/report_snippet.html @@ -1,5 +1,8 @@
    -

    {{ report.label }}

    +

    + {% if report.icon %}{{ report.icon }}{% endif %} + {{ report.label }} +

    {% if report.chart == 'table' %}
    {% else %} From 66b1209486838b12110ae4b2d42479f22c1b2717 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 04:08:40 -0500 Subject: [PATCH 24/49] Fix dropdown duration dirs --- scripts/generate_reports.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index 4c0c911..b9100f4 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -192,12 +192,14 @@ def _generate_all_domains(interval: str) -> None: def _generate_root_index() -> None: """Render the top-level index listing all intervals and domains.""" _copy_icons() - intervals = [ - p.name - for p in OUTPUT_DIR.iterdir() - if p.is_dir() and p.name.lower() not in {"domains", "global", "analysis"} - ] - intervals.sort() + intervals = sorted( + [ + name + for name in INTERVAL_FORMATS + if (OUTPUT_DIR / name).is_dir() + ] + ) + domains_dir = OUTPUT_DIR / "domains" domains: List[str] = [] From 87608ccdb91023fb7bdbc2d3674524eae579af94 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 04:17:38 -0500 Subject: [PATCH 25/49] refactor: improve report field names --- reports.yml | 56 ++++++++++++++++++++++++------------- scripts/generate_reports.py | 8 ++++++ templates/index.html | 7 +++-- 3 files changed, 49 insertions(+), 22 deletions(-) diff --git a/reports.yml b/reports.yml index 1622538..c9b9cd3 100644 --- a/reports.yml +++ b/reports.yml @@ -2,30 +2,36 @@ label: Hits icon: pulse chart: line + bucket: time_bucket + bucket_label: Time query: | - SELECT {bucket} AS bucket, + SELECT {bucket} AS time_bucket, COUNT(*) AS value FROM logs - GROUP BY bucket - ORDER BY bucket + GROUP BY time_bucket + ORDER BY time_bucket - name: error_rate label: Error Rate (%) icon: file-alert chart: line + bucket: time_bucket + bucket_label: Time query: | - SELECT {bucket} AS bucket, + SELECT {bucket} AS time_bucket, SUM(CASE WHEN status BETWEEN 400 AND 599 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value FROM logs - GROUP BY bucket - ORDER BY bucket + GROUP BY time_bucket + ORDER BY time_bucket - name: cache_status_breakdown label: Cache Status icon: archive chart: polarArea + bucket: cache_status + bucket_label: Cache Status query: | - SELECT cache_status AS bucket, + SELECT cache_status AS cache_status, COUNT(*) AS value FROM logs GROUP BY cache_status @@ -43,30 +49,36 @@ icon: globe chart: table per_domain: false + bucket: domain + bucket_label: Domain query: | - SELECT host AS bucket, + SELECT host AS domain, COUNT(*) AS value FROM logs - GROUP BY host + GROUP BY domain ORDER BY value DESC - name: bytes_sent label: Bytes Sent icon: upload chart: line + bucket: time_bucket + bucket_label: Time query: | - SELECT {bucket} AS bucket, + SELECT {bucket} AS time_bucket, SUM(bytes_sent) AS value FROM logs - GROUP BY bucket - ORDER BY bucket + GROUP BY time_bucket + ORDER BY time_bucket - name: top_paths label: Top Paths icon: map chart: table + bucket: path + bucket_label: Path query: | - SELECT path AS bucket, + SELECT path AS path, COUNT(*) AS value FROM ( SELECT substr(substr(request, instr(request, ' ') + 1), 1, @@ -81,8 +93,10 @@ label: User Agents icon: user chart: table + bucket: user_agent + bucket_label: User Agent query: | - SELECT user_agent AS bucket, + SELECT user_agent AS user_agent, COUNT(*) AS value FROM logs GROUP BY user_agent @@ -93,11 +107,13 @@ label: Referrers icon: link chart: table + bucket: referrer + bucket_label: Referrer query: | - SELECT referer AS bucket, + SELECT referer AS referrer, COUNT(*) AS value FROM logs - GROUP BY referer + GROUP BY referrer ORDER BY value DESC LIMIT 20 @@ -105,17 +121,19 @@ label: HTTP Statuses icon: server chart: pie + bucket: status_group + bucket_label: Status query: | SELECT CASE WHEN status BETWEEN 200 AND 299 THEN '2xx' WHEN status BETWEEN 300 AND 399 THEN '3xx' WHEN status BETWEEN 400 AND 499 THEN '4xx' ELSE '5xx' - END AS bucket, + END AS status_group, COUNT(*) AS value FROM logs - GROUP BY bucket - ORDER BY bucket + GROUP BY status_group + ORDER BY status_group colors: - "#48c78e" - "#209cee" diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index b9100f4..0a95bdc 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -169,6 +169,10 @@ def _generate_interval(interval: str, domain: Optional[str] = None) -> None: } if "icon" in definition: entry["icon"] = definition["icon"] + if "bucket" in definition: + entry["bucket"] = definition["bucket"] + if "bucket_label" in definition: + entry["bucket_label"] = definition["bucket_label"] if "color" in definition: entry["color"] = definition["color"] if "colors" in definition: @@ -253,6 +257,10 @@ def _generate_global() -> None: } if "icon" in definition: entry["icon"] = definition["icon"] + if "bucket" in definition: + entry["bucket"] = definition["bucket"] + if "bucket_label" in definition: + entry["bucket_label"] = definition["bucket_label"] if "color" in definition: entry["color"] = definition["color"] if "colors" in definition: diff --git a/templates/index.html b/templates/index.html index 237f563..4db9e53 100644 --- a/templates/index.html +++ b/templates/index.html @@ -105,19 +105,20 @@ fetch(base + '/' + rep.json) .then(r => r.json()) .then(data => { + const bucketField = rep.bucket || 'bucket'; if (rep.chart === 'table') { - const rows = data.map(x => [x.bucket, x.value]); + const rows = data.map(x => [x[bucketField], x.value]); new DataTable('#table-' + rep.name, { data: rows, columns: [ - { title: 'Bucket' }, + { title: rep.bucket_label || 'Bucket' }, { title: 'Value' } ] }); return; } - const labels = data.map(x => x.bucket); + const labels = data.map(x => x[bucketField]); const values = data.map(x => x.value); const chartType = rep.chart === 'stackedBar' ? 'bar' : rep.chart; const options = { scales: { y: { beginAtZero: true } } }; From 832a0a49dde1611dad0f39f8cb6221249dc9823b Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 04:31:13 -0500 Subject: [PATCH 26/49] docs: refresh README and agent guide --- AGENTS.md | 12 +++++ README.md | 141 ++++++++++++++++++++++-------------------------------- 2 files changed, 70 insertions(+), 83 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 4cdfa62..7e7d3c5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -24,6 +24,9 @@ This document outlines general practices and expectations for AI agents assistin The `run-import.sh` script can initialize this environment automatically. Always activate the virtual environment before running scripts or tests. +* Before committing code run `black` for consistent formatting and execute + the test suite with `pytest`. All tests should pass. + * Dependency management: Use `requirements.txt` or `pip-tools` * Use standard libraries where feasible (e.g., `sqlite3`, `argparse`, `datetime`) * Adopt `typer` for CLI command interface (if CLI ergonomics matter) @@ -89,6 +92,14 @@ ngxstat/ If uncertain, the agent should prompt the human for clarification before making architectural assumptions. +## Testing + +Use `pytest` for automated tests. Run the suite from an activated virtual environment and ensure all tests pass before committing: + +```bash +pytest -q +``` + --- ## Future Capabilities @@ -106,3 +117,4 @@ As the project matures, agents may also: * **2025-07-17**: Initial version by Jordan + ChatGPT * **2025-07-17**: Expanded virtual environment usage guidance + diff --git a/README.md b/README.md index acb1055..f641d96 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,16 @@ # ngxstat -Per-domain Nginx log analytics with hybrid static reports and live insights. -## Generating Reports +`ngxstat` is a lightweight log analytics toolkit for Nginx. It imports access +logs into an SQLite database and renders static dashboards so you can explore +per-domain metrics without running a heavy backend service. -Use the `generate_reports.py` script to build aggregated JSON and HTML snippet files from `database/ngxstat.db`. +## Requirements -Create a virtual environment and install dependencies: +* Python 3.10+ +* Access to the Nginx log files (default: `/var/log/nginx`) + +The helper scripts create a virtual environment on first run, but you can also +set one up manually: ```bash python3 -m venv .venv @@ -13,118 +18,88 @@ source .venv/bin/activate pip install -r requirements.txt ``` -Then run one or more of the interval commands: - -```bash -python scripts/generate_reports.py hourly -python scripts/generate_reports.py daily -python scripts/generate_reports.py weekly -python scripts/generate_reports.py monthly -``` - -Each command accepts optional flags to generate per-domain reports. Use -`--domain ` to limit output to a specific domain or `--all-domains` -to generate a subdirectory for every domain found in the database: - -```bash -# Hourly reports for example.com only -python scripts/generate_reports.py hourly --domain example.com - -# Weekly reports for all domains individually -python scripts/generate_reports.py weekly --all-domains -``` - -Reports are written under the `output/` directory. Each command updates the corresponding `.json` file and writes one HTML snippet per report. These snippets are loaded dynamically by the main dashboard using Chart.js and DataTables. - -### Configuring Reports - -Report queries are defined in `reports.yml`. Each entry specifies the `name`, -optional `label` and `chart` type, and a SQL `query` that must return `bucket` -and `value` columns. The special token `{bucket}` is replaced with the -appropriate SQLite `strftime` expression for each interval (hourly, daily, -weekly or monthly) so that a single definition works across all durations. -When `generate_reports.py` runs, every definition is executed for the requested -interval and creates `output//.json` plus a small HTML snippet -`output//.html` used by the dashboard. - -Example snippet: - -```yaml -- name: hits - chart: bar - query: | - SELECT {bucket} AS bucket, - COUNT(*) AS value - FROM logs - GROUP BY bucket - ORDER BY bucket -``` - -Add or modify entries in `reports.yml` to tailor the generated metrics. - ## Importing Logs -Use the `run-import.sh` script to set up the Python environment if needed and import the latest Nginx log entries into `database/ngxstat.db`. +Run the importer to ingest new log entries into `database/ngxstat.db`: ```bash ./run-import.sh ``` -This script is suitable for cron jobs as it creates the virtual environment on first run, installs dependencies and reuses the environment on subsequent runs. +Rotated logs are processed in order and only entries newer than the last +imported timestamp are added. -The importer handles rotated logs in order from oldest to newest so entries are -processed exactly once. If you rerun the script, it only ingests records with a -timestamp newer than the latest one already stored in the database, preventing -duplicates. +## Generating Reports -## Cron Report Generation - -Use the `run-reports.sh` script to run all report intervals in one step. The script sets up the Python environment the same way as `run-import.sh`, making it convenient for automation via cron. +To build the HTML dashboard and JSON data files use `run-reports.sh` which runs +all intervals in one go: ```bash ./run-reports.sh ``` -Running this script will create or update the hourly, daily, weekly and monthly reports under `output/`. It also detects all unique domains found in the database and writes per-domain reports to `output/domains//` alongside the aggregate data. After generation, open `output/index.html` in your browser to browse the reports. +The script calls `scripts/generate_reports.py` internally to create hourly, +daily, weekly and monthly reports. Per-domain reports are written under +`output/domains/` alongside the aggregate data. Open +`output/index.html` in a browser to view the dashboard. +If you prefer to run individual commands you can invoke the generator directly: -## Log Analysis +```bash +python scripts/generate_reports.py hourly +python scripts/generate_reports.py daily --all-domains +``` -The `run-analysis.sh` script runs helper routines that inspect the database. It -creates or reuses the virtual environment and then executes a set of analysis -commands to spot missing domains, suggest cache rules and detect potential -threats. +## Analysis Helpers + +`run-analysis.sh` executes additional utilities that examine the database for +missing domains, caching opportunities and potential threats. The JSON output is +saved under `output/analysis` and appears in the "Analysis" tab of the +dashboard. ```bash ./run-analysis.sh ``` -The JSON results are written under `output/analysis` and can be viewed from the -"Analysis" tab in the generated dashboard. -## Serving Reports with Nginx -To expose the generated HTML dashboards and JSON files over HTTP you can use a -simple Nginx server block. Point the `root` directive to the repository's -`output/` directory and optionally restrict access to your local network. +## Serving the Reports + +The generated files are static. You can serve them with a simple Nginx block: ```nginx server { listen 80; server_name example.com; - - # Path to the generated reports root /path/to/ngxstat/output; location / { try_files $uri $uri/ =404; } - - # Allow access only from private networks - allow 192.0.0.0/8; - allow 10.0.0.0/8; - deny all; } ``` -With this configuration the generated static files are served directly by -Nginx while connections outside of `192.*` and `10.*` are denied. +Restrict access if the reports should not be public. +## Running Tests + +Install the development dependencies and execute the suite with `pytest`: + +```bash +pip install -r requirements.txt +pytest -q +``` + +All tests must pass before submitting changes. + +## Acknowledgements + +ngxstat uses the following third‑party resources: + +* [Chart.js](https://www.chartjs.org/) for charts +* [DataTables](https://datatables.net/) and [jQuery](https://jquery.com/) for table views +* [Bulma CSS](https://bulma.io/) for styling +* Icons from [Free CC0 Icons](https://cc0-icons.jonh.eu/) by Jon Hicks (CC0 / MIT) +* [Typer](https://typer.tiangolo.com/) for the command-line interface +* [Jinja2](https://palletsprojects.com/p/jinja/) for templating + +The project is licensed under the GPLv3. Icon assets remain in the public domain +via the CC0 license. From 669b51e03cdea7575ae5c6ee754781b7274c1610 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 04:50:09 -0500 Subject: [PATCH 27/49] Add report generation timing stats --- scripts/generate_reports.py | 23 +++++++++++++++-------- templates/index.html | 7 +++++++ tests/test_reports.py | 20 +++++++++++--------- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index 0a95bdc..664a3a5 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -3,6 +3,8 @@ import sqlite3 from pathlib import Path import shutil from typing import List, Dict, Optional +from datetime import datetime +import time import yaml @@ -74,7 +76,9 @@ def _render_snippet(report: Dict, out_dir: Path) -> None: snippet_path.write_text(template.render(report=report)) -def _write_stats() -> None: +def _write_stats( + generated_at: Optional[str] = None, generation_seconds: Optional[float] = None +) -> None: """Query basic dataset stats and write them to ``output/global/stats.json``.""" conn = sqlite3.connect(DB_PATH) cur = conn.cursor() @@ -98,6 +102,10 @@ def _write_stats() -> None: "end_date": end_date, "unique_domains": unique_domains, } + if generated_at: + stats["generated_at"] = generated_at + if generation_seconds is not None: + stats["generation_seconds"] = generation_seconds out_path = OUTPUT_DIR / "global" / "stats.json" _save_json(out_path, stats) @@ -197,14 +205,9 @@ def _generate_root_index() -> None: """Render the top-level index listing all intervals and domains.""" _copy_icons() intervals = sorted( - [ - name - for name in INTERVAL_FORMATS - if (OUTPUT_DIR / name).is_dir() - ] + [name for name in INTERVAL_FORMATS if (OUTPUT_DIR / name).is_dir()] ) - domains_dir = OUTPUT_DIR / "domains" domains: List[str] = [] if domains_dir.is_dir(): @@ -227,6 +230,9 @@ def _generate_global() -> None: typer.echo("No report definitions found") return + start_time = time.time() + generated_at = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") + _copy_icons() conn = sqlite3.connect(DB_PATH) @@ -269,7 +275,8 @@ def _generate_global() -> None: report_list.append(entry) _save_json(out_dir / "reports.json", report_list) - _write_stats() + elapsed = round(time.time() - start_time, 2) + _write_stats(generated_at, elapsed) typer.echo("Generated global reports") diff --git a/templates/index.html b/templates/index.html index 4db9e53..472ef25 100644 --- a/templates/index.html +++ b/templates/index.html @@ -49,6 +49,8 @@

    Total logs: -

    Date range: - to -

    Unique domains: -

    +

    Last generated: -

    +

    Generation time: - seconds

    @@ -96,6 +98,8 @@ const startElem = document.getElementById('stat-start'); const endElem = document.getElementById('stat-end'); const domainsElem = document.getElementById('stat-domains'); + const generatedElem = document.getElementById('stat-generated'); + const elapsedElem = document.getElementById('stat-elapsed'); let currentInterval = intervalSelect.value; let currentDomain = domainSelect.value; @@ -161,6 +165,9 @@ startElem.textContent = stats.start_date; endElem.textContent = stats.end_date; domainsElem.textContent = stats.unique_domains; + generatedElem.textContent = stats.generated_at || '-'; + elapsedElem.textContent = + stats.generation_seconds !== undefined ? stats.generation_seconds : '-'; }); } diff --git a/tests/test_reports.py b/tests/test_reports.py index dbe71c2..75d7737 100644 --- a/tests/test_reports.py +++ b/tests/test_reports.py @@ -148,20 +148,19 @@ def test_generate_interval_domain_filter(tmp_path, sample_reports, monkeypatch): gr._generate_interval("hourly", "example.com") hits = json.loads( - (tmp_path / "output" / "domains" / "example.com" / "hourly" / "hits.json").read_text() + ( + tmp_path / "output" / "domains" / "example.com" / "hourly" / "hits.json" + ).read_text() ) assert hits[0]["value"] == 2 reports = json.loads( - (tmp_path / "output" / "domains" / "example.com" / "hourly" / "reports.json").read_text() + ( + tmp_path / "output" / "domains" / "example.com" / "hourly" / "reports.json" + ).read_text() ) assert {r["name"] for r in reports} == {"hits", "error_rate"} assert not ( - tmp_path - / "output" - / "domains" - / "example.com" - / "hourly" - / "skip_report.json" + tmp_path / "output" / "domains" / "example.com" / "hourly" / "skip_report.json" ).exists() @@ -245,9 +244,12 @@ def test_global_stats_file(tmp_path, sample_reports, monkeypatch): "start_date", "end_date", "unique_domains", + "generated_at", + "generation_seconds", } assert stats["total_logs"] == 2 assert stats["start_date"] == "2024-01-01 10:00:00" assert stats["end_date"] == "2024-01-01 10:05:00" assert stats["unique_domains"] == 1 - + assert isinstance(stats["generated_at"], str) + assert stats["generation_seconds"] >= 0 From fb753a1189dbaab3b2d35a63f1b81668933f01ee Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 16:41:58 -0500 Subject: [PATCH 28/49] fix: destroy charts when reloading domain reports --- templates/index.html | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/templates/index.html b/templates/index.html index 472ef25..b1cbf89 100644 --- a/templates/index.html +++ b/templates/index.html @@ -171,6 +171,15 @@ }); } + function destroyCharts(container) { + container.querySelectorAll('canvas').forEach(c => { + const chart = Chart.getChart(c); + if (chart) { + chart.destroy(); + } + }); + } + function loadReports() { let path; let container; @@ -183,6 +192,7 @@ } else { container = containers.domain; if (!currentDomain) { + destroyCharts(container); container.innerHTML = '

    Select a domain

    '; return; } @@ -192,6 +202,7 @@ fetch(path + '/reports.json') .then(r => r.json()) .then(reports => { + destroyCharts(container); container.innerHTML = ''; reports.forEach(rep => { fetch(path + '/' + rep.html) From 297c913f2a314a843f0304baaa8cd2a7bf1ad8e5 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 17:03:48 -0500 Subject: [PATCH 29/49] fix: destroy charts when switching tabs --- scripts/analyze.py | 22 +++++++++++----------- scripts/init_db.py | 4 +++- scripts/nginx_config.py | 1 - templates/index.html | 5 +++++ tests/test_importer.py | 5 ++--- tests/test_nginx_config.py | 1 - tests/test_run_analysis.py | 2 +- 7 files changed, 22 insertions(+), 18 deletions(-) diff --git a/scripts/analyze.py b/scripts/analyze.py index 8ac7c30..fe7b818 100644 --- a/scripts/analyze.py +++ b/scripts/analyze.py @@ -105,7 +105,9 @@ def hits(domain: Optional[str] = typer.Option(None, help="Filter by domain")) -> @app.command("cache-ratio") -def cache_ratio_cmd(domain: Optional[str] = typer.Option(None, help="Filter by domain")) -> None: +def cache_ratio_cmd( + domain: Optional[str] = typer.Option(None, help="Filter by domain") +) -> None: """Display cache hit ratio as a percentage.""" ratio = get_cache_ratio(domain) * 100 if domain: @@ -115,7 +117,11 @@ def cache_ratio_cmd(domain: Optional[str] = typer.Option(None, help="Filter by d @app.command("check-missing-domains") -def check_missing_domains(json_output: bool = typer.Option(False, "--json", help="Output missing domains as JSON")) -> None: +def check_missing_domains( + json_output: bool = typer.Option( + False, "--json", help="Output missing domains as JSON" + ) +) -> None: """Show domains present in the database but absent from Nginx config.""" try: from scripts.generate_reports import _get_domains as _db_domains @@ -151,9 +157,7 @@ def check_missing_domains(json_output: bool = typer.Option(False, "--json", help @app.command("suggest-cache") def suggest_cache( - threshold: int = typer.Option( - 10, help="Minimum number of MISS entries to report" - ), + threshold: int = typer.Option(10, help="Minimum number of MISS entries to report"), json_output: bool = typer.Option(False, "--json", help="Output results as JSON"), ) -> None: """Suggest domain/path pairs that could benefit from caching. @@ -211,9 +215,7 @@ def suggest_cache( @app.command("detect-threats") def detect_threats( hours: int = typer.Option(1, help="Number of recent hours to analyze"), - ip_threshold: int = typer.Option( - 100, help="Requests from a single IP to flag" - ), + ip_threshold: int = typer.Option(100, help="Requests from a single IP to flag"), ) -> None: """Detect potential security threats from recent logs.""" @@ -316,9 +318,7 @@ def detect_threats( """, (recent_start_s, recent_end_s, ip_threshold), ) - high_ip_requests = [ - {"ip": ip, "requests": cnt} for ip, cnt in cur.fetchall() - ] + high_ip_requests = [{"ip": ip, "requests": cnt} for ip, cnt in cur.fetchall()] conn.close() diff --git a/scripts/init_db.py b/scripts/init_db.py index f378b5c..b9ea07d 100644 --- a/scripts/init_db.py +++ b/scripts/init_db.py @@ -61,7 +61,9 @@ try: suffix = match.group(1) number = int(suffix.lstrip(".")) if suffix else 0 log_files.append((number, os.path.join(LOG_DIR, f))) - log_files = [path for _, path in sorted(log_files, key=lambda x: x[0], reverse=True)] + log_files = [ + path for _, path in sorted(log_files, key=lambda x: x[0], reverse=True) + ] except FileNotFoundError: print(f"[ERROR] Log directory not found: {LOG_DIR}") exit(1) diff --git a/scripts/nginx_config.py b/scripts/nginx_config.py index dbd635d..bc585a7 100644 --- a/scripts/nginx_config.py +++ b/scripts/nginx_config.py @@ -93,4 +93,3 @@ def parse_servers(paths: Set[Path]) -> List[Dict[str, str]]: entry["root"] = " ".join(directives["root"]) servers.append(entry) return servers - diff --git a/templates/index.html b/templates/index.html index b1cbf89..84c6214 100644 --- a/templates/index.html +++ b/templates/index.html @@ -180,6 +180,10 @@ }); } + function destroyAllCharts() { + Object.values(containers).forEach(destroyCharts); + } + function loadReports() { let path; let container; @@ -296,6 +300,7 @@ } function switchTab(name) { + destroyAllCharts(); currentTab = name; tabs.forEach(tab => { tab.classList.toggle('is-active', tab.dataset.tab === name); diff --git a/tests/test_importer.py b/tests/test_importer.py index 349786b..38c9be9 100644 --- a/tests/test_importer.py +++ b/tests/test_importer.py @@ -15,10 +15,10 @@ def sample_logs(tmp_path): log_dir.mkdir(parents=True, exist_ok=True) (log_dir / "access.log.1").write_text( - "127.0.0.1 - example.com [01/Jan/2024:10:00:00 +0000] \"GET / HTTP/1.1\" 200 123 \"-\" \"curl\" MISS\n" + '127.0.0.1 - example.com [01/Jan/2024:10:00:00 +0000] "GET / HTTP/1.1" 200 123 "-" "curl" MISS\n' ) (log_dir / "access.log").write_text( - "127.0.0.1 - example.com [01/Jan/2024:10:05:00 +0000] \"GET /about HTTP/1.1\" 200 123 \"-\" \"curl\" MISS\n" + '127.0.0.1 - example.com [01/Jan/2024:10:05:00 +0000] "GET /about HTTP/1.1" 200 123 "-" "curl" MISS\n' ) yield log_dir @@ -59,4 +59,3 @@ def test_idempotent_import(sample_logs, tmp_path): assert first_count == 2 assert second_count == first_count - diff --git a/tests/test_nginx_config.py b/tests/test_nginx_config.py index 782c635..a6494a7 100644 --- a/tests/test_nginx_config.py +++ b/tests/test_nginx_config.py @@ -67,4 +67,3 @@ server { assert servers[1]["server_name"] == "example.org" assert servers[1]["listen"] == "443 ssl" assert "proxy_cache" not in servers[1] - diff --git a/tests/test_run_analysis.py b/tests/test_run_analysis.py index 66d00df..fad7953 100644 --- a/tests/test_run_analysis.py +++ b/tests/test_run_analysis.py @@ -14,7 +14,7 @@ def test_script_invokes_commands(tmp_path): calls = tmp_path / "calls.txt" python_stub = tmp_path / "python" - python_stub.write_text(f"#!/usr/bin/env bash\necho \"$*\" >> \"{calls}\"\n") + python_stub.write_text(f'#!/usr/bin/env bash\necho "$*" >> "{calls}"\n') python_stub.chmod(0o755) (tmp_path / "python3").write_text(f"#!/usr/bin/env bash\nexit 0\n") (tmp_path / "python3").chmod(0o755) From 5d2546ad60d4676faf2d2c8944b75ba2fb51b104 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 18:01:26 -0500 Subject: [PATCH 30/49] Add chart loading management --- scripts/generate_reports.py | 15 ++++---- static/chartManager.js | 49 +++++++++++++++++++++++++ templates/index.html | 71 ++++++++++++++++++++----------------- 3 files changed, 97 insertions(+), 38 deletions(-) create mode 100644 static/chartManager.js diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index 664a3a5..e587e6e 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -58,14 +58,17 @@ def _save_json(path: Path, data: List[Dict]) -> None: def _copy_icons() -> None: - """Copy vendored icons to the output directory.""" + """Copy vendored icons and scripts to the output directory.""" src_dir = Path("static/icons") dst_dir = OUTPUT_DIR / "icons" - if not src_dir.is_dir(): - return - dst_dir.mkdir(parents=True, exist_ok=True) - for icon in src_dir.glob("*.svg"): - shutil.copy(icon, dst_dir / icon.name) + if src_dir.is_dir(): + dst_dir.mkdir(parents=True, exist_ok=True) + for icon in src_dir.glob("*.svg"): + shutil.copy(icon, dst_dir / icon.name) + + js_src = Path("static/chartManager.js") + if js_src.is_file(): + shutil.copy(js_src, OUTPUT_DIR / js_src.name) def _render_snippet(report: Dict, out_dir: Path) -> None: diff --git a/static/chartManager.js b/static/chartManager.js new file mode 100644 index 0000000..79d83fc --- /dev/null +++ b/static/chartManager.js @@ -0,0 +1,49 @@ +export let currentLoad = null; +const loadInfo = new Map(); + +export function newLoad(container) { + if (currentLoad) { + abortLoad(currentLoad); + } + reset(container); + const controller = new AbortController(); + const token = { controller, charts: new Map() }; + loadInfo.set(token, token); + currentLoad = token; + return token; +} + +export function abortLoad(token) { + const info = loadInfo.get(token); + if (!info) return; + info.controller.abort(); + info.charts.forEach(chart => { + try { + chart.destroy(); + } catch (e) {} + }); + loadInfo.delete(token); + if (currentLoad === token) { + currentLoad = null; + } +} + +export function registerChart(token, id, chart) { + const info = loadInfo.get(token); + if (info) { + info.charts.set(id, chart); + } else { + chart.destroy(); + } +} + +export function reset(container) { + if (!container) return; + container.querySelectorAll('canvas').forEach(c => { + const chart = Chart.getChart(c); + if (chart) { + chart.destroy(); + } + }); + container.innerHTML = ''; +} diff --git a/templates/index.html b/templates/index.html index 84c6214..1b27003 100644 --- a/templates/index.html +++ b/templates/index.html @@ -72,7 +72,14 @@ - From 6de85b7cc50536f5d13629e62bade6e8922f2292 Mon Sep 17 00:00:00 2001 From: ngxstat-bot Date: Mon, 18 Aug 2025 23:47:23 -0500 Subject: [PATCH 43/49] UX Phase 1 follow-ups: state v2 + reset, window defaults + support, palette support; analysis JSON generation; tests for LIMIT/metadata; README updates --- README.md | 17 ++++--- run-reports.sh | 4 ++ scripts/generate_reports.py | 34 ++++++++++++++ templates/index.html | 64 +++++++++++++++++++++----- tests/test_reports.py | 90 +++++++++++++++++++++++++++++++++++++ 5 files changed, 193 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index f641d96..ac601fc 100644 --- a/README.md +++ b/README.md @@ -39,9 +39,10 @@ all intervals in one go: ``` The script calls `scripts/generate_reports.py` internally to create hourly, -daily, weekly and monthly reports. Per-domain reports are written under -`output/domains/` alongside the aggregate data. Open -`output/index.html` in a browser to view the dashboard. +daily, weekly and monthly reports, then writes analysis JSON files used by the +"Analysis" tab. Per-domain reports are written under `output/domains/` +alongside the aggregate data. Open `output/index.html` in a browser to view the +dashboard. If you prefer to run individual commands you can invoke the generator directly: @@ -54,8 +55,14 @@ python scripts/generate_reports.py daily --all-domains `run-analysis.sh` executes additional utilities that examine the database for missing domains, caching opportunities and potential threats. The JSON output is -saved under `output/analysis` and appears in the "Analysis" tab of the -dashboard. +saved under `output/analysis` and appears in the "Analysis" tab. The +`run-reports.sh` script also generates these JSON files as part of the build. + +## UX Controls + +The dashboard defaults to a 7‑day window for time series. Your view preferences +persist locally in the browser under the `ngxstat-state-v2` key. Use the +"Reset view" button to clear saved state and restore defaults. ```bash ./run-analysis.sh diff --git a/run-reports.sh b/run-reports.sh index bfe736d..4556f32 100755 --- a/run-reports.sh +++ b/run-reports.sh @@ -42,6 +42,10 @@ python scripts/generate_reports.py daily --all-domains python scripts/generate_reports.py weekly --all-domains python scripts/generate_reports.py monthly --all-domains +# Generate analysis JSON +echo "[INFO] Generating analysis files..." +python scripts/generate_reports.py analysis + # Generate root index python scripts/generate_reports.py index diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index 073e0b7..178951e 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -344,6 +344,34 @@ def _generate_global() -> None: typer.echo("Generated global reports") +def _generate_analysis() -> None: + """Generate analysis JSON files consumed by the Analysis tab.""" + try: + # Import lazily to avoid circulars and keep dependencies optional + from scripts import analyze + except Exception as exc: # pragma: no cover - defensive + typer.echo(f"Failed to import analysis module: {exc}") + return + + # Ensure output root and icons present for parity + _copy_icons() + + # These commands write JSON files under output/analysis/ + try: + analyze.check_missing_domains(json_output=True) + except Exception as exc: # pragma: no cover - continue best-effort + typer.echo(f"check_missing_domains failed: {exc}") + try: + analyze.suggest_cache(json_output=True) + except Exception as exc: # pragma: no cover + typer.echo(f"suggest_cache failed: {exc}") + try: + analyze.detect_threats() + except Exception as exc: # pragma: no cover + typer.echo(f"detect_threats failed: {exc}") + typer.echo("Generated analysis JSON files") + + @app.command() def hourly( domain: Optional[str] = typer.Option( @@ -414,6 +442,12 @@ def global_reports() -> None: _generate_global() +@app.command() +def analysis() -> None: + """Generate analysis JSON files for the Analysis tab.""" + _generate_analysis() + + @app.command() def index() -> None: """Generate the root index page linking all reports.""" diff --git a/templates/index.html b/templates/index.html index 56dfd6f..5b877ef 100644 --- a/templates/index.html +++ b/templates/index.html @@ -74,6 +74,9 @@ Exclude “-” +
    + +
    @@ -122,6 +125,7 @@ groupOthers, movingAverage, } from './chartManager.js'; + const STATE_KEY = 'ngxstat-state-v2'; const intervalSelect = document.getElementById('interval-select'); const domainSelect = document.getElementById('domain-select'); const intervalControl = document.getElementById('interval-control'); @@ -131,6 +135,7 @@ const modeGroupControl = document.getElementById('mode-group-control'); const excludeUncachedControl = document.getElementById('exclude-uncached-control'); const smoothControl = document.getElementById('smooth-control'); + const resetButton = document.getElementById('reset-view'); const tabs = document.querySelectorAll('#report-tabs li'); const sections = { recent: document.getElementById('recent-section'), @@ -172,10 +177,11 @@ let modeGroup = true; let excludeUncached = true; let smoothError = false; + let hadExplicitWindow = false; // URL or saved-state provided window function saveState() { try { - localStorage.setItem('ngxstat-state', JSON.stringify({ + localStorage.setItem(STATE_KEY, JSON.stringify({ tab: currentTab, interval: currentInterval, domain: currentDomain, @@ -190,11 +196,11 @@ function loadSavedState() { try { - const s = JSON.parse(localStorage.getItem('ngxstat-state') || '{}'); + const s = JSON.parse(localStorage.getItem(STATE_KEY) || '{}'); if (s.tab) currentTab = s.tab; if (s.interval) currentInterval = s.interval; if (s.domain !== undefined) currentDomain = s.domain; - if (s.window) currentWindow = s.window; + if (s.window) { currentWindow = s.window; hadExplicitWindow = true; } if (s.percent !== undefined) modePercent = !!Number(s.percent); if (s.group !== undefined) modeGroup = !!Number(s.group); if (s.exclude_dash !== undefined) excludeUncached = !!Number(s.exclude_dash); @@ -207,7 +213,7 @@ if (params.get('tab')) currentTab = params.get('tab'); if (params.get('interval')) currentInterval = params.get('interval'); if (params.get('domain') !== null) currentDomain = params.get('domain') || ''; - if (params.get('window')) currentWindow = params.get('window'); + if (params.get('window')) { currentWindow = params.get('window'); hadExplicitWindow = true; } if (params.get('percent') !== null) modePercent = params.get('percent') === '1'; if (params.get('group') !== null) modeGroup = params.get('group') === '1'; if (params.get('exclude_dash') !== null) excludeUncached = params.get('exclude_dash') === '1'; @@ -273,8 +279,13 @@ } // Windowing for time series if (isTimeSeries) { - const n = bucketsForWindow(currentWindow, currentInterval); - transformed = sliceWindow(transformed, n); + // Only apply windowing if report supports current window (if constrained) + const supported = Array.isArray(rep.windows_supported) ? rep.windows_supported : null; + const canWindow = !supported || supported.includes(currentWindow); + if (canWindow) { + const n = bucketsForWindow(currentWindow, currentInterval); + transformed = sliceWindow(transformed, n); + } } // Distributions: percent + group small const isDistribution = ['pie', 'polarArea', 'doughnut', 'donut'].includes(rep.chart); @@ -306,7 +317,7 @@ options.scales.y.stacked = true; // Build multiple series from columns (exclude bucket & total) const keys = transformed.length ? Object.keys(transformed[0]).filter(k => k !== bucketField && k !== 'total') : []; - const palette = rep.colors || [ + const palette = rep.colors || rep.palette || [ '#3273dc', '#23d160', '#ffdd57', '#ff3860', '#7957d5', '#363636' ]; datasets = keys.map((k, i) => ({ @@ -327,6 +338,9 @@ if (rep.colors) { dataset.backgroundColor = rep.colors; dataset.borderColor = rep.colors; + } else if (rep.palette) { + dataset.backgroundColor = rep.palette; + dataset.borderColor = rep.palette; } else if (rep.color) { dataset.backgroundColor = rep.color; dataset.borderColor = rep.color; @@ -392,6 +406,15 @@ if (currentTab === 'tables') return rep.chart === 'table'; return true; }); + // If no explicit window was given (URL or saved state), honor first report's default + if (!hadExplicitWindow) { + const withDefault = filtered.find(r => r.window_default); + if (withDefault && typeof withDefault.window_default === 'string') { + currentWindow = withDefault.window_default; + windowSelect.value = currentWindow; + updateURL(); + } + } filtered.forEach(rep => { fetch(path + '/' + rep.html, { signal: token.controller.signal }) .then(r => r.text()) @@ -499,10 +522,12 @@ intervalControl.classList.toggle('is-hidden', !showInterval); domainControl.classList.toggle('is-hidden', !showDomain); windowControl.classList.toggle('is-hidden', !showInterval); - modePercentControl.classList.toggle('is-hidden', !showInterval); - modeGroupControl.classList.toggle('is-hidden', !showInterval); - excludeUncachedControl.classList.toggle('is-hidden', !showInterval); - smoothControl.classList.toggle('is-hidden', !showInterval); + // Only show percent/group/exclude toggles on Distribution tab, + // and smoothing only on Trends tab + modePercentControl.classList.toggle('is-hidden', name !== 'distribution'); + modeGroupControl.classList.toggle('is-hidden', name !== 'distribution'); + excludeUncachedControl.classList.toggle('is-hidden', name !== 'distribution'); + smoothControl.classList.toggle('is-hidden', name !== 'trends'); updateURL(); if (name === 'recent') { loadStats(); @@ -570,6 +595,23 @@ switchTab(tab.dataset.tab); }); }); + resetButton.addEventListener('click', () => { + try { + localStorage.removeItem('ngxstat-state'); // clear legacy + localStorage.removeItem(STATE_KEY); + } catch {} + // Reset to hard defaults + currentTab = 'recent'; + currentInterval = intervalSelect.value = intervalSelect.options[0]?.value || currentInterval; + currentDomain = domainSelect.value = ''; + currentWindow = windowSelect.value = '7d'; + modePercent = percentToggle.checked = false; + modeGroup = groupToggle.checked = true; + excludeUncached = excludeUncachedToggle.checked = true; + smoothError = smoothToggle.checked = false; + hadExplicitWindow = false; + switchTab(currentTab); + }); // Initialize state (URL -> localStorage -> defaults) loadSavedState(); applyURLParams(); diff --git a/tests/test_reports.py b/tests/test_reports.py index f6c6918..60a6df6 100644 --- a/tests/test_reports.py +++ b/tests/test_reports.py @@ -323,3 +323,93 @@ def test_multi_bucket_table(tmp_path, monkeypatch): entry = next(r for r in reports if r["name"] == "multi") assert entry["buckets"] == ["domain", "agent"] assert entry["bucket_label"] == ["Domain", "Agent"] + + +def test_top_n_limit_applied(tmp_path, monkeypatch): + # Prepare DB with many distinct agents + db_path = tmp_path / "database" / "ngxstat.db" + setup_db(db_path) + conn = sqlite3.connect(db_path) + cur = conn.cursor() + for i in range(10): + cur.execute( + "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + "127.0.0.1", + "example.com", + f"2024-01-01 11:{i:02d}:00", + "GET /x HTTP/1.1", + 200, + 100, + "-", + f"ua-{i}", + "MISS", + ), + ) + conn.commit() + conn.close() + + cfg = tmp_path / "reports.yml" + cfg.write_text( + """ +- name: agents + chart: table + global: true + top_n: 3 + query: | + SELECT user_agent AS agent, COUNT(*) AS value + FROM logs + GROUP BY user_agent + ORDER BY value DESC +""" + ) + + monkeypatch.setattr(gr, "DB_PATH", db_path) + monkeypatch.setattr(gr, "OUTPUT_DIR", tmp_path / "output") + monkeypatch.setattr(gr, "REPORT_CONFIG", cfg) + monkeypatch.setattr( + gr, "TEMPLATE_DIR", Path(__file__).resolve().parents[1] / "templates" + ) + + gr._generate_global() + + data = json.loads((tmp_path / "output" / "global" / "agents.json").read_text()) + # Should be limited to 3 rows + assert len(data) <= 3 + + +def test_metadata_passthrough(tmp_path, monkeypatch): + db_path = tmp_path / "database" / "ngxstat.db" + setup_db(db_path) + + cfg = tmp_path / "reports.yml" + cfg.write_text( + """ +- name: custom_ts + label: Custom TS + chart: line + window_default: 24h + windows_supported: [1h, 24h, 7d] + palette: ["#111111", "#222222"] + query: | + SELECT {bucket} AS time_bucket, COUNT(*) AS value + FROM logs + GROUP BY time_bucket + ORDER BY time_bucket +""" + ) + + monkeypatch.setattr(gr, "DB_PATH", db_path) + monkeypatch.setattr(gr, "OUTPUT_DIR", tmp_path / "output") + monkeypatch.setattr(gr, "REPORT_CONFIG", cfg) + monkeypatch.setattr( + gr, "TEMPLATE_DIR", Path(__file__).resolve().parents[1] / "templates" + ) + + gr._generate_interval("hourly") + + reports = json.loads((tmp_path / "output" / "hourly" / "reports.json").read_text()) + entry = next(r for r in reports if r["name"] == "custom_ts") + assert entry["window_default"] == "24h" + assert entry["windows_supported"] == ["1h", "24h", "7d"] + assert entry["palette"] == ["#111111", "#222222"] From 95e54359d7cc0289655161d0f271b533f9dcf7a1 Mon Sep 17 00:00:00 2001 From: ngxstat-bot Date: Tue, 19 Aug 2025 00:09:49 -0500 Subject: [PATCH 44/49] UX: unify time selection and simplify controls\n\n- Replace separate Interval + Window with a single Time preset (Last hour/24h/7d/30d/12w/12m/All time)\n- Map presets to sensible grouping (hourly/daily/weekly/monthly) based on available intervals\n- Keep backward compatibility: preserve existing URL/state params; keep legacy controls hidden\n- Add client support for new windows (12w, 12m) in time-bucket slicing\n- Show only relevant controls per tab (Trends: smoothing; Distribution: percent/group/exclude)\n- Streamline reset flow to a sane default (Last 7 days) --- templates/index.html | 133 +++++++++++++++++++++++++++++++++---------- 1 file changed, 102 insertions(+), 31 deletions(-) diff --git a/templates/index.html b/templates/index.html index 5b877ef..7e16527 100644 --- a/templates/index.html +++ b/templates/index.html @@ -21,6 +21,7 @@
    + @@ -64,17 +63,17 @@
    @@ -99,12 +98,17 @@ - - - @@ -157,6 +159,7 @@ trends: document.getElementById('reports-trends'), breakdown: document.getElementById('reports-breakdown') }; + const recentRow = document.getElementById('recent-row'); const analysisElems = { missing: document.getElementById('analysis-missing'), cache: document.getElementById('analysis-cache'), @@ -453,6 +456,11 @@ path = currentDomain ? ('domains/' + encodeURIComponent(currentDomain) + '/' + currentInterval) : currentInterval; } + // Clear the top row on each load of Recent + if (currentTab === 'recent' && recentRow) { + recentRow.innerHTML = ''; + } + const token = newLoad(container); fetch(path + '/reports.json', { signal: token.controller.signal }) @@ -480,7 +488,15 @@ .then(r => r.text()) .then(html => { if (token !== currentLoad) return; - container.insertAdjacentHTML('beforeend', html); + // On Recent tab, render Cache Status and HTTP Statuses side-by-side + const inTopRow = currentTab === 'recent' && + (rep.name === 'cache_status_breakdown' || rep.name === 'status_distribution'); + if (inTopRow && recentRow) { + const wrapped = `
    ${html}
    `; + recentRow.insertAdjacentHTML('beforeend', wrapped); + } else { + container.insertAdjacentHTML('beforeend', html); + } initReport(token, rep, path); }); }); From 371719799192a775c91c5be1c71a40a50dcbb243 Mon Sep 17 00:00:00 2001 From: ngxstat-bot Date: Tue, 19 Aug 2025 00:51:10 -0500 Subject: [PATCH 49/49] analysis: make suggest_cache and detect_threats pure-callable, add CLI wrappers\n\n- Replace Typer Option defaults with plain Python defaults in functions used by generator/tests\n- Add CLI wrapper commands (, ) that delegate to the pure functions\n- Cast params to int for SQL/timedelta to avoid type issues\n- Resolves OptionInfo errors during run-reports analysis phase --- scripts/analyze.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/scripts/analyze.py b/scripts/analyze.py index 7c4c141..9f49978 100644 --- a/scripts/analyze.py +++ b/scripts/analyze.py @@ -155,10 +155,9 @@ def check_missing_domains( typer.echo(d) -@app.command("suggest-cache") def suggest_cache( - threshold: int = typer.Option(10, help="Minimum number of MISS entries to report"), - json_output: bool = typer.Option(False, "--json", help="Output results as JSON"), + threshold: int = 10, + json_output: bool = False, ) -> None: """Suggest domain/path pairs that could benefit from caching. @@ -191,7 +190,7 @@ def suggest_cache( HAVING miss_count >= ? ORDER BY miss_count DESC """, - (threshold,), + (int(threshold),), ) rows = [r for r in cur.fetchall() if r[0] in no_cache] @@ -211,11 +210,18 @@ def suggest_cache( for item in result: typer.echo(f"{item['host']} {item['path']} {item['misses']}") +@app.command("suggest-cache") +def suggest_cache_cli( + threshold: int = typer.Option(10, help="Minimum number of MISS entries to report"), + json_output: bool = typer.Option(False, "--json", help="Output results as JSON"), +) -> None: + """CLI wrapper for suggest_cache.""" + suggest_cache(threshold=threshold, json_output=json_output) + -@app.command("detect-threats") def detect_threats( - hours: int = typer.Option(1, help="Number of recent hours to analyze"), - ip_threshold: int = typer.Option(100, help="Requests from a single IP to flag"), + hours: int = 1, + ip_threshold: int = 100, ) -> None: """Detect potential security threats from recent logs.""" @@ -231,8 +237,8 @@ def detect_threats( max_dt = datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S") recent_end = max_dt - recent_start = recent_end - timedelta(hours=hours) - prev_start = recent_start - timedelta(hours=hours) + recent_start = recent_end - timedelta(hours=int(hours)) + prev_start = recent_start - timedelta(hours=int(hours)) prev_end = recent_start fmt = "%Y-%m-%d %H:%M:%S" @@ -339,6 +345,14 @@ def detect_threats( out_path.write_text(json.dumps(report, indent=2)) typer.echo(json.dumps(report)) +@app.command("detect-threats") +def detect_threats_cli( + hours: int = typer.Option(1, help="Number of recent hours to analyze"), + ip_threshold: int = typer.Option(100, help="Requests from a single IP to flag"), +) -> None: + """CLI wrapper for detect_threats.""" + detect_threats(hours=hours, ip_threshold=ip_threshold) + if __name__ == "__main__": app()