From 2e7e75e4ce8dfdb60506ea523d5c324a8d806069 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 02:19:08 -0500 Subject: [PATCH 01/33] Add run-analysis helper script and tests --- README.md | 11 +++++++++++ run-analysis.sh | 34 ++++++++++++++++++++++++++++++++++ tests/test_run_analysis.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) create mode 100755 run-analysis.sh create mode 100644 tests/test_run_analysis.py diff --git a/README.md b/README.md index d206658..70f21d9 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,17 @@ Use the `run-reports.sh` script to run all report intervals in one step. The scr Running this script will create or update the hourly, daily, weekly and monthly reports under `output/`. It also detects all unique domains found in the database and writes per-domain reports to `output/domains//` alongside the aggregate data. After generation, open `output/index.html` in your browser to browse the reports. + +## Log Analysis + +The `run-analysis.sh` script runs helper routines that inspect the database. It +creates or reuses the virtual environment and then executes a set of analysis +commands to spot missing domains, suggest cache rules and detect potential +threats. + +```bash +./run-analysis.sh +``` ## Serving Reports with Nginx To expose the generated HTML dashboards and JSON files over HTTP you can use a diff --git a/run-analysis.sh b/run-analysis.sh new file mode 100755 index 0000000..1f5c213 --- /dev/null +++ b/run-analysis.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +set -e + +# Ensure virtual environment exists +if [ ! -d ".venv" ]; then + echo "[INFO] Creating virtual environment..." + python3 -m venv .venv + source .venv/bin/activate + echo "[INFO] Installing dependencies..." + pip install --upgrade pip + if [ -f requirements.txt ]; then + pip install -r requirements.txt + else + echo "[WARN] requirements.txt not found, skipping." + fi +else + echo "[INFO] Activating virtual environment..." + source .venv/bin/activate +fi + +# Run analysis helpers +echo "[INFO] Checking for missing domains..." +python scripts/analyze.py check-missing-domains + +echo "[INFO] Suggesting cache improvements..." +python scripts/analyze.py suggest-cache + +echo "[INFO] Detecting threats..." +python scripts/analyze.py detect-threats + +# Deactivate to keep cron environment clean +if type deactivate >/dev/null 2>&1; then + deactivate +fi diff --git a/tests/test_run_analysis.py b/tests/test_run_analysis.py new file mode 100644 index 0000000..60d2810 --- /dev/null +++ b/tests/test_run_analysis.py @@ -0,0 +1,32 @@ +import os +import subprocess +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[1] +SCRIPT = REPO_ROOT / "run-analysis.sh" + + +def test_script_invokes_commands(tmp_path): + # create stub virtualenv so the script skips creation + venv = tmp_path / ".venv" / "bin" + venv.mkdir(parents=True) + (venv / "activate").write_text(":") + + calls = tmp_path / "calls.txt" + python_stub = tmp_path / "python" + python_stub.write_text(f"#!/usr/bin/env bash\necho \"$*\" >> \"{calls}\"\n") + python_stub.chmod(0o755) + (tmp_path / "python3").write_text(f"#!/usr/bin/env bash\nexit 0\n") + (tmp_path / "python3").chmod(0o755) + + env = os.environ.copy() + env["PATH"] = f"{tmp_path}:" + env.get("PATH", "") + + subprocess.run([str(SCRIPT)], cwd=tmp_path, env=env, check=True) + + lines = calls.read_text().strip().splitlines() + assert lines == [ + "scripts/analyze.py check-missing-domains", + "scripts/analyze.py suggest-cache", + "scripts/analyze.py detect-threats", + ] From 9cf27ecb2f917b5a2ac91ded9c8b50a548ef2ba3 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 02:30:15 -0500 Subject: [PATCH 02/33] Add analysis tab and JSON outputs --- README.md | 2 + scripts/analyze.py | 19 +++++-- templates/index.html | 131 +++++++++++++++++++++++++++++++++++++------ 3 files changed, 130 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 70f21d9..acb1055 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,8 @@ threats. ```bash ./run-analysis.sh ``` +The JSON results are written under `output/analysis` and can be viewed from the +"Analysis" tab in the generated dashboard. ## Serving Reports with Nginx To expose the generated HTML dashboards and JSON files over HTTP you can use a diff --git a/scripts/analyze.py b/scripts/analyze.py index 219ceeb..8ac7c30 100644 --- a/scripts/analyze.py +++ b/scripts/analyze.py @@ -138,6 +138,10 @@ def check_missing_domains(json_output: bool = typer.Option(False, "--json", help missing = sorted(db_domains - config_domains) + ANALYSIS_DIR.mkdir(parents=True, exist_ok=True) + out_path = ANALYSIS_DIR / "missing_domains.json" + out_path.write_text(json.dumps(missing, indent=2)) + if json_output: typer.echo(json.dumps(missing)) else: @@ -189,14 +193,19 @@ def suggest_cache( rows = [r for r in cur.fetchall() if r[0] in no_cache] conn.close() + result = [ + {"host": host, "path": path, "misses": count} for host, path, count in rows + ] + + ANALYSIS_DIR.mkdir(parents=True, exist_ok=True) + out_path = ANALYSIS_DIR / "cache_suggestions.json" + out_path.write_text(json.dumps(result, indent=2)) + if json_output: - result = [ - {"host": host, "path": path, "misses": count} for host, path, count in rows - ] typer.echo(json.dumps(result)) else: - for host, path, count in rows: - typer.echo(f"{host} {path} {count}") + for item in result: + typer.echo(f"{item['host']} {item['path']} {item['misses']}") @app.command("detect-threats") diff --git a/templates/index.html b/templates/index.html index 91482f7..7b0b98f 100644 --- a/templates/index.html +++ b/templates/index.html @@ -15,6 +15,7 @@
  • Overview
  • All Domains
  • Per Domain
  • +
  • Analysis
  • @@ -56,9 +57,15 @@
    - + + + @@ -73,13 +80,19 @@ const sections = { overview: document.getElementById('overview-section'), all: document.getElementById('all-section'), - domain: document.getElementById('domain-section') + domain: document.getElementById('domain-section'), + analysis: document.getElementById('analysis-section') }; const containers = { overview: document.getElementById('overview-reports'), all: document.getElementById('reports-all'), domain: document.getElementById('reports-domain') }; + const analysisElems = { + missing: document.getElementById('analysis-missing'), + cache: document.getElementById('analysis-cache'), + threats: document.getElementById('analysis-threats') + }; const totalElem = document.getElementById('stat-total'); const startElem = document.getElementById('stat-start'); const endElem = document.getElementById('stat-end'); @@ -169,19 +182,99 @@ path = 'domains/' + encodeURIComponent(currentDomain) + '/' + currentInterval; } - fetch(path + '/reports.json') - .then(r => r.json()) - .then(reports => { - container.innerHTML = ''; - reports.forEach(rep => { - fetch(path + '/' + rep.html) - .then(r => r.text()) - .then(html => { - container.insertAdjacentHTML('beforeend', html); - initReport(rep, path); - }); + fetch(path + '/reports.json') + .then(r => r.json()) + .then(reports => { + container.innerHTML = ''; + reports.forEach(rep => { + fetch(path + '/' + rep.html) + .then(r => r.text()) + .then(html => { + container.insertAdjacentHTML('beforeend', html); + initReport(rep, path); + }); + }); + feather.replace(); }); - feather.replace(); + } + + function loadAnalysis() { + analysisElems.missing.innerHTML = '

    Missing Domains

    '; + analysisElems.cache.innerHTML = '

    Cache Suggestions

    '; + analysisElems.threats.innerHTML = '

    Threat Report

    '; + + fetch('analysis/missing_domains.json') + .then(r => r.json()) + .then(list => { + if (list.length === 0) { + analysisElems.missing.insertAdjacentHTML('beforeend', '

    None

    '); + return; + } + const items = list.map(d => `
  • ${d}
  • `).join(''); + analysisElems.missing.insertAdjacentHTML('beforeend', `
      ${items}
    `); + }); + + fetch('analysis/cache_suggestions.json') + .then(r => r.json()) + .then(data => { + if (data.length === 0) { + analysisElems.cache.insertAdjacentHTML('beforeend', '

    No suggestions

    '); + return; + } + analysisElems.cache.insertAdjacentHTML('beforeend', '
    '); + const rows = data.map(x => [x.host, x.path, x.misses]); + new DataTable('#table-cache', { + data: rows, + columns: [ + { title: 'Domain' }, + { title: 'Path' }, + { title: 'Misses' } + ] + }); + }); + + fetch('analysis/threat_report.json') + .then(r => r.json()) + .then(rep => { + const hasData = rep.error_spikes?.length || rep.suspicious_agents?.length || rep.high_ip_requests?.length; + if (!hasData) { + analysisElems.threats.insertAdjacentHTML('beforeend', '

    No threats detected

    '); + return; + } + if (rep.error_spikes && rep.error_spikes.length) { + analysisElems.threats.insertAdjacentHTML('beforeend', '

    Error Spikes

    '); + const rows = rep.error_spikes.map(x => [x.host, x.recent_error_rate, x.previous_error_rate]); + new DataTable('#table-errors', { + data: rows, + columns: [ + { title: 'Domain' }, + { title: 'Recent %' }, + { title: 'Previous %' } + ] + }); + } + if (rep.suspicious_agents && rep.suspicious_agents.length) { + analysisElems.threats.insertAdjacentHTML('beforeend', '

    Suspicious User Agents

    '); + const rows = rep.suspicious_agents.map(x => [x.user_agent, x.requests]); + new DataTable('#table-agents', { + data: rows, + columns: [ + { title: 'User Agent' }, + { title: 'Requests' } + ] + }); + } + if (rep.high_ip_requests && rep.high_ip_requests.length) { + analysisElems.threats.insertAdjacentHTML('beforeend', '

    High IP Requests

    '); + const rows = rep.high_ip_requests.map(x => [x.ip, x.requests]); + new DataTable('#table-ips', { + data: rows, + columns: [ + { title: 'IP' }, + { title: 'Requests' } + ] + }); + } }); } @@ -198,7 +291,11 @@ if (name === 'overview') { loadStats(); } - loadReports(); + if (name === 'analysis') { + loadAnalysis(); + } else { + loadReports(); + } } intervalSelect.addEventListener('change', () => { From 0a05f0c010d791c8fcf9f33a9787f224bbac7ae3 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 02:55:15 -0500 Subject: [PATCH 03/33] Fix module imports in run-analysis --- run-analysis.sh | 6 +++--- scripts/__init__.py | 1 + tests/test_run_analysis.py | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 scripts/__init__.py diff --git a/run-analysis.sh b/run-analysis.sh index 1f5c213..e487224 100755 --- a/run-analysis.sh +++ b/run-analysis.sh @@ -20,13 +20,13 @@ fi # Run analysis helpers echo "[INFO] Checking for missing domains..." -python scripts/analyze.py check-missing-domains +python -m scripts.analyze check-missing-domains echo "[INFO] Suggesting cache improvements..." -python scripts/analyze.py suggest-cache +python -m scripts.analyze suggest-cache echo "[INFO] Detecting threats..." -python scripts/analyze.py detect-threats +python -m scripts.analyze detect-threats # Deactivate to keep cron environment clean if type deactivate >/dev/null 2>&1; then diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..f4c57a1 --- /dev/null +++ b/scripts/__init__.py @@ -0,0 +1 @@ +"Utility package for ngxstat scripts" diff --git a/tests/test_run_analysis.py b/tests/test_run_analysis.py index 60d2810..66d00df 100644 --- a/tests/test_run_analysis.py +++ b/tests/test_run_analysis.py @@ -26,7 +26,7 @@ def test_script_invokes_commands(tmp_path): lines = calls.read_text().strip().splitlines() assert lines == [ - "scripts/analyze.py check-missing-domains", - "scripts/analyze.py suggest-cache", - "scripts/analyze.py detect-threats", + "-m scripts.analyze check-missing-domains", + "-m scripts.analyze suggest-cache", + "-m scripts.analyze detect-threats", ] From 2443aecaf65c4bbd8b66a4cf7ee34dc8ece69ab7 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 03:17:07 -0500 Subject: [PATCH 04/33] fix nginx config discovery for absolute include patterns --- scripts/nginx_config.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/nginx_config.py b/scripts/nginx_config.py index e2dc447..dbd635d 100644 --- a/scripts/nginx_config.py +++ b/scripts/nginx_config.py @@ -49,7 +49,15 @@ def discover_configs() -> Set[Path]: found.add(path) for pattern in INCLUDE_RE.findall(text): pattern = os.path.expanduser(pattern.strip()) - for included in path.parent.glob(pattern): + if os.path.isabs(pattern): + # ``Path.glob`` does not allow absolute patterns, so we + # anchor at the filesystem root and remove the leading + # separator. + base = Path(os.sep) + glob_iter = base.glob(pattern.lstrip(os.sep)) + else: + glob_iter = path.parent.glob(pattern) + for included in glob_iter: if included.is_file() and included not in found: queue.append(included) return found From d1f3c5a9ae0fa66e56828743c7103eb2f967753d Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 03:30:08 -0500 Subject: [PATCH 05/33] Hide analysis duration --- scripts/generate_reports.py | 2 +- templates/index.html | 2 +- tests/test_reports.py | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index ae7a3a7..4b5692a 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -175,7 +175,7 @@ def _generate_root_index() -> None: intervals = [ p.name for p in OUTPUT_DIR.iterdir() - if p.is_dir() and p.name.lower() not in {"domains", "global"} + if p.is_dir() and p.name.lower() not in {"domains", "global", "analysis"} ] intervals.sort() diff --git a/templates/index.html b/templates/index.html index 7b0b98f..0a8abed 100644 --- a/templates/index.html +++ b/templates/index.html @@ -286,7 +286,7 @@ Object.entries(sections).forEach(([key, section]) => { section.classList.toggle('is-hidden', key !== name); }); - intervalControl.classList.toggle('is-hidden', name === 'overview'); + intervalControl.classList.toggle('is-hidden', name === 'overview' || name === 'analysis'); domainControl.classList.toggle('is-hidden', name !== 'domain'); if (name === 'overview') { loadStats(); diff --git a/tests/test_reports.py b/tests/test_reports.py index fec898c..dbe71c2 100644 --- a/tests/test_reports.py +++ b/tests/test_reports.py @@ -184,6 +184,8 @@ def test_generate_root_index(tmp_path, sample_reports, monkeypatch): (tmp_path / "output" / "domains" / "bar.com").mkdir(parents=True) # add an extra directory with capitalized name to ensure it's ignored (tmp_path / "output" / "Global").mkdir(parents=True) + # add an analysis directory to ensure it's excluded + (tmp_path / "output" / "analysis").mkdir(parents=True) gr._generate_root_index() @@ -196,6 +198,7 @@ def test_generate_root_index(tmp_path, sample_reports, monkeypatch): assert '