From 66b1209486838b12110ae4b2d42479f22c1b2717 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 04:08:40 -0500 Subject: [PATCH 01/26] Fix dropdown duration dirs --- scripts/generate_reports.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index 4c0c911..b9100f4 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -192,12 +192,14 @@ def _generate_all_domains(interval: str) -> None: def _generate_root_index() -> None: """Render the top-level index listing all intervals and domains.""" _copy_icons() - intervals = [ - p.name - for p in OUTPUT_DIR.iterdir() - if p.is_dir() and p.name.lower() not in {"domains", "global", "analysis"} - ] - intervals.sort() + intervals = sorted( + [ + name + for name in INTERVAL_FORMATS + if (OUTPUT_DIR / name).is_dir() + ] + ) + domains_dir = OUTPUT_DIR / "domains" domains: List[str] = [] From 87608ccdb91023fb7bdbc2d3674524eae579af94 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 04:17:38 -0500 Subject: [PATCH 02/26] refactor: improve report field names --- reports.yml | 56 ++++++++++++++++++++++++------------- scripts/generate_reports.py | 8 ++++++ templates/index.html | 7 +++-- 3 files changed, 49 insertions(+), 22 deletions(-) diff --git a/reports.yml b/reports.yml index 1622538..c9b9cd3 100644 --- a/reports.yml +++ b/reports.yml @@ -2,30 +2,36 @@ label: Hits icon: pulse chart: line + bucket: time_bucket + bucket_label: Time query: | - SELECT {bucket} AS bucket, + SELECT {bucket} AS time_bucket, COUNT(*) AS value FROM logs - GROUP BY bucket - ORDER BY bucket + GROUP BY time_bucket + ORDER BY time_bucket - name: error_rate label: Error Rate (%) icon: file-alert chart: line + bucket: time_bucket + bucket_label: Time query: | - SELECT {bucket} AS bucket, + SELECT {bucket} AS time_bucket, SUM(CASE WHEN status BETWEEN 400 AND 599 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value FROM logs - 
GROUP BY bucket - ORDER BY bucket + GROUP BY time_bucket + ORDER BY time_bucket - name: cache_status_breakdown label: Cache Status icon: archive chart: polarArea + bucket: cache_status + bucket_label: Cache Status query: | - SELECT cache_status AS bucket, + SELECT cache_status AS cache_status, COUNT(*) AS value FROM logs GROUP BY cache_status @@ -43,30 +49,36 @@ icon: globe chart: table per_domain: false + bucket: domain + bucket_label: Domain query: | - SELECT host AS bucket, + SELECT host AS domain, COUNT(*) AS value FROM logs - GROUP BY host + GROUP BY domain ORDER BY value DESC - name: bytes_sent label: Bytes Sent icon: upload chart: line + bucket: time_bucket + bucket_label: Time query: | - SELECT {bucket} AS bucket, + SELECT {bucket} AS time_bucket, SUM(bytes_sent) AS value FROM logs - GROUP BY bucket - ORDER BY bucket + GROUP BY time_bucket + ORDER BY time_bucket - name: top_paths label: Top Paths icon: map chart: table + bucket: path + bucket_label: Path query: | - SELECT path AS bucket, + SELECT path AS path, COUNT(*) AS value FROM ( SELECT substr(substr(request, instr(request, ' ') + 1), 1, @@ -81,8 +93,10 @@ label: User Agents icon: user chart: table + bucket: user_agent + bucket_label: User Agent query: | - SELECT user_agent AS bucket, + SELECT user_agent AS user_agent, COUNT(*) AS value FROM logs GROUP BY user_agent @@ -93,11 +107,13 @@ label: Referrers icon: link chart: table + bucket: referrer + bucket_label: Referrer query: | - SELECT referer AS bucket, + SELECT referer AS referrer, COUNT(*) AS value FROM logs - GROUP BY referer + GROUP BY referrer ORDER BY value DESC LIMIT 20 @@ -105,17 +121,19 @@ label: HTTP Statuses icon: server chart: pie + bucket: status_group + bucket_label: Status query: | SELECT CASE WHEN status BETWEEN 200 AND 299 THEN '2xx' WHEN status BETWEEN 300 AND 399 THEN '3xx' WHEN status BETWEEN 400 AND 499 THEN '4xx' ELSE '5xx' - END AS bucket, + END AS status_group, COUNT(*) AS value FROM logs - GROUP BY bucket - ORDER BY bucket + 
GROUP BY status_group + ORDER BY status_group colors: - "#48c78e" - "#209cee" diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index b9100f4..0a95bdc 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -169,6 +169,10 @@ def _generate_interval(interval: str, domain: Optional[str] = None) -> None: } if "icon" in definition: entry["icon"] = definition["icon"] + if "bucket" in definition: + entry["bucket"] = definition["bucket"] + if "bucket_label" in definition: + entry["bucket_label"] = definition["bucket_label"] if "color" in definition: entry["color"] = definition["color"] if "colors" in definition: @@ -253,6 +257,10 @@ def _generate_global() -> None: } if "icon" in definition: entry["icon"] = definition["icon"] + if "bucket" in definition: + entry["bucket"] = definition["bucket"] + if "bucket_label" in definition: + entry["bucket_label"] = definition["bucket_label"] if "color" in definition: entry["color"] = definition["color"] if "colors" in definition: diff --git a/templates/index.html b/templates/index.html index 237f563..4db9e53 100644 --- a/templates/index.html +++ b/templates/index.html @@ -105,19 +105,20 @@ fetch(base + '/' + rep.json) .then(r => r.json()) .then(data => { + const bucketField = rep.bucket || 'bucket'; if (rep.chart === 'table') { - const rows = data.map(x => [x.bucket, x.value]); + const rows = data.map(x => [x[bucketField], x.value]); new DataTable('#table-' + rep.name, { data: rows, columns: [ - { title: 'Bucket' }, + { title: rep.bucket_label || 'Bucket' }, { title: 'Value' } ] }); return; } - const labels = data.map(x => x.bucket); + const labels = data.map(x => x[bucketField]); const values = data.map(x => x.value); const chartType = rep.chart === 'stackedBar' ? 
'bar' : rep.chart; const options = { scales: { y: { beginAtZero: true } } }; From 832a0a49dde1611dad0f39f8cb6221249dc9823b Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 04:31:13 -0500 Subject: [PATCH 03/26] docs: refresh README and agent guide --- AGENTS.md | 12 +++++ README.md | 141 ++++++++++++++++++++++-------------------------------- 2 files changed, 70 insertions(+), 83 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 4cdfa62..7e7d3c5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -24,6 +24,9 @@ This document outlines general practices and expectations for AI agents assistin The `run-import.sh` script can initialize this environment automatically. Always activate the virtual environment before running scripts or tests. +* Before committing code run `black` for consistent formatting and execute + the test suite with `pytest`. All tests should pass. + * Dependency management: Use `requirements.txt` or `pip-tools` * Use standard libraries where feasible (e.g., `sqlite3`, `argparse`, `datetime`) * Adopt `typer` for CLI command interface (if CLI ergonomics matter) @@ -89,6 +92,14 @@ ngxstat/ If uncertain, the agent should prompt the human for clarification before making architectural assumptions. +## Testing + +Use `pytest` for automated tests. Run the suite from an activated virtual environment and ensure all tests pass before committing: + +```bash +pytest -q +``` + --- ## Future Capabilities @@ -106,3 +117,4 @@ As the project matures, agents may also: * **2025-07-17**: Initial version by Jordan + ChatGPT * **2025-07-17**: Expanded virtual environment usage guidance + diff --git a/README.md b/README.md index acb1055..f641d96 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,16 @@ # ngxstat -Per-domain Nginx log analytics with hybrid static reports and live insights. -## Generating Reports +`ngxstat` is a lightweight log analytics toolkit for Nginx. 
It imports access +logs into an SQLite database and renders static dashboards so you can explore +per-domain metrics without running a heavy backend service. -Use the `generate_reports.py` script to build aggregated JSON and HTML snippet files from `database/ngxstat.db`. +## Requirements -Create a virtual environment and install dependencies: +* Python 3.10+ +* Access to the Nginx log files (default: `/var/log/nginx`) + +The helper scripts create a virtual environment on first run, but you can also +set one up manually: ```bash python3 -m venv .venv @@ -13,118 +18,88 @@ source .venv/bin/activate pip install -r requirements.txt ``` -Then run one or more of the interval commands: - -```bash -python scripts/generate_reports.py hourly -python scripts/generate_reports.py daily -python scripts/generate_reports.py weekly -python scripts/generate_reports.py monthly -``` - -Each command accepts optional flags to generate per-domain reports. Use -`--domain ` to limit output to a specific domain or `--all-domains` -to generate a subdirectory for every domain found in the database: - -```bash -# Hourly reports for example.com only -python scripts/generate_reports.py hourly --domain example.com - -# Weekly reports for all domains individually -python scripts/generate_reports.py weekly --all-domains -``` - -Reports are written under the `output/` directory. Each command updates the corresponding `.json` file and writes one HTML snippet per report. These snippets are loaded dynamically by the main dashboard using Chart.js and DataTables. - -### Configuring Reports - -Report queries are defined in `reports.yml`. Each entry specifies the `name`, -optional `label` and `chart` type, and a SQL `query` that must return `bucket` -and `value` columns. The special token `{bucket}` is replaced with the -appropriate SQLite `strftime` expression for each interval (hourly, daily, -weekly or monthly) so that a single definition works across all durations. 
-When `generate_reports.py` runs, every definition is executed for the requested -interval and creates `output//.json` plus a small HTML snippet -`output//.html` used by the dashboard. - -Example snippet: - -```yaml -- name: hits - chart: bar - query: | - SELECT {bucket} AS bucket, - COUNT(*) AS value - FROM logs - GROUP BY bucket - ORDER BY bucket -``` - -Add or modify entries in `reports.yml` to tailor the generated metrics. - ## Importing Logs -Use the `run-import.sh` script to set up the Python environment if needed and import the latest Nginx log entries into `database/ngxstat.db`. +Run the importer to ingest new log entries into `database/ngxstat.db`: ```bash ./run-import.sh ``` -This script is suitable for cron jobs as it creates the virtual environment on first run, installs dependencies and reuses the environment on subsequent runs. +Rotated logs are processed in order and only entries newer than the last +imported timestamp are added. -The importer handles rotated logs in order from oldest to newest so entries are -processed exactly once. If you rerun the script, it only ingests records with a -timestamp newer than the latest one already stored in the database, preventing -duplicates. +## Generating Reports -## Cron Report Generation - -Use the `run-reports.sh` script to run all report intervals in one step. The script sets up the Python environment the same way as `run-import.sh`, making it convenient for automation via cron. +To build the HTML dashboard and JSON data files use `run-reports.sh` which runs +all intervals in one go: ```bash ./run-reports.sh ``` -Running this script will create or update the hourly, daily, weekly and monthly reports under `output/`. It also detects all unique domains found in the database and writes per-domain reports to `output/domains//` alongside the aggregate data. After generation, open `output/index.html` in your browser to browse the reports. 
+The script calls `scripts/generate_reports.py` internally to create hourly, +daily, weekly and monthly reports. Per-domain reports are written under +`output/domains/` alongside the aggregate data. Open +`output/index.html` in a browser to view the dashboard. +If you prefer to run individual commands you can invoke the generator directly: -## Log Analysis +```bash +python scripts/generate_reports.py hourly +python scripts/generate_reports.py daily --all-domains +``` -The `run-analysis.sh` script runs helper routines that inspect the database. It -creates or reuses the virtual environment and then executes a set of analysis -commands to spot missing domains, suggest cache rules and detect potential -threats. +## Analysis Helpers + +`run-analysis.sh` executes additional utilities that examine the database for +missing domains, caching opportunities and potential threats. The JSON output is +saved under `output/analysis` and appears in the "Analysis" tab of the +dashboard. ```bash ./run-analysis.sh ``` -The JSON results are written under `output/analysis` and can be viewed from the -"Analysis" tab in the generated dashboard. -## Serving Reports with Nginx -To expose the generated HTML dashboards and JSON files over HTTP you can use a -simple Nginx server block. Point the `root` directive to the repository's -`output/` directory and optionally restrict access to your local network. +## Serving the Reports + +The generated files are static. You can serve them with a simple Nginx block: ```nginx server { listen 80; server_name example.com; - - # Path to the generated reports root /path/to/ngxstat/output; location / { try_files $uri $uri/ =404; } - - # Allow access only from private networks - allow 192.0.0.0/8; - allow 10.0.0.0/8; - deny all; } ``` -With this configuration the generated static files are served directly by -Nginx while connections outside of `192.*` and `10.*` are denied. +Restrict access if the reports should not be public. 
+## Running Tests + +Install the development dependencies and execute the suite with `pytest`: + +```bash +pip install -r requirements.txt +pytest -q +``` + +All tests must pass before submitting changes. + +## Acknowledgements + +ngxstat uses the following third‑party resources: + +* [Chart.js](https://www.chartjs.org/) for charts +* [DataTables](https://datatables.net/) and [jQuery](https://jquery.com/) for table views +* [Bulma CSS](https://bulma.io/) for styling +* Icons from [Free CC0 Icons](https://cc0-icons.jonh.eu/) by Jon Hicks (CC0 / MIT) +* [Typer](https://typer.tiangolo.com/) for the command-line interface +* [Jinja2](https://palletsprojects.com/p/jinja/) for templating + +The project is licensed under the GPLv3. Icon assets remain in the public domain +via the CC0 license. From 669b51e03cdea7575ae5c6ee754781b7274c1610 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 04:50:09 -0500 Subject: [PATCH 04/26] Add report generation timing stats --- scripts/generate_reports.py | 23 +++++++++++++++-------- templates/index.html | 7 +++++++ tests/test_reports.py | 20 +++++++++++--------- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index 0a95bdc..664a3a5 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -3,6 +3,8 @@ import sqlite3 from pathlib import Path import shutil from typing import List, Dict, Optional +from datetime import datetime +import time import yaml @@ -74,7 +76,9 @@ def _render_snippet(report: Dict, out_dir: Path) -> None: snippet_path.write_text(template.render(report=report)) -def _write_stats() -> None: +def _write_stats( + generated_at: Optional[str] = None, generation_seconds: Optional[float] = None +) -> None: """Query basic dataset stats and write them to ``output/global/stats.json``.""" conn = sqlite3.connect(DB_PATH) cur = conn.cursor() @@ -98,6 +102,10 @@ def _write_stats() -> None: "end_date": end_date, 
"unique_domains": unique_domains, } + if generated_at: + stats["generated_at"] = generated_at + if generation_seconds is not None: + stats["generation_seconds"] = generation_seconds out_path = OUTPUT_DIR / "global" / "stats.json" _save_json(out_path, stats) @@ -197,14 +205,9 @@ def _generate_root_index() -> None: """Render the top-level index listing all intervals and domains.""" _copy_icons() intervals = sorted( - [ - name - for name in INTERVAL_FORMATS - if (OUTPUT_DIR / name).is_dir() - ] + [name for name in INTERVAL_FORMATS if (OUTPUT_DIR / name).is_dir()] ) - domains_dir = OUTPUT_DIR / "domains" domains: List[str] = [] if domains_dir.is_dir(): @@ -227,6 +230,9 @@ def _generate_global() -> None: typer.echo("No report definitions found") return + start_time = time.time() + generated_at = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") + _copy_icons() conn = sqlite3.connect(DB_PATH) @@ -269,7 +275,8 @@ def _generate_global() -> None: report_list.append(entry) _save_json(out_dir / "reports.json", report_list) - _write_stats() + elapsed = round(time.time() - start_time, 2) + _write_stats(generated_at, elapsed) typer.echo("Generated global reports") diff --git a/templates/index.html b/templates/index.html index 4db9e53..472ef25 100644 --- a/templates/index.html +++ b/templates/index.html @@ -49,6 +49,8 @@

Total logs: -

Date range: - to -

Unique domains: -

+

Last generated: -

+

Generation time: - seconds

@@ -96,6 +98,8 @@ const startElem = document.getElementById('stat-start'); const endElem = document.getElementById('stat-end'); const domainsElem = document.getElementById('stat-domains'); + const generatedElem = document.getElementById('stat-generated'); + const elapsedElem = document.getElementById('stat-elapsed'); let currentInterval = intervalSelect.value; let currentDomain = domainSelect.value; @@ -161,6 +165,9 @@ startElem.textContent = stats.start_date; endElem.textContent = stats.end_date; domainsElem.textContent = stats.unique_domains; + generatedElem.textContent = stats.generated_at || '-'; + elapsedElem.textContent = + stats.generation_seconds !== undefined ? stats.generation_seconds : '-'; }); } diff --git a/tests/test_reports.py b/tests/test_reports.py index dbe71c2..75d7737 100644 --- a/tests/test_reports.py +++ b/tests/test_reports.py @@ -148,20 +148,19 @@ def test_generate_interval_domain_filter(tmp_path, sample_reports, monkeypatch): gr._generate_interval("hourly", "example.com") hits = json.loads( - (tmp_path / "output" / "domains" / "example.com" / "hourly" / "hits.json").read_text() + ( + tmp_path / "output" / "domains" / "example.com" / "hourly" / "hits.json" + ).read_text() ) assert hits[0]["value"] == 2 reports = json.loads( - (tmp_path / "output" / "domains" / "example.com" / "hourly" / "reports.json").read_text() + ( + tmp_path / "output" / "domains" / "example.com" / "hourly" / "reports.json" + ).read_text() ) assert {r["name"] for r in reports} == {"hits", "error_rate"} assert not ( - tmp_path - / "output" - / "domains" - / "example.com" - / "hourly" - / "skip_report.json" + tmp_path / "output" / "domains" / "example.com" / "hourly" / "skip_report.json" ).exists() @@ -245,9 +244,12 @@ def test_global_stats_file(tmp_path, sample_reports, monkeypatch): "start_date", "end_date", "unique_domains", + "generated_at", + "generation_seconds", } assert stats["total_logs"] == 2 assert stats["start_date"] == "2024-01-01 10:00:00" assert 
stats["end_date"] == "2024-01-01 10:05:00" assert stats["unique_domains"] == 1 - + assert isinstance(stats["generated_at"], str) + assert stats["generation_seconds"] >= 0 From fb753a1189dbaab3b2d35a63f1b81668933f01ee Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 16:41:58 -0500 Subject: [PATCH 05/26] fix: destroy charts when reloading domain reports --- templates/index.html | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/templates/index.html b/templates/index.html index 472ef25..b1cbf89 100644 --- a/templates/index.html +++ b/templates/index.html @@ -171,6 +171,15 @@ }); } + function destroyCharts(container) { + container.querySelectorAll('canvas').forEach(c => { + const chart = Chart.getChart(c); + if (chart) { + chart.destroy(); + } + }); + } + function loadReports() { let path; let container; @@ -183,6 +192,7 @@ } else { container = containers.domain; if (!currentDomain) { + destroyCharts(container); container.innerHTML = '

Select a domain

'; return; } @@ -192,6 +202,7 @@ fetch(path + '/reports.json') .then(r => r.json()) .then(reports => { + destroyCharts(container); container.innerHTML = ''; reports.forEach(rep => { fetch(path + '/' + rep.html) From 297c913f2a314a843f0304baaa8cd2a7bf1ad8e5 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 17:03:48 -0500 Subject: [PATCH 06/26] fix: destroy charts when switching tabs --- scripts/analyze.py | 22 +++++++++++----------- scripts/init_db.py | 4 +++- scripts/nginx_config.py | 1 - templates/index.html | 5 +++++ tests/test_importer.py | 5 ++--- tests/test_nginx_config.py | 1 - tests/test_run_analysis.py | 2 +- 7 files changed, 22 insertions(+), 18 deletions(-) diff --git a/scripts/analyze.py b/scripts/analyze.py index 8ac7c30..fe7b818 100644 --- a/scripts/analyze.py +++ b/scripts/analyze.py @@ -105,7 +105,9 @@ def hits(domain: Optional[str] = typer.Option(None, help="Filter by domain")) -> @app.command("cache-ratio") -def cache_ratio_cmd(domain: Optional[str] = typer.Option(None, help="Filter by domain")) -> None: +def cache_ratio_cmd( + domain: Optional[str] = typer.Option(None, help="Filter by domain") +) -> None: """Display cache hit ratio as a percentage.""" ratio = get_cache_ratio(domain) * 100 if domain: @@ -115,7 +117,11 @@ def cache_ratio_cmd(domain: Optional[str] = typer.Option(None, help="Filter by d @app.command("check-missing-domains") -def check_missing_domains(json_output: bool = typer.Option(False, "--json", help="Output missing domains as JSON")) -> None: +def check_missing_domains( + json_output: bool = typer.Option( + False, "--json", help="Output missing domains as JSON" + ) +) -> None: """Show domains present in the database but absent from Nginx config.""" try: from scripts.generate_reports import _get_domains as _db_domains @@ -151,9 +157,7 @@ def check_missing_domains(json_output: bool = typer.Option(False, "--json", help @app.command("suggest-cache") def suggest_cache( - threshold: int = typer.Option( - 10, 
help="Minimum number of MISS entries to report" - ), + threshold: int = typer.Option(10, help="Minimum number of MISS entries to report"), json_output: bool = typer.Option(False, "--json", help="Output results as JSON"), ) -> None: """Suggest domain/path pairs that could benefit from caching. @@ -211,9 +215,7 @@ def suggest_cache( @app.command("detect-threats") def detect_threats( hours: int = typer.Option(1, help="Number of recent hours to analyze"), - ip_threshold: int = typer.Option( - 100, help="Requests from a single IP to flag" - ), + ip_threshold: int = typer.Option(100, help="Requests from a single IP to flag"), ) -> None: """Detect potential security threats from recent logs.""" @@ -316,9 +318,7 @@ def detect_threats( """, (recent_start_s, recent_end_s, ip_threshold), ) - high_ip_requests = [ - {"ip": ip, "requests": cnt} for ip, cnt in cur.fetchall() - ] + high_ip_requests = [{"ip": ip, "requests": cnt} for ip, cnt in cur.fetchall()] conn.close() diff --git a/scripts/init_db.py b/scripts/init_db.py index f378b5c..b9ea07d 100644 --- a/scripts/init_db.py +++ b/scripts/init_db.py @@ -61,7 +61,9 @@ try: suffix = match.group(1) number = int(suffix.lstrip(".")) if suffix else 0 log_files.append((number, os.path.join(LOG_DIR, f))) - log_files = [path for _, path in sorted(log_files, key=lambda x: x[0], reverse=True)] + log_files = [ + path for _, path in sorted(log_files, key=lambda x: x[0], reverse=True) + ] except FileNotFoundError: print(f"[ERROR] Log directory not found: {LOG_DIR}") exit(1) diff --git a/scripts/nginx_config.py b/scripts/nginx_config.py index dbd635d..bc585a7 100644 --- a/scripts/nginx_config.py +++ b/scripts/nginx_config.py @@ -93,4 +93,3 @@ def parse_servers(paths: Set[Path]) -> List[Dict[str, str]]: entry["root"] = " ".join(directives["root"]) servers.append(entry) return servers - diff --git a/templates/index.html b/templates/index.html index b1cbf89..84c6214 100644 --- a/templates/index.html +++ b/templates/index.html @@ -180,6 +180,10 
@@ }); } + function destroyAllCharts() { + Object.values(containers).forEach(destroyCharts); + } + function loadReports() { let path; let container; @@ -296,6 +300,7 @@ } function switchTab(name) { + destroyAllCharts(); currentTab = name; tabs.forEach(tab => { tab.classList.toggle('is-active', tab.dataset.tab === name); diff --git a/tests/test_importer.py b/tests/test_importer.py index 349786b..38c9be9 100644 --- a/tests/test_importer.py +++ b/tests/test_importer.py @@ -15,10 +15,10 @@ def sample_logs(tmp_path): log_dir.mkdir(parents=True, exist_ok=True) (log_dir / "access.log.1").write_text( - "127.0.0.1 - example.com [01/Jan/2024:10:00:00 +0000] \"GET / HTTP/1.1\" 200 123 \"-\" \"curl\" MISS\n" + '127.0.0.1 - example.com [01/Jan/2024:10:00:00 +0000] "GET / HTTP/1.1" 200 123 "-" "curl" MISS\n' ) (log_dir / "access.log").write_text( - "127.0.0.1 - example.com [01/Jan/2024:10:05:00 +0000] \"GET /about HTTP/1.1\" 200 123 \"-\" \"curl\" MISS\n" + '127.0.0.1 - example.com [01/Jan/2024:10:05:00 +0000] "GET /about HTTP/1.1" 200 123 "-" "curl" MISS\n' ) yield log_dir @@ -59,4 +59,3 @@ def test_idempotent_import(sample_logs, tmp_path): assert first_count == 2 assert second_count == first_count - diff --git a/tests/test_nginx_config.py b/tests/test_nginx_config.py index 782c635..a6494a7 100644 --- a/tests/test_nginx_config.py +++ b/tests/test_nginx_config.py @@ -67,4 +67,3 @@ server { assert servers[1]["server_name"] == "example.org" assert servers[1]["listen"] == "443 ssl" assert "proxy_cache" not in servers[1] - diff --git a/tests/test_run_analysis.py b/tests/test_run_analysis.py index 66d00df..fad7953 100644 --- a/tests/test_run_analysis.py +++ b/tests/test_run_analysis.py @@ -14,7 +14,7 @@ def test_script_invokes_commands(tmp_path): calls = tmp_path / "calls.txt" python_stub = tmp_path / "python" - python_stub.write_text(f"#!/usr/bin/env bash\necho \"$*\" >> \"{calls}\"\n") + python_stub.write_text(f'#!/usr/bin/env bash\necho "$*" >> "{calls}"\n') 
python_stub.chmod(0o755) (tmp_path / "python3").write_text(f"#!/usr/bin/env bash\nexit 0\n") (tmp_path / "python3").chmod(0o755) From 5d2546ad60d4676faf2d2c8944b75ba2fb51b104 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 18:01:26 -0500 Subject: [PATCH 07/26] Add chart loading management --- scripts/generate_reports.py | 15 ++++---- static/chartManager.js | 49 +++++++++++++++++++++++++ templates/index.html | 71 ++++++++++++++++++++----------------- 3 files changed, 97 insertions(+), 38 deletions(-) create mode 100644 static/chartManager.js diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index 664a3a5..e587e6e 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -58,14 +58,17 @@ def _save_json(path: Path, data: List[Dict]) -> None: def _copy_icons() -> None: - """Copy vendored icons to the output directory.""" + """Copy vendored icons and scripts to the output directory.""" src_dir = Path("static/icons") dst_dir = OUTPUT_DIR / "icons" - if not src_dir.is_dir(): - return - dst_dir.mkdir(parents=True, exist_ok=True) - for icon in src_dir.glob("*.svg"): - shutil.copy(icon, dst_dir / icon.name) + if src_dir.is_dir(): + dst_dir.mkdir(parents=True, exist_ok=True) + for icon in src_dir.glob("*.svg"): + shutil.copy(icon, dst_dir / icon.name) + + js_src = Path("static/chartManager.js") + if js_src.is_file(): + shutil.copy(js_src, OUTPUT_DIR / js_src.name) def _render_snippet(report: Dict, out_dir: Path) -> None: diff --git a/static/chartManager.js b/static/chartManager.js new file mode 100644 index 0000000..79d83fc --- /dev/null +++ b/static/chartManager.js @@ -0,0 +1,49 @@ +export let currentLoad = null; +const loadInfo = new Map(); + +export function newLoad(container) { + if (currentLoad) { + abortLoad(currentLoad); + } + reset(container); + const controller = new AbortController(); + const token = { controller, charts: new Map() }; + loadInfo.set(token, token); + currentLoad = token; + return token; +} + 
+export function abortLoad(token) { + const info = loadInfo.get(token); + if (!info) return; + info.controller.abort(); + info.charts.forEach(chart => { + try { + chart.destroy(); + } catch (e) {} + }); + loadInfo.delete(token); + if (currentLoad === token) { + currentLoad = null; + } +} + +export function registerChart(token, id, chart) { + const info = loadInfo.get(token); + if (info) { + info.charts.set(id, chart); + } else { + chart.destroy(); + } +} + +export function reset(container) { + if (!container) return; + container.querySelectorAll('canvas').forEach(c => { + const chart = Chart.getChart(c); + if (chart) { + chart.destroy(); + } + }); + container.innerHTML = ''; +} diff --git a/templates/index.html b/templates/index.html index 84c6214..1b27003 100644 --- a/templates/index.html +++ b/templates/index.html @@ -72,7 +72,14 @@ - From 6de85b7cc50536f5d13629e62bade6e8922f2292 Mon Sep 17 00:00:00 2001 From: ngxstat-bot Date: Mon, 18 Aug 2025 23:47:23 -0500 Subject: [PATCH 20/26] UX Phase 1 follow-ups: state v2 + reset, window defaults + support, palette support; analysis JSON generation; tests for LIMIT/metadata; README updates --- README.md | 17 ++++--- run-reports.sh | 4 ++ scripts/generate_reports.py | 34 ++++++++++++++ templates/index.html | 64 +++++++++++++++++++++----- tests/test_reports.py | 90 +++++++++++++++++++++++++++++++++++++ 5 files changed, 193 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index f641d96..ac601fc 100644 --- a/README.md +++ b/README.md @@ -39,9 +39,10 @@ all intervals in one go: ``` The script calls `scripts/generate_reports.py` internally to create hourly, -daily, weekly and monthly reports. Per-domain reports are written under -`output/domains/` alongside the aggregate data. Open -`output/index.html` in a browser to view the dashboard. +daily, weekly and monthly reports, then writes analysis JSON files used by the +"Analysis" tab. 
Per-domain reports are written under `output/domains/` +alongside the aggregate data. Open `output/index.html` in a browser to view the +dashboard. If you prefer to run individual commands you can invoke the generator directly: @@ -54,8 +55,14 @@ python scripts/generate_reports.py daily --all-domains `run-analysis.sh` executes additional utilities that examine the database for missing domains, caching opportunities and potential threats. The JSON output is -saved under `output/analysis` and appears in the "Analysis" tab of the -dashboard. +saved under `output/analysis` and appears in the "Analysis" tab. The +`run-reports.sh` script also generates these JSON files as part of the build. + +## UX Controls + +The dashboard defaults to a 7‑day window for time series. Your view preferences +persist locally in the browser under the `ngxstat-state-v2` key. Use the +"Reset view" button to clear saved state and restore defaults. ```bash ./run-analysis.sh diff --git a/run-reports.sh b/run-reports.sh index bfe736d..4556f32 100755 --- a/run-reports.sh +++ b/run-reports.sh @@ -42,6 +42,10 @@ python scripts/generate_reports.py daily --all-domains python scripts/generate_reports.py weekly --all-domains python scripts/generate_reports.py monthly --all-domains +# Generate analysis JSON +echo "[INFO] Generating analysis files..." 
+python scripts/generate_reports.py analysis + # Generate root index python scripts/generate_reports.py index diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index 073e0b7..178951e 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -344,6 +344,34 @@ def _generate_global() -> None: typer.echo("Generated global reports") +def _generate_analysis() -> None: + """Generate analysis JSON files consumed by the Analysis tab.""" + try: + # Import lazily to avoid circulars and keep dependencies optional + from scripts import analyze + except Exception as exc: # pragma: no cover - defensive + typer.echo(f"Failed to import analysis module: {exc}") + return + + # Ensure output root and icons present for parity + _copy_icons() + + # Write output/analysis/ JSON; pass options explicitly because Typer is bypassed + try: + analyze.check_missing_domains(json_output=True) + except Exception as exc: # pragma: no cover - continue best-effort + typer.echo(f"check_missing_domains failed: {exc}") + try: + analyze.suggest_cache(threshold=10, json_output=True) + except Exception as exc: # pragma: no cover + typer.echo(f"suggest_cache failed: {exc}") + try: + analyze.detect_threats(hours=1, ip_threshold=100) + except Exception as exc: # pragma: no cover + typer.echo(f"detect_threats failed: {exc}") + typer.echo("Generated analysis JSON files") + + @app.command() def hourly( domain: Optional[str] = typer.Option( @@ -414,6 +442,12 @@ def global_reports() -> None: _generate_global() +@app.command() +def analysis() -> None: + """Generate analysis JSON files for the Analysis tab.""" + _generate_analysis() + + @app.command() def index() -> None: """Generate the root index page linking all reports.""" diff --git a/templates/index.html b/templates/index.html index 56dfd6f..5b877ef 100644 --- a/templates/index.html +++ b/templates/index.html @@ -74,6 +74,9 @@ Exclude “-” 
+ +
@@ -122,6 +125,7 @@ groupOthers, movingAverage, } from './chartManager.js'; + const STATE_KEY = 'ngxstat-state-v2'; const intervalSelect = document.getElementById('interval-select'); const domainSelect = document.getElementById('domain-select'); const intervalControl = document.getElementById('interval-control'); @@ -131,6 +135,7 @@ const modeGroupControl = document.getElementById('mode-group-control'); const excludeUncachedControl = document.getElementById('exclude-uncached-control'); const smoothControl = document.getElementById('smooth-control'); + const resetButton = document.getElementById('reset-view'); const tabs = document.querySelectorAll('#report-tabs li'); const sections = { recent: document.getElementById('recent-section'), @@ -172,10 +177,11 @@ let modeGroup = true; let excludeUncached = true; let smoothError = false; + let hadExplicitWindow = false; // URL or saved-state provided window function saveState() { try { - localStorage.setItem('ngxstat-state', JSON.stringify({ + localStorage.setItem(STATE_KEY, JSON.stringify({ tab: currentTab, interval: currentInterval, domain: currentDomain, @@ -190,11 +196,11 @@ function loadSavedState() { try { - const s = JSON.parse(localStorage.getItem('ngxstat-state') || '{}'); + const s = JSON.parse(localStorage.getItem(STATE_KEY) || '{}'); if (s.tab) currentTab = s.tab; if (s.interval) currentInterval = s.interval; if (s.domain !== undefined) currentDomain = s.domain; - if (s.window) currentWindow = s.window; + if (s.window) { currentWindow = s.window; hadExplicitWindow = true; } if (s.percent !== undefined) modePercent = !!Number(s.percent); if (s.group !== undefined) modeGroup = !!Number(s.group); if (s.exclude_dash !== undefined) excludeUncached = !!Number(s.exclude_dash); @@ -207,7 +213,7 @@ if (params.get('tab')) currentTab = params.get('tab'); if (params.get('interval')) currentInterval = params.get('interval'); if (params.get('domain') !== null) currentDomain = params.get('domain') || ''; - if 
(params.get('window')) currentWindow = params.get('window'); + if (params.get('window')) { currentWindow = params.get('window'); hadExplicitWindow = true; } if (params.get('percent') !== null) modePercent = params.get('percent') === '1'; if (params.get('group') !== null) modeGroup = params.get('group') === '1'; if (params.get('exclude_dash') !== null) excludeUncached = params.get('exclude_dash') === '1'; @@ -273,8 +279,13 @@ } // Windowing for time series if (isTimeSeries) { - const n = bucketsForWindow(currentWindow, currentInterval); - transformed = sliceWindow(transformed, n); + // Only apply windowing if report supports current window (if constrained) + const supported = Array.isArray(rep.windows_supported) ? rep.windows_supported : null; + const canWindow = !supported || supported.includes(currentWindow); + if (canWindow) { + const n = bucketsForWindow(currentWindow, currentInterval); + transformed = sliceWindow(transformed, n); + } } // Distributions: percent + group small const isDistribution = ['pie', 'polarArea', 'doughnut', 'donut'].includes(rep.chart); @@ -306,7 +317,7 @@ options.scales.y.stacked = true; // Build multiple series from columns (exclude bucket & total) const keys = transformed.length ? 
Object.keys(transformed[0]).filter(k => k !== bucketField && k !== 'total') : []; - const palette = rep.colors || [ + const palette = rep.colors || rep.palette || [ '#3273dc', '#23d160', '#ffdd57', '#ff3860', '#7957d5', '#363636' ]; datasets = keys.map((k, i) => ({ @@ -327,6 +338,9 @@ if (rep.colors) { dataset.backgroundColor = rep.colors; dataset.borderColor = rep.colors; + } else if (rep.palette) { + dataset.backgroundColor = rep.palette; + dataset.borderColor = rep.palette; } else if (rep.color) { dataset.backgroundColor = rep.color; dataset.borderColor = rep.color; @@ -392,6 +406,15 @@ if (currentTab === 'tables') return rep.chart === 'table'; return true; }); + // If no explicit window was given (URL or saved state), honor first report's default + if (!hadExplicitWindow) { + const withDefault = filtered.find(r => r.window_default); + if (withDefault && typeof withDefault.window_default === 'string') { + currentWindow = withDefault.window_default; + windowSelect.value = currentWindow; + updateURL(); + } + } filtered.forEach(rep => { fetch(path + '/' + rep.html, { signal: token.controller.signal }) .then(r => r.text()) @@ -499,10 +522,12 @@ intervalControl.classList.toggle('is-hidden', !showInterval); domainControl.classList.toggle('is-hidden', !showDomain); windowControl.classList.toggle('is-hidden', !showInterval); - modePercentControl.classList.toggle('is-hidden', !showInterval); - modeGroupControl.classList.toggle('is-hidden', !showInterval); - excludeUncachedControl.classList.toggle('is-hidden', !showInterval); - smoothControl.classList.toggle('is-hidden', !showInterval); + // Only show percent/group/exclude toggles on Distribution tab, + // and smoothing only on Trends tab + modePercentControl.classList.toggle('is-hidden', name !== 'distribution'); + modeGroupControl.classList.toggle('is-hidden', name !== 'distribution'); + excludeUncachedControl.classList.toggle('is-hidden', name !== 'distribution'); + smoothControl.classList.toggle('is-hidden', name !== 
'trends'); updateURL(); if (name === 'recent') { loadStats(); @@ -570,6 +595,23 @@ switchTab(tab.dataset.tab); }); }); + resetButton.addEventListener('click', () => { + try { + localStorage.removeItem('ngxstat-state'); // clear legacy + localStorage.removeItem(STATE_KEY); + } catch {} + // Reset to hard defaults + currentTab = 'recent'; + currentInterval = intervalSelect.value = intervalSelect.options[0]?.value || currentInterval; + currentDomain = domainSelect.value = ''; + currentWindow = windowSelect.value = '7d'; + modePercent = percentToggle.checked = false; + modeGroup = groupToggle.checked = true; + excludeUncached = excludeUncachedToggle.checked = true; + smoothError = smoothToggle.checked = false; + hadExplicitWindow = false; + switchTab(currentTab); + }); // Initialize state (URL -> localStorage -> defaults) loadSavedState(); applyURLParams(); diff --git a/tests/test_reports.py b/tests/test_reports.py index f6c6918..60a6df6 100644 --- a/tests/test_reports.py +++ b/tests/test_reports.py @@ -323,3 +323,93 @@ def test_multi_bucket_table(tmp_path, monkeypatch): entry = next(r for r in reports if r["name"] == "multi") assert entry["buckets"] == ["domain", "agent"] assert entry["bucket_label"] == ["Domain", "Agent"] + + +def test_top_n_limit_applied(tmp_path, monkeypatch): + # Prepare DB with many distinct agents + db_path = tmp_path / "database" / "ngxstat.db" + setup_db(db_path) + conn = sqlite3.connect(db_path) + cur = conn.cursor() + for i in range(10): + cur.execute( + "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + "127.0.0.1", + "example.com", + f"2024-01-01 11:{i:02d}:00", + "GET /x HTTP/1.1", + 200, + 100, + "-", + f"ua-{i}", + "MISS", + ), + ) + conn.commit() + conn.close() + + cfg = tmp_path / "reports.yml" + cfg.write_text( + """ +- name: agents + chart: table + global: true + top_n: 3 + query: | + SELECT user_agent AS agent, COUNT(*) AS value + FROM logs + 
GROUP BY user_agent + ORDER BY value DESC +""" + ) + + monkeypatch.setattr(gr, "DB_PATH", db_path) + monkeypatch.setattr(gr, "OUTPUT_DIR", tmp_path / "output") + monkeypatch.setattr(gr, "REPORT_CONFIG", cfg) + monkeypatch.setattr( + gr, "TEMPLATE_DIR", Path(__file__).resolve().parents[1] / "templates" + ) + + gr._generate_global() + + data = json.loads((tmp_path / "output" / "global" / "agents.json").read_text()) + # Should be limited to 3 rows + assert len(data) <= 3 + + +def test_metadata_passthrough(tmp_path, monkeypatch): + db_path = tmp_path / "database" / "ngxstat.db" + setup_db(db_path) + + cfg = tmp_path / "reports.yml" + cfg.write_text( + """ +- name: custom_ts + label: Custom TS + chart: line + window_default: 24h + windows_supported: [1h, 24h, 7d] + palette: ["#111111", "#222222"] + query: | + SELECT {bucket} AS time_bucket, COUNT(*) AS value + FROM logs + GROUP BY time_bucket + ORDER BY time_bucket +""" + ) + + monkeypatch.setattr(gr, "DB_PATH", db_path) + monkeypatch.setattr(gr, "OUTPUT_DIR", tmp_path / "output") + monkeypatch.setattr(gr, "REPORT_CONFIG", cfg) + monkeypatch.setattr( + gr, "TEMPLATE_DIR", Path(__file__).resolve().parents[1] / "templates" + ) + + gr._generate_interval("hourly") + + reports = json.loads((tmp_path / "output" / "hourly" / "reports.json").read_text()) + entry = next(r for r in reports if r["name"] == "custom_ts") + assert entry["window_default"] == "24h" + assert entry["windows_supported"] == ["1h", "24h", "7d"] + assert entry["palette"] == ["#111111", "#222222"] From 95e54359d7cc0289655161d0f271b533f9dcf7a1 Mon Sep 17 00:00:00 2001 From: ngxstat-bot Date: Tue, 19 Aug 2025 00:09:49 -0500 Subject: [PATCH 21/26] UX: unify time selection and simplify controls\n\n- Replace separate Interval + Window with a single Time preset (Last hour/24h/7d/30d/12w/12m/All time)\n- Map presets to sensible grouping (hourly/daily/weekly/monthly) based on available intervals\n- Keep backward compatibility: preserve existing URL/state params; keep 
legacy controls hidden\n- Add client support for new windows (12w, 12m) in time-bucket slicing\n- Show only relevant controls per tab (Trends: smoothing; Distribution: percent/group/exclude)\n- Streamline reset flow to a sane default (Last 7 days) --- templates/index.html | 133 +++++++++++++++++++++++++++++++++---------- 1 file changed, 102 insertions(+), 31 deletions(-) diff --git a/templates/index.html b/templates/index.html index 5b877ef..7e16527 100644 --- a/templates/index.html +++ b/templates/index.html @@ -21,6 +21,7 @@
+ @@ -64,17 +63,17 @@
@@ -99,12 +98,17 @@ - - - @@ -157,6 +159,7 @@ trends: document.getElementById('reports-trends'), breakdown: document.getElementById('reports-breakdown') }; + const recentRow = document.getElementById('recent-row'); const analysisElems = { missing: document.getElementById('analysis-missing'), cache: document.getElementById('analysis-cache'), @@ -453,6 +456,11 @@ path = currentDomain ? ('domains/' + encodeURIComponent(currentDomain) + '/' + currentInterval) : currentInterval; } + // Clear the top row on each load of Recent + if (currentTab === 'recent' && recentRow) { + recentRow.innerHTML = ''; + } + const token = newLoad(container); fetch(path + '/reports.json', { signal: token.controller.signal }) @@ -480,7 +488,15 @@ .then(r => r.text()) .then(html => { if (token !== currentLoad) return; - container.insertAdjacentHTML('beforeend', html); + // On Recent tab, render Cache Status and HTTP Statuses side-by-side + const inTopRow = currentTab === 'recent' && + (rep.name === 'cache_status_breakdown' || rep.name === 'status_distribution'); + if (inTopRow && recentRow) { + const wrapped = `
${html}
`; + recentRow.insertAdjacentHTML('beforeend', wrapped); + } else { + container.insertAdjacentHTML('beforeend', html); + } initReport(token, rep, path); }); }); From 371719799192a775c91c5be1c71a40a50dcbb243 Mon Sep 17 00:00:00 2001 From: ngxstat-bot Date: Tue, 19 Aug 2025 00:51:10 -0500 Subject: [PATCH 26/26] analysis: make suggest_cache and detect_threats pure-callable, add CLI wrappers\n\n- Replace Typer Option defaults with plain Python defaults in functions used by generator/tests\n- Add CLI wrapper commands (`suggest-cache`, `detect-threats`) that delegate to the pure functions\n- Cast params to int for SQL/timedelta to avoid type issues\n- Resolves OptionInfo errors during run-reports analysis phase --- scripts/analyze.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/scripts/analyze.py b/scripts/analyze.py index 7c4c141..9f49978 100644 --- a/scripts/analyze.py +++ b/scripts/analyze.py @@ -155,10 +155,9 @@ def check_missing_domains( typer.echo(d) -@app.command("suggest-cache") def suggest_cache( - threshold: int = typer.Option(10, help="Minimum number of MISS entries to report"), - json_output: bool = typer.Option(False, "--json", help="Output results as JSON"), + threshold: int = 10, + json_output: bool = False, ) -> None: """Suggest domain/path pairs that could benefit from caching. @@ -191,7 +190,7 @@ def suggest_cache( HAVING miss_count >= ? 
ORDER BY miss_count DESC """, - (threshold,), + (int(threshold),), ) rows = [r for r in cur.fetchall() if r[0] in no_cache] @@ -211,11 +210,18 @@ def suggest_cache( for item in result: typer.echo(f"{item['host']} {item['path']} {item['misses']}") +@app.command("suggest-cache") +def suggest_cache_cli( + threshold: int = typer.Option(10, help="Minimum number of MISS entries to report"), + json_output: bool = typer.Option(False, "--json", help="Output results as JSON"), +) -> None: + """CLI wrapper for suggest_cache.""" + suggest_cache(threshold=threshold, json_output=json_output) + -@app.command("detect-threats") def detect_threats( - hours: int = typer.Option(1, help="Number of recent hours to analyze"), - ip_threshold: int = typer.Option(100, help="Requests from a single IP to flag"), + hours: int = 1, + ip_threshold: int = 100, ) -> None: """Detect potential security threats from recent logs.""" @@ -231,8 +237,8 @@ def detect_threats( max_dt = datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S") recent_end = max_dt - recent_start = recent_end - timedelta(hours=hours) - prev_start = recent_start - timedelta(hours=hours) + recent_start = recent_end - timedelta(hours=int(hours)) + prev_start = recent_start - timedelta(hours=int(hours)) prev_end = recent_start fmt = "%Y-%m-%d %H:%M:%S" @@ -339,6 +345,14 @@ def detect_threats( out_path.write_text(json.dumps(report, indent=2)) typer.echo(json.dumps(report)) +@app.command("detect-threats") +def detect_threats_cli( + hours: int = typer.Option(1, help="Number of recent hours to analyze"), + ip_threshold: int = typer.Option(100, help="Requests from a single IP to flag"), +) -> None: + """CLI wrapper for detect_threats.""" + detect_threats(hours=hours, ip_threshold=ip_threshold) + if __name__ == "__main__": app()