Compare commits

..

No commits in common. "main" and "codex/retool-reports-for-better-visualization" have entirely different histories.

17 changed files with 268 additions and 1278 deletions

View file

@ -1,3 +0,0 @@
[flake8]
exclude = .git, .venv, output, static/icons
max-line-length = 160

View file

@ -1,151 +0,0 @@
name: CI
on:
push:
pull_request:
workflow_dispatch:
jobs:
ci:
name: Lint, test, and build
# This label must match your Forgejo runner's label
runs-on: docker
# Use a clean Debian container so tools are predictable
container: debian:stable-slim
env:
PYTHONDONTWRITEBYTECODE: "1"
PIP_DISABLE_PIP_VERSION_CHECK: "1"
UV_SYSTEM_PYTHON: "1"
steps:
- name: Install build tooling
run: |
set -euo pipefail
apt-get update
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
git ca-certificates python3 python3-venv python3-pip python3-setuptools \
python3-wheel sqlite3
update-ca-certificates || true
- name: Checkout repository (manual)
run: |
set -euo pipefail
if [ -f Makefile ] || [ -d .git ]; then
echo "Repository present in workspace; skipping clone"
exit 0
fi
REMOTE_URL="${CI_REPOSITORY_URL:-}"
if [ -z "$REMOTE_URL" ]; then
if [ -n "${GITHUB_SERVER_URL:-}" ] && [ -n "${GITHUB_REPOSITORY:-}" ]; then
REMOTE_URL="${GITHUB_SERVER_URL%/}/${GITHUB_REPOSITORY}.git"
elif [ -n "${GITHUB_REPOSITORY:-}" ]; then
REMOTE_URL="https://git.jordanwages.com/${GITHUB_REPOSITORY}.git"
else
echo "Unable to determine repository URL from CI environment" >&2
exit 1
fi
fi
AUTH_URL="$REMOTE_URL"
if [ -n "${GITHUB_TOKEN:-}" ]; then
ACTOR="${GITHUB_ACTOR:-oauth2}"
AUTH_URL=$(printf '%s' "$REMOTE_URL" | sed -E "s#^https://#https://${ACTOR}:${GITHUB_TOKEN}@#")
fi
echo "Cloning from: $REMOTE_URL"
if ! git clone --depth 1 "$AUTH_URL" .; then
echo "Auth clone failed; trying anonymous clone..." >&2
git clone --depth 1 "$REMOTE_URL" .
fi
if [ -n "${GITHUB_SHA:-}" ]; then
git fetch --depth 1 origin "$GITHUB_SHA" || true
git checkout -q "$GITHUB_SHA" || true
elif [ -n "${GITHUB_REF_NAME:-}" ]; then
git fetch --depth 1 origin "$GITHUB_REF_NAME" || true
git checkout -q "$GITHUB_REF_NAME" || true
fi
- name: Set up venv and install deps
run: |
set -euo pipefail
# Prefer persistent cache if runner provides /cache
USE_CACHE=0
if [ -d /cache ] && [ -w /cache ]; then
export PIP_CACHE_DIR=/cache/pip
mkdir -p "$PIP_CACHE_DIR"
REQ_HASH=$(sha256sum requirements.txt | awk '{print $1}')
PYVER=$(python3 -c 'import sys;print(".".join(map(str, sys.version_info[:2])))')
CACHE_VENV="/cache/venv-${REQ_HASH}-py${PYVER}"
if [ ! -f "$CACHE_VENV/bin/activate" ]; then
echo "Preparing cached virtualenv: $CACHE_VENV"
rm -rf "$CACHE_VENV" || true
python3 -m venv "$CACHE_VENV"
fi
ln -sfn "$CACHE_VENV" .venv
USE_CACHE=1
else
# Fallback to local venv
python3 -m venv .venv
fi
# If the link didn't produce an activate file, fallback to local venv
if [ ! -f .venv/bin/activate ]; then
echo "Cached venv missing; creating local .venv"
rm -f .venv
python3 -m venv .venv
USE_CACHE=0
fi
. .venv/bin/activate
python -m pip install --upgrade pip
if [ "$USE_CACHE" = "1" ]; then
# Ensure required packages are present; pip will use cache
pip install -r requirements.txt pytest || pip install -r requirements.txt pytest
else
pip install -r requirements.txt pytest
fi
- name: Format check (black)
run: |
. .venv/bin/activate
black --check .
- name: Lint (flake8)
run: |
. .venv/bin/activate
flake8 .
- name: Run tests (pytest)
run: |
. .venv/bin/activate
export PYTHONPATH="$(pwd)${PYTHONPATH:+:$PYTHONPATH}"
pytest -q --maxfail=1
- name: Build sample reports (no artifact upload)
run: |
set -euo pipefail
. .venv/bin/activate
python - <<'PY'
import sqlite3, pathlib
db = pathlib.Path('database/ngxstat.db')
db.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(db)
cur = conn.cursor()
cur.execute('''CREATE TABLE IF NOT EXISTS logs (
id INTEGER PRIMARY KEY,
ip TEXT,
host TEXT,
time TEXT,
request TEXT,
status INTEGER,
bytes_sent INTEGER,
referer TEXT,
user_agent TEXT,
cache_status TEXT
)''')
cur.execute("INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES ('127.0.0.1','example.com','2024-01-01 10:00:00','GET / HTTP/1.1',200,100,'-','curl','MISS')")
cur.execute("INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES ('127.0.0.1','example.com','2024-01-01 10:05:00','GET /about HTTP/1.1',200,100,'-','curl','MISS')")
conn.commit(); conn.close()
PY
python scripts/generate_reports.py global
python scripts/generate_reports.py hourly
python scripts/generate_reports.py index
tar -czf ngxstat-reports.tar.gz -C output .
echo "Built sample reports archive: ngxstat-reports.tar.gz"

View file

@ -24,9 +24,6 @@ This document outlines general practices and expectations for AI agents assistin
The `run-import.sh` script can initialize this environment automatically. The `run-import.sh` script can initialize this environment automatically.
Always activate the virtual environment before running scripts or tests. Always activate the virtual environment before running scripts or tests.
* Before committing code run `black` for consistent formatting and execute
the test suite with `pytest`. All tests should pass.
* Dependency management: Use `requirements.txt` or `pip-tools` * Dependency management: Use `requirements.txt` or `pip-tools`
* Use standard libraries where feasible (e.g., `sqlite3`, `argparse`, `datetime`) * Use standard libraries where feasible (e.g., `sqlite3`, `argparse`, `datetime`)
* Adopt `typer` for CLI command interface (if CLI ergonomics matter) * Adopt `typer` for CLI command interface (if CLI ergonomics matter)
@ -92,14 +89,6 @@ ngxstat/
If uncertain, the agent should prompt the human for clarification before making architectural assumptions. If uncertain, the agent should prompt the human for clarification before making architectural assumptions.
## Testing
Use `pytest` for automated tests. Run the suite from an activated virtual environment and ensure all tests pass before committing:
```bash
pytest -q
```
--- ---
## Future Capabilities ## Future Capabilities
@ -117,4 +106,3 @@ As the project matures, agents may also:
* **2025-07-17**: Initial version by Jordan + ChatGPT * **2025-07-17**: Initial version by Jordan + ChatGPT
* **2025-07-17**: Expanded virtual environment usage guidance * **2025-07-17**: Expanded virtual environment usage guidance

148
README.md
View file

@ -1,16 +1,11 @@
# ngxstat # ngxstat
Per-domain Nginx log analytics with hybrid static reports and live insights.
`ngxstat` is a lightweight log analytics toolkit for Nginx. It imports access ## Generating Reports
logs into an SQLite database and renders static dashboards so you can explore
per-domain metrics without running a heavy backend service.
## Requirements Use the `generate_reports.py` script to build aggregated JSON and HTML snippet files from `database/ngxstat.db`.
* Python 3.10+ Create a virtual environment and install dependencies:
* Access to the Nginx log files (default: `/var/log/nginx`)
The helper scripts create a virtual environment on first run, but you can also
set one up manually:
```bash ```bash
python3 -m venv .venv python3 -m venv .venv
@ -18,95 +13,118 @@ source .venv/bin/activate
pip install -r requirements.txt pip install -r requirements.txt
``` ```
Then run one or more of the interval commands:
```bash
python scripts/generate_reports.py hourly
python scripts/generate_reports.py daily
python scripts/generate_reports.py weekly
python scripts/generate_reports.py monthly
```
Each command accepts optional flags to generate per-domain reports. Use
`--domain <name>` to limit output to a specific domain or `--all-domains`
to generate a subdirectory for every domain found in the database:
```bash
# Hourly reports for example.com only
python scripts/generate_reports.py hourly --domain example.com
# Weekly reports for all domains individually
python scripts/generate_reports.py weekly --all-domains
```
Reports are written under the `output/` directory. Each command updates the corresponding `<interval>.json` file and writes one HTML snippet per report. These snippets are loaded dynamically by the main dashboard using Chart.js and DataTables.
### Configuring Reports
Report queries are defined in `reports.yml`. Each entry specifies the `name`,
optional `label` and `chart` type, and a SQL `query` that must return `bucket`
and `value` columns. The special token `{bucket}` is replaced with the
appropriate SQLite `strftime` expression for each interval (hourly, daily,
weekly or monthly) so that a single definition works across all durations.
When `generate_reports.py` runs, every definition is executed for the requested
interval and creates `output/<interval>/<name>.json` plus a small HTML snippet
`output/<interval>/<name>.html` used by the dashboard.
Example snippet:
```yaml
- name: hits
chart: bar
query: |
SELECT {bucket} AS bucket,
COUNT(*) AS value
FROM logs
GROUP BY bucket
ORDER BY bucket
```
Add or modify entries in `reports.yml` to tailor the generated metrics.
## Importing Logs ## Importing Logs
Run the importer to ingest new log entries into `database/ngxstat.db`: Use the `run-import.sh` script to set up the Python environment if needed and import the latest Nginx log entries into `database/ngxstat.db`.
```bash ```bash
./run-import.sh ./run-import.sh
``` ```
Rotated logs are processed in order and only entries newer than the last This script is suitable for cron jobs as it creates the virtual environment on first run, installs dependencies and reuses the environment on subsequent runs.
imported timestamp are added.
## Generating Reports The importer handles rotated logs in order from oldest to newest so entries are
processed exactly once. If you rerun the script, it only ingests records with a
timestamp newer than the latest one already stored in the database, preventing
duplicates.
To build the HTML dashboard and JSON data files use `run-reports.sh` which runs ## Cron Report Generation
all intervals in one go:
Use the `run-reports.sh` script to run all report intervals in one step. The script sets up the Python environment the same way as `run-import.sh`, making it convenient for automation via cron.
```bash ```bash
./run-reports.sh ./run-reports.sh
``` ```
The script calls `scripts/generate_reports.py` internally to create hourly, Running this script will create or update the hourly, daily, weekly and monthly reports under `output/`. It also detects all unique domains found in the database and writes per-domain reports to `output/domains/<domain>/<interval>` alongside the aggregate data. After generation, open `output/index.html` in your browser to browse the reports.
daily, weekly and monthly reports, then writes analysis JSON files used by the
"Analysis" tab. Per-domain reports are written under `output/domains/<domain>`
alongside the aggregate data. Open `output/index.html` in a browser to view the
dashboard.
If you prefer to run individual commands you can invoke the generator directly:
```bash ## Log Analysis
python scripts/generate_reports.py hourly
python scripts/generate_reports.py daily --all-domains
```
## Analysis Helpers The `run-analysis.sh` script runs helper routines that inspect the database. It
creates or reuses the virtual environment and then executes a set of analysis
`run-analysis.sh` executes additional utilities that examine the database for commands to spot missing domains, suggest cache rules and detect potential
missing domains, caching opportunities and potential threats. The JSON output is threats.
saved under `output/analysis` and appears in the "Analysis" tab. The
`run-reports.sh` script also generates these JSON files as part of the build.
## UX Controls
The dashboard defaults to a 7day window for time series. Your view preferences
persist locally in the browser under the `ngxstat-state-v2` key. Use the
"Reset view" button to clear saved state and restore defaults.
```bash ```bash
./run-analysis.sh ./run-analysis.sh
``` ```
The JSON results are written under `output/analysis` and can be viewed from the
"Analysis" tab in the generated dashboard.
## Serving Reports with Nginx
## Serving the Reports To expose the generated HTML dashboards and JSON files over HTTP you can use a
simple Nginx server block. Point the `root` directive to the repository's
The generated files are static. You can serve them with a simple Nginx block: `output/` directory and optionally restrict access to your local network.
```nginx ```nginx
server { server {
listen 80; listen 80;
server_name example.com; server_name example.com;
# Path to the generated reports
root /path/to/ngxstat/output; root /path/to/ngxstat/output;
location / { location / {
try_files $uri $uri/ =404; try_files $uri $uri/ =404;
} }
# Allow access only from private networks
allow 192.0.0.0/8;
allow 10.0.0.0/8;
deny all;
} }
``` ```
Restrict access if the reports should not be public. With this configuration the generated static files are served directly by
Nginx while connections outside of `192.*` and `10.*` are denied.
## Running Tests
Install the development dependencies and execute the suite with `pytest`:
```bash
pip install -r requirements.txt
pytest -q
```
All tests must pass before submitting changes.
## Acknowledgements
ngxstat uses the following thirdparty resources:
* [Chart.js](https://www.chartjs.org/) for charts
* [DataTables](https://datatables.net/) and [jQuery](https://jquery.com/) for table views
* [Bulma CSS](https://bulma.io/) for styling
* Icons from [Free CC0 Icons](https://cc0-icons.jonh.eu/) by Jon Hicks (CC0 / MIT)
* [Typer](https://typer.tiangolo.com/) for the command-line interface
* [Jinja2](https://palletsprojects.com/p/jinja/) for templating
The project is licensed under the GPLv3. Icon assets remain in the public domain
via the CC0 license.

View file

@ -48,7 +48,6 @@
label: Top Domains label: Top Domains
icon: globe icon: globe
chart: table chart: table
top_n: 50
per_domain: false per_domain: false
bucket: domain bucket: domain
bucket_label: Domain bucket_label: Domain
@ -76,81 +75,47 @@
label: Top Paths label: Top Paths
icon: map icon: map
chart: table chart: table
top_n: 50 bucket: path
buckets: bucket_label: Path
- domain
- path
bucket_label:
- Domain
- Path
query: | query: |
WITH paths AS ( SELECT path AS path,
SELECT host AS domain, COUNT(*) AS value
substr(substr(request, instr(request, ' ') + 1), 1, FROM (
SELECT substr(substr(request, instr(request, ' ') + 1), 1,
instr(substr(request, instr(request, ' ') + 1), ' ') - 1) AS path instr(substr(request, instr(request, ' ') + 1), ' ') - 1) AS path
FROM logs FROM logs
), ranked AS (
SELECT domain, path, COUNT(*) AS value,
ROW_NUMBER() OVER (PARTITION BY domain ORDER BY COUNT(*) DESC) AS rn
FROM paths
GROUP BY domain, path
) )
SELECT domain, path, value GROUP BY path
FROM ranked ORDER BY value DESC
WHERE rn <= 20 LIMIT 20
ORDER BY domain, value DESC
- name: user_agents - name: user_agents
label: User Agents label: User Agents
icon: user icon: user
chart: table chart: table
top_n: 50 bucket: user_agent
buckets: bucket_label: User Agent
- domain
- user_agent
bucket_label:
- Domain
- User Agent
query: | query: |
WITH ua AS ( SELECT user_agent AS user_agent,
SELECT host AS domain, user_agent COUNT(*) AS value
FROM logs FROM logs
), ranked AS ( GROUP BY user_agent
SELECT domain, user_agent, COUNT(*) AS value, ORDER BY value DESC
ROW_NUMBER() OVER (PARTITION BY domain ORDER BY COUNT(*) DESC) AS rn LIMIT 20
FROM ua
GROUP BY domain, user_agent
)
SELECT domain, user_agent, value
FROM ranked
WHERE rn <= 20
ORDER BY domain, value DESC
- name: referrers - name: referrers
label: Referrers label: Referrers
icon: link icon: link
chart: table chart: table
top_n: 50 bucket: referrer
buckets: bucket_label: Referrer
- domain
- referrer
bucket_label:
- Domain
- Referrer
query: | query: |
WITH ref AS ( SELECT referer AS referrer,
SELECT host AS domain, referer AS referrer COUNT(*) AS value
FROM logs FROM logs
), ranked AS ( GROUP BY referrer
SELECT domain, referrer, COUNT(*) AS value, ORDER BY value DESC
ROW_NUMBER() OVER (PARTITION BY domain ORDER BY COUNT(*) DESC) AS rn LIMIT 20
FROM ref
GROUP BY domain, referrer
)
SELECT domain, referrer, value
FROM ranked
WHERE rn <= 20
ORDER BY domain, value DESC
- name: status_distribution - name: status_distribution
label: HTTP Statuses label: HTTP Statuses
@ -174,40 +139,3 @@
- "#209cee" - "#209cee"
- "#ffdd57" - "#ffdd57"
- "#f14668" - "#f14668"
# New time-series: status classes over time (stacked)
- name: status_classes_timeseries
label: Status Classes Over Time
icon: server
chart: stackedBar
bucket: time_bucket
bucket_label: Time
stacked: true
query: |
SELECT {bucket} AS time_bucket,
SUM(CASE WHEN status BETWEEN 200 AND 299 THEN 1 ELSE 0 END) AS "2xx",
SUM(CASE WHEN status BETWEEN 300 AND 399 THEN 1 ELSE 0 END) AS "3xx",
SUM(CASE WHEN status BETWEEN 400 AND 499 THEN 1 ELSE 0 END) AS "4xx",
SUM(CASE WHEN status BETWEEN 500 AND 599 THEN 1 ELSE 0 END) AS "5xx",
COUNT(*) AS total
FROM logs
GROUP BY time_bucket
ORDER BY time_bucket
# New time-series: cache status over time (compact Hit/Miss; exclude '-' by default)
- name: cache_status_timeseries
label: Cache Status Over Time
icon: archive
chart: stackedBar
bucket: time_bucket
bucket_label: Time
stacked: true
exclude_values: ["-"]
query: |
SELECT {bucket} AS time_bucket,
SUM(CASE WHEN cache_status = 'HIT' THEN 1 ELSE 0 END) AS hit,
SUM(CASE WHEN cache_status = 'MISS' THEN 1 ELSE 0 END) AS miss,
COUNT(*) AS total
FROM logs
GROUP BY time_bucket
ORDER BY time_bucket

View file

@ -29,25 +29,21 @@ fi
# Generate reports for all domains combined # Generate reports for all domains combined
echo "[INFO] Generating aggregate reports..." echo "[INFO] Generating aggregate reports..."
python -m scripts.generate_reports hourly python scripts/generate_reports.py hourly
python -m scripts.generate_reports daily python scripts/generate_reports.py daily
python -m scripts.generate_reports weekly python scripts/generate_reports.py weekly
python -m scripts.generate_reports monthly python scripts/generate_reports.py monthly
python -m scripts.generate_reports global python scripts/generate_reports.py global
# Generate reports for each individual domain # Generate reports for each individual domain
echo "[INFO] Generating per-domain reports..." echo "[INFO] Generating per-domain reports..."
python -m scripts.generate_reports hourly --all-domains python scripts/generate_reports.py hourly --all-domains
python -m scripts.generate_reports daily --all-domains python scripts/generate_reports.py daily --all-domains
python -m scripts.generate_reports weekly --all-domains python scripts/generate_reports.py weekly --all-domains
python -m scripts.generate_reports monthly --all-domains python scripts/generate_reports.py monthly --all-domains
# Generate analysis JSON
echo "[INFO] Generating analysis files..."
python -m scripts.generate_reports analysis
# Generate root index # Generate root index
python -m scripts.generate_reports index python scripts/generate_reports.py index
# Deactivate to keep cron environment clean # Deactivate to keep cron environment clean
if type deactivate >/dev/null 2>&1; then if type deactivate >/dev/null 2>&1; then

View file

@ -18,7 +18,7 @@ from __future__ import annotations
import sqlite3 import sqlite3
from pathlib import Path from pathlib import Path
from typing import List, Optional, Set from typing import Dict, List, Optional, Set
from datetime import datetime, timedelta from datetime import datetime, timedelta
import json import json
@ -105,9 +105,7 @@ def hits(domain: Optional[str] = typer.Option(None, help="Filter by domain")) ->
@app.command("cache-ratio") @app.command("cache-ratio")
def cache_ratio_cmd( def cache_ratio_cmd(domain: Optional[str] = typer.Option(None, help="Filter by domain")) -> None:
domain: Optional[str] = typer.Option(None, help="Filter by domain")
) -> None:
"""Display cache hit ratio as a percentage.""" """Display cache hit ratio as a percentage."""
ratio = get_cache_ratio(domain) * 100 ratio = get_cache_ratio(domain) * 100
if domain: if domain:
@ -117,11 +115,7 @@ def cache_ratio_cmd(
@app.command("check-missing-domains") @app.command("check-missing-domains")
def check_missing_domains( def check_missing_domains(json_output: bool = typer.Option(False, "--json", help="Output missing domains as JSON")) -> None:
json_output: bool = typer.Option(
False, "--json", help="Output missing domains as JSON"
)
) -> None:
"""Show domains present in the database but absent from Nginx config.""" """Show domains present in the database but absent from Nginx config."""
try: try:
from scripts.generate_reports import _get_domains as _db_domains from scripts.generate_reports import _get_domains as _db_domains
@ -155,9 +149,12 @@ def check_missing_domains(
typer.echo(d) typer.echo(d)
@app.command("suggest-cache")
def suggest_cache( def suggest_cache(
threshold: int = 10, threshold: int = typer.Option(
json_output: bool = False, 10, help="Minimum number of MISS entries to report"
),
json_output: bool = typer.Option(False, "--json", help="Output results as JSON"),
) -> None: ) -> None:
"""Suggest domain/path pairs that could benefit from caching. """Suggest domain/path pairs that could benefit from caching.
@ -190,7 +187,7 @@ def suggest_cache(
HAVING miss_count >= ? HAVING miss_count >= ?
ORDER BY miss_count DESC ORDER BY miss_count DESC
""", """,
(int(threshold),), (threshold,),
) )
rows = [r for r in cur.fetchall() if r[0] in no_cache] rows = [r for r in cur.fetchall() if r[0] in no_cache]
@ -210,18 +207,13 @@ def suggest_cache(
for item in result: for item in result:
typer.echo(f"{item['host']} {item['path']} {item['misses']}") typer.echo(f"{item['host']} {item['path']} {item['misses']}")
@app.command("suggest-cache")
def suggest_cache_cli(
threshold: int = typer.Option(10, help="Minimum number of MISS entries to report"),
json_output: bool = typer.Option(False, "--json", help="Output results as JSON"),
) -> None:
"""CLI wrapper for suggest_cache."""
suggest_cache(threshold=threshold, json_output=json_output)
@app.command("detect-threats")
def detect_threats( def detect_threats(
hours: int = 1, hours: int = typer.Option(1, help="Number of recent hours to analyze"),
ip_threshold: int = 100, ip_threshold: int = typer.Option(
100, help="Requests from a single IP to flag"
),
) -> None: ) -> None:
"""Detect potential security threats from recent logs.""" """Detect potential security threats from recent logs."""
@ -237,8 +229,8 @@ def detect_threats(
max_dt = datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S") max_dt = datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S")
recent_end = max_dt recent_end = max_dt
recent_start = recent_end - timedelta(hours=int(hours)) recent_start = recent_end - timedelta(hours=hours)
prev_start = recent_start - timedelta(hours=int(hours)) prev_start = recent_start - timedelta(hours=hours)
prev_end = recent_start prev_end = recent_start
fmt = "%Y-%m-%d %H:%M:%S" fmt = "%Y-%m-%d %H:%M:%S"
@ -324,7 +316,9 @@ def detect_threats(
""", """,
(recent_start_s, recent_end_s, ip_threshold), (recent_start_s, recent_end_s, ip_threshold),
) )
high_ip_requests = [{"ip": ip, "requests": cnt} for ip, cnt in cur.fetchall()] high_ip_requests = [
{"ip": ip, "requests": cnt} for ip, cnt in cur.fetchall()
]
conn.close() conn.close()
@ -345,14 +339,6 @@ def detect_threats(
out_path.write_text(json.dumps(report, indent=2)) out_path.write_text(json.dumps(report, indent=2))
typer.echo(json.dumps(report)) typer.echo(json.dumps(report))
@app.command("detect-threats")
def detect_threats_cli(
hours: int = typer.Option(1, help="Number of recent hours to analyze"),
ip_threshold: int = typer.Option(100, help="Requests from a single IP to flag"),
) -> None:
"""CLI wrapper for detect_threats."""
detect_threats(hours=hours, ip_threshold=ip_threshold)
if __name__ == "__main__": if __name__ == "__main__":
app() app()

View file

@ -1,27 +1,18 @@
import json import json
import sys
import sqlite3 import sqlite3
from pathlib import Path from pathlib import Path
import shutil import shutil
from typing import List, Dict, Optional from typing import List, Dict, Optional
from datetime import datetime, timezone
import time
import yaml import yaml
import typer import typer
from jinja2 import Environment, FileSystemLoader from jinja2 import Environment, FileSystemLoader
# Ensure project root is importable when running as a script (python scripts/generate_reports.py)
PROJECT_ROOT = Path(__file__).resolve().parent.parent
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
DB_PATH = Path("database/ngxstat.db") DB_PATH = Path("database/ngxstat.db")
OUTPUT_DIR = Path("output") OUTPUT_DIR = Path("output")
TEMPLATE_DIR = Path("templates") TEMPLATE_DIR = Path("templates")
REPORT_CONFIG = Path("reports.yml") REPORT_CONFIG = Path("reports.yml")
GENERATED_MARKER = OUTPUT_DIR / "generated.txt"
# Mapping of interval names to SQLite strftime formats. These strings are # Mapping of interval names to SQLite strftime formats. These strings are
# substituted into report queries whenever the special ``{bucket}`` token is # substituted into report queries whenever the special ``{bucket}`` token is
@ -37,19 +28,6 @@ INTERVAL_FORMATS = {
app = typer.Typer(help="Generate aggregated log reports") app = typer.Typer(help="Generate aggregated log reports")
@app.callback()
def _cli_callback(ctx: typer.Context) -> None:
"""Register post-command hook to note generation time."""
def _write_marker() -> None:
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# Use timezone-aware UTC to avoid deprecation warnings and ambiguity
timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
GENERATED_MARKER.write_text(f"{timestamp}\n")
ctx.call_on_close(_write_marker)
def _get_domains() -> List[str]: def _get_domains() -> List[str]:
"""Return a sorted list of unique domains from the logs table.""" """Return a sorted list of unique domains from the logs table."""
conn = sqlite3.connect(DB_PATH) conn = sqlite3.connect(DB_PATH)
@ -78,17 +56,14 @@ def _save_json(path: Path, data: List[Dict]) -> None:
def _copy_icons() -> None: def _copy_icons() -> None:
"""Copy vendored icons and scripts to the output directory.""" """Copy vendored icons to the output directory."""
src_dir = Path("static/icons") src_dir = Path("static/icons")
dst_dir = OUTPUT_DIR / "icons" dst_dir = OUTPUT_DIR / "icons"
if src_dir.is_dir(): if not src_dir.is_dir():
dst_dir.mkdir(parents=True, exist_ok=True) return
for icon in src_dir.glob("*.svg"): dst_dir.mkdir(parents=True, exist_ok=True)
shutil.copy(icon, dst_dir / icon.name) for icon in src_dir.glob("*.svg"):
shutil.copy(icon, dst_dir / icon.name)
js_src = Path("static/chartManager.js")
if js_src.is_file():
shutil.copy(js_src, OUTPUT_DIR / js_src.name)
def _render_snippet(report: Dict, out_dir: Path) -> None: def _render_snippet(report: Dict, out_dir: Path) -> None:
@ -99,9 +74,7 @@ def _render_snippet(report: Dict, out_dir: Path) -> None:
snippet_path.write_text(template.render(report=report)) snippet_path.write_text(template.render(report=report))
def _write_stats( def _write_stats() -> None:
generated_at: Optional[str] = None, generation_seconds: Optional[float] = None
) -> None:
"""Query basic dataset stats and write them to ``output/global/stats.json``.""" """Query basic dataset stats and write them to ``output/global/stats.json``."""
conn = sqlite3.connect(DB_PATH) conn = sqlite3.connect(DB_PATH)
cur = conn.cursor() cur = conn.cursor()
@ -125,10 +98,6 @@ def _write_stats(
"end_date": end_date, "end_date": end_date,
"unique_domains": unique_domains, "unique_domains": unique_domains,
} }
if generated_at:
stats["generated_at"] = generated_at
if generation_seconds is not None:
stats["generation_seconds"] = generation_seconds
out_path = OUTPUT_DIR / "global" / "stats.json" out_path = OUTPUT_DIR / "global" / "stats.json"
_save_json(out_path, stats) _save_json(out_path, stats)
@ -185,16 +154,6 @@ def _generate_interval(interval: str, domain: Optional[str] = None) -> None:
name = definition["name"] name = definition["name"]
query = definition["query"].replace("{bucket}", bucket) query = definition["query"].replace("{bucket}", bucket)
query = query.replace("FROM logs", "FROM logs_view") query = query.replace("FROM logs", "FROM logs_view")
# Apply top_n limit for tables (performance-friendly), if configured
top_n = definition.get("top_n")
chart_type = definition.get("chart", "line")
if top_n and chart_type == "table":
try:
n = int(top_n)
if "LIMIT" not in query.upper():
query = f"{query}\nLIMIT {n}"
except Exception:
pass
cur.execute(query) cur.execute(query)
rows = cur.fetchall() rows = cur.fetchall()
headers = [c[0] for c in cur.description] headers = [c[0] for c in cur.description]
@ -212,26 +171,12 @@ def _generate_interval(interval: str, domain: Optional[str] = None) -> None:
entry["icon"] = definition["icon"] entry["icon"] = definition["icon"]
if "bucket" in definition: if "bucket" in definition:
entry["bucket"] = definition["bucket"] entry["bucket"] = definition["bucket"]
if "buckets" in definition:
entry["buckets"] = definition["buckets"]
if "bucket_label" in definition: if "bucket_label" in definition:
entry["bucket_label"] = definition["bucket_label"] entry["bucket_label"] = definition["bucket_label"]
if "color" in definition: if "color" in definition:
entry["color"] = definition["color"] entry["color"] = definition["color"]
if "colors" in definition: if "colors" in definition:
entry["colors"] = definition["colors"] entry["colors"] = definition["colors"]
# Optional UX metadata passthrough for frontend-only transforms
for key in (
"windows_supported",
"window_default",
"group_others_threshold",
"exclude_values",
"top_n",
"stacked",
"palette",
):
if key in definition:
entry[key] = definition[key]
_render_snippet(entry, out_dir) _render_snippet(entry, out_dir)
report_list.append(entry) report_list.append(entry)
@ -252,9 +197,14 @@ def _generate_root_index() -> None:
"""Render the top-level index listing all intervals and domains.""" """Render the top-level index listing all intervals and domains."""
_copy_icons() _copy_icons()
intervals = sorted( intervals = sorted(
[name for name in INTERVAL_FORMATS if (OUTPUT_DIR / name).is_dir()] [
name
for name in INTERVAL_FORMATS
if (OUTPUT_DIR / name).is_dir()
]
) )
domains_dir = OUTPUT_DIR / "domains" domains_dir = OUTPUT_DIR / "domains"
domains: List[str] = [] domains: List[str] = []
if domains_dir.is_dir(): if domains_dir.is_dir():
@ -277,10 +227,6 @@ def _generate_global() -> None:
typer.echo("No report definitions found") typer.echo("No report definitions found")
return return
start_time = time.time()
# Use timezone-aware UTC for generated_at (string remains unchanged format)
generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
_copy_icons() _copy_icons()
conn = sqlite3.connect(DB_PATH) conn = sqlite3.connect(DB_PATH)
@ -296,16 +242,6 @@ def _generate_global() -> None:
name = definition["name"] name = definition["name"]
query = definition["query"] query = definition["query"]
# Apply top_n limit for tables (performance-friendly), if configured
top_n = definition.get("top_n")
chart_type = definition.get("chart", "line")
if top_n and chart_type == "table":
try:
n = int(top_n)
if "LIMIT" not in query.upper():
query = f"{query}\nLIMIT {n}"
except Exception:
pass
cur.execute(query) cur.execute(query)
rows = cur.fetchall() rows = cur.fetchall()
headers = [c[0] for c in cur.description] headers = [c[0] for c in cur.description]
@ -323,63 +259,20 @@ def _generate_global() -> None:
entry["icon"] = definition["icon"] entry["icon"] = definition["icon"]
if "bucket" in definition: if "bucket" in definition:
entry["bucket"] = definition["bucket"] entry["bucket"] = definition["bucket"]
if "buckets" in definition:
entry["buckets"] = definition["buckets"]
if "bucket_label" in definition: if "bucket_label" in definition:
entry["bucket_label"] = definition["bucket_label"] entry["bucket_label"] = definition["bucket_label"]
if "color" in definition: if "color" in definition:
entry["color"] = definition["color"] entry["color"] = definition["color"]
if "colors" in definition: if "colors" in definition:
entry["colors"] = definition["colors"] entry["colors"] = definition["colors"]
# Optional UX metadata passthrough for frontend-only transforms
for key in (
"windows_supported",
"window_default",
"group_others_threshold",
"exclude_values",
"top_n",
"stacked",
"palette",
):
if key in definition:
entry[key] = definition[key]
_render_snippet(entry, out_dir) _render_snippet(entry, out_dir)
report_list.append(entry) report_list.append(entry)
_save_json(out_dir / "reports.json", report_list) _save_json(out_dir / "reports.json", report_list)
elapsed = round(time.time() - start_time, 2) _write_stats()
_write_stats(generated_at, elapsed)
typer.echo("Generated global reports") typer.echo("Generated global reports")
def _generate_analysis() -> None:
"""Generate analysis JSON files consumed by the Analysis tab."""
try:
# Import lazily to avoid circulars and keep dependencies optional
from scripts import analyze
except Exception as exc: # pragma: no cover - defensive
typer.echo(f"Failed to import analysis module: {exc}")
return
# Ensure output root and icons present for parity
_copy_icons()
# These commands write JSON files under output/analysis/
try:
analyze.check_missing_domains(json_output=True)
except Exception as exc: # pragma: no cover - continue best-effort
typer.echo(f"check_missing_domains failed: {exc}")
try:
analyze.suggest_cache(json_output=True)
except Exception as exc: # pragma: no cover
typer.echo(f"suggest_cache failed: {exc}")
try:
analyze.detect_threats()
except Exception as exc: # pragma: no cover
typer.echo(f"detect_threats failed: {exc}")
typer.echo("Generated analysis JSON files")
@app.command() @app.command()
def hourly( def hourly(
domain: Optional[str] = typer.Option( domain: Optional[str] = typer.Option(
@ -450,12 +343,6 @@ def global_reports() -> None:
_generate_global() _generate_global()
@app.command()
def analysis() -> None:
"""Generate analysis JSON files for the Analysis tab."""
_generate_analysis()
@app.command() @app.command()
def index() -> None: def index() -> None:
"""Generate the root index page linking all reports.""" """Generate the root index page linking all reports."""

View file

@ -61,9 +61,7 @@ try:
suffix = match.group(1) suffix = match.group(1)
number = int(suffix.lstrip(".")) if suffix else 0 number = int(suffix.lstrip(".")) if suffix else 0
log_files.append((number, os.path.join(LOG_DIR, f))) log_files.append((number, os.path.join(LOG_DIR, f)))
log_files = [ log_files = [path for _, path in sorted(log_files, key=lambda x: x[0], reverse=True)]
path for _, path in sorted(log_files, key=lambda x: x[0], reverse=True)
]
except FileNotFoundError: except FileNotFoundError:
print(f"[ERROR] Log directory not found: {LOG_DIR}") print(f"[ERROR] Log directory not found: {LOG_DIR}")
exit(1) exit(1)

View file

@ -93,3 +93,4 @@ def parse_servers(paths: Set[Path]) -> List[Dict[str, str]]:
entry["root"] = " ".join(directives["root"]) entry["root"] = " ".join(directives["root"])
servers.append(entry) servers.append(entry)
return servers return servers

View file

@ -1,109 +0,0 @@
// Token of the load currently in flight; null when no load is active.
export let currentLoad = null;
// Registry of live load tokens (keyed by the token object itself); each token
// carries its AbortController and the charts created during that load.
const loadInfo = new Map();
// Begin a new load cycle for `container`: cancel any in-flight load, clear the
// container, and return a fresh token that owns an AbortController plus a map
// of charts created during this load.
export function newLoad(container) {
  if (currentLoad) abortLoad(currentLoad);
  reset(container);
  const token = {
    controller: new AbortController(),
    charts: new Map(),
  };
  loadInfo.set(token, token);
  currentLoad = token;
  return token;
}
// Abort the given load token: cancel its fetches via the AbortController,
// destroy every chart it created, and drop it from the registry. Tokens that
// were never registered (or already aborted) are ignored.
export function abortLoad(token) {
  const info = loadInfo.get(token);
  if (!info) {
    return;
  }
  info.controller.abort();
  // Chart destruction failures are non-fatal; keep tearing down the rest.
  for (const chart of info.charts.values()) {
    try {
      chart.destroy();
    } catch (e) {}
  }
  loadInfo.delete(token);
  if (currentLoad === token) {
    currentLoad = null;
  }
}
// Attach a chart to its owning load token under `id`. If the token was
// already aborted (gone from the registry), destroy the orphaned chart
// immediately instead of tracking it.
export function registerChart(token, id, chart) {
  const info = loadInfo.get(token);
  if (!info) {
    chart.destroy();
    return;
  }
  info.charts.set(id, chart);
}
// Empty a container element, destroying any Chart.js instances bound to
// canvases inside it first so Chart.js does not keep stale internal state.
export function reset(container) {
  if (!container) {
    return;
  }
  for (const canvas of container.querySelectorAll('canvas')) {
    const chart = Chart.getChart(canvas);
    if (chart) {
      chart.destroy();
    }
  }
  container.innerHTML = '';
}
// ---- Lightweight client-side data helpers ----
// Slice last N rows from a time-ordered array
export function sliceWindow(data, n) {
if (!Array.isArray(data) || n === undefined || n === null) return data;
if (n === 'all') return data;
const count = Number(n);
if (!Number.isFinite(count) || count <= 0) return data;
return data.slice(-count);
}
// Exclude rows whose value in key is in excluded list
export function excludeValues(data, key, excluded = []) {
if (!excluded || excluded.length === 0) return data;
const set = new Set(excluded);
return data.filter(row => !set.has(row[key]));
}
// Compute percentages for categorical distributions (valueKey default 'value')
export function toPercent(data, valueKey = 'value') {
const total = data.reduce((s, r) => s + (Number(r[valueKey]) || 0), 0);
if (total <= 0) return data.map(r => ({ ...r }));
return data.map(r => ({ ...r, [valueKey]: (Number(r[valueKey]) || 0) * 100 / total }));
}
// Group categories with share < threshold into an 'Other' bucket.
export function groupOthers(data, bucketKey, valueKey = 'value', threshold = 0.03, otherLabel = 'Other') {
if (!Array.isArray(data) || data.length === 0) return data;
const total = data.reduce((s, r) => s + (Number(r[valueKey]) || 0), 0);
if (total <= 0) return data;
const major = [];
let other = 0;
for (const r of data) {
const v = Number(r[valueKey]) || 0;
if (total && v / total < threshold) {
other += v;
} else {
major.push({ ...r });
}
}
if (other > 0) major.push({ [bucketKey]: otherLabel, [valueKey]: other });
return major;
}
// Simple moving average over numeric array
export function movingAverage(series, span = 3) {
const n = Math.max(1, Number(span) || 1);
const out = [];
for (let i = 0; i < series.length; i++) {
const start = Math.max(0, i - n + 1);
let sum = 0, cnt = 0;
for (let j = start; j <= i; j++) {
const v = Number(series[j]);
if (Number.isFinite(v)) { sum += v; cnt++; }
}
out.push(cnt ? sum / cnt : null);
}
return out;
}

View file

@ -12,15 +12,14 @@
<div class="tabs is-toggle" id="report-tabs"> <div class="tabs is-toggle" id="report-tabs">
<ul> <ul>
<li class="is-active" data-tab="recent"><a>Recent</a></li> <li class="is-active" data-tab="overview"><a>Overview</a></li>
<li data-tab="trends"><a>Trends</a></li> <li data-tab="all"><a>All Domains</a></li>
<li data-tab="breakdown"><a>Breakdown</a></li> <li data-tab="domain"><a>Per Domain</a></li>
<li data-tab="analysis"><a>Analysis</a></li> <li data-tab="analysis"><a>Analysis</a></li>
</ul> </ul>
</div> </div>
<div id="controls" class="field is-grouped is-align-items-center mb-4" style="position: sticky; top: 0; background: white; z-index: 2; padding: 0.5rem 0;"> <div id="controls" class="field is-grouped mb-4">
<!-- Hidden native interval control kept for compatibility and availability probing -->
<div id="interval-control" class="control has-icons-left is-hidden"> <div id="interval-control" class="control has-icons-left is-hidden">
<div class="select is-small"> <div class="select is-small">
<select id="interval-select"> <select id="interval-select">
@ -42,76 +41,25 @@
</div> </div>
<span class="icon is-small is-left"><img src="icons/server.svg" alt="Domain"></span> <span class="icon is-small is-left"><img src="icons/server.svg" alt="Domain"></span>
</div> </div>
<!-- Unified Time control: selects both range and sensible grouping -->
<div id="time-control" class="control has-icons-left is-hidden">
<div class="select is-small">
<select id="time-select">
<option value="1h">Last hour</option>
<option value="24h">Last 24 hours</option>
<option value="7d" selected>Last 7 days</option>
<option value="30d">Last 30 days</option>
<option value="12w">Last 12 weeks</option>
<option value="12m">Last 12 months</option>
<option value="all">All time</option>
</select>
</div>
<span class="icon is-small is-left"><img src="icons/clock.svg" alt="Time"></span>
</div>
<div id="smooth-control" class="control is-hidden">
<label class="checkbox is-small">
<input type="checkbox" id="smooth-toggle"> Smooth error rate
</label>
</div>
<div id="mode-percent-control" class="control is-hidden">
<label class="checkbox is-small" title="Show values as a percentage of the total, instead of raw counts.">
<input type="checkbox" id="percent-toggle"> Percent mode
</label>
</div>
<div id="mode-group-control" class="control is-hidden">
<label class="checkbox is-small" title="Combine small categories into an 'Other' slice to declutter charts.">
<input type="checkbox" id="group-toggle" checked> Group small into Other
</label>
</div>
<div id="exclude-uncached-control" class="control is-hidden">
<label class="checkbox is-small" title="Hide uncached entries (cache status '-') from cache status distributions.">
<input type="checkbox" id="exclude-uncached-toggle" checked> Exclude “-”
</label>
</div>
<div id="reset-control" class="control">
<button id="reset-view" class="button is-small is-light">Reset view</button>
</div>
</div> </div>
<div id="recent-section"> <div id="overview-section">
<div id="overview" class="box mb-5"> <div id="overview" class="box mb-5">
<h2 class="subtitle">Recent</h2> <h2 class="subtitle">Overview</h2>
<p>Total logs: <span id="stat-total">-</span></p> <p>Total logs: <span id="stat-total">-</span></p>
<p>Date range: <span id="stat-start">-</span> to <span id="stat-end">-</span></p> <p>Date range: <span id="stat-start">-</span> to <span id="stat-end">-</span></p>
<p>Unique domains: <span id="stat-domains">-</span></p> <p>Unique domains: <span id="stat-domains">-</span></p>
<p>Last generated: <span id="stat-generated">-</span></p>
<p>Generation time: <span id="stat-elapsed">-</span> seconds</p>
</div> </div>
<!-- Two key distributions side-by-side on Recent -->
<div id="recent-row" class="columns"></div>
<div id="overview-reports"></div> <div id="overview-reports"></div>
</div> </div>
<div id="trends-section" class="is-hidden"> <div id="all-section" class="is-hidden">
<div id="reports-trends"></div> <div id="reports-all"></div>
</div> </div>
<div id="breakdown-section" class="is-hidden"> <div id="domain-section" class="is-hidden">
<div class="box mb-4"> <div id="reports-domain"></div>
<h2 class="subtitle">Breakdown</h2> </div>
<p class="mb-2">Explore categorical distributions and detailed lists side-by-side. Use the options below to adjust how categories are shown.</p>
<ul style="margin-left: 1.2rem; list-style: disc;">
<li><strong>Percent mode</strong>: converts counts into percentages of the total for easier comparison.</li>
<li><strong>Group small into Other</strong>: combines tiny slices under a single “Other” category to declutter charts.</li>
<li><strong>Exclude “-”</strong>: hides uncached entries (cache status “-”) from cache status distributions.</li>
</ul>
</div>
<div id="reports-breakdown"></div>
</div>
<div id="analysis-section" class="is-hidden"> <div id="analysis-section" class="is-hidden">
<div id="analysis-missing" class="box"></div> <div id="analysis-missing" class="box"></div>
@ -122,44 +70,23 @@
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script> <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://cdn.jsdelivr.net/npm/jquery@3.7.0/dist/jquery.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/jquery@3.7.0/dist/jquery.min.js"></script>
<script src="https://cdn.datatables.net/1.13.4/js/jquery.dataTables.min.js"></script> <script src="https://cdn.datatables.net/1.13.4/js/jquery.dataTables.min.js"></script>
<script type="module"> <script>
import {
newLoad,
abortLoad,
registerChart,
reset,
currentLoad,
sliceWindow,
excludeValues,
toPercent,
groupOthers,
movingAverage,
} from './chartManager.js';
const STATE_KEY = 'ngxstat-state-v2';
const intervalSelect = document.getElementById('interval-select'); const intervalSelect = document.getElementById('interval-select');
const domainSelect = document.getElementById('domain-select'); const domainSelect = document.getElementById('domain-select');
const intervalControl = document.getElementById('interval-control'); const intervalControl = document.getElementById('interval-control');
const domainControl = document.getElementById('domain-control'); const domainControl = document.getElementById('domain-control');
const timeControl = document.getElementById('time-control');
const timeSelect = document.getElementById('time-select');
const modePercentControl = document.getElementById('mode-percent-control');
const modeGroupControl = document.getElementById('mode-group-control');
const excludeUncachedControl = document.getElementById('exclude-uncached-control');
const smoothControl = document.getElementById('smooth-control');
const resetButton = document.getElementById('reset-view');
const tabs = document.querySelectorAll('#report-tabs li'); const tabs = document.querySelectorAll('#report-tabs li');
const sections = { const sections = {
recent: document.getElementById('recent-section'), overview: document.getElementById('overview-section'),
trends: document.getElementById('trends-section'), all: document.getElementById('all-section'),
breakdown: document.getElementById('breakdown-section'), domain: document.getElementById('domain-section'),
analysis: document.getElementById('analysis-section') analysis: document.getElementById('analysis-section')
}; };
const containers = { const containers = {
recent: document.getElementById('overview-reports'), overview: document.getElementById('overview-reports'),
trends: document.getElementById('reports-trends'), all: document.getElementById('reports-all'),
breakdown: document.getElementById('reports-breakdown') domain: document.getElementById('reports-domain')
}; };
const recentRow = document.getElementById('recent-row');
const analysisElems = { const analysisElems = {
missing: document.getElementById('analysis-missing'), missing: document.getElementById('analysis-missing'),
cache: document.getElementById('analysis-cache'), cache: document.getElementById('analysis-cache'),
@ -169,265 +96,60 @@
const startElem = document.getElementById('stat-start'); const startElem = document.getElementById('stat-start');
const endElem = document.getElementById('stat-end'); const endElem = document.getElementById('stat-end');
const domainsElem = document.getElementById('stat-domains'); const domainsElem = document.getElementById('stat-domains');
const generatedElem = document.getElementById('stat-generated');
const elapsedElem = document.getElementById('stat-elapsed');
// Extra controls
// Legacy window select kept for internal state only (not shown)
const windowSelect = document.getElementById('window-select');
// If legacy window select is not present in DOM, create a hidden one for code paths
// that still reference it.
(function ensureHiddenWindowSelect(){
if (!windowSelect) {
const hidden = document.createElement('select');
hidden.id = 'window-select';
hidden.classList.add('is-hidden');
// Supported values used by code
['1h','24h','7d','30d','12w','12m','all'].forEach(v => {
const o = document.createElement('option');
o.value = v; o.textContent = v;
hidden.appendChild(o);
});
document.body.appendChild(hidden);
}
})();
const percentToggle = document.getElementById('percent-toggle');
const groupToggle = document.getElementById('group-toggle');
const excludeUncachedToggle = document.getElementById('exclude-uncached-toggle');
const smoothToggle = document.getElementById('smooth-toggle');
let currentInterval = intervalSelect.value; let currentInterval = intervalSelect.value;
let currentDomain = domainSelect.value; let currentDomain = domainSelect.value;
let currentTab = 'recent'; let currentTab = 'overview';
let currentWindow = windowSelect ? windowSelect.value : '7d'; // 1h, 24h, 7d, 30d, 12w, 12m, all
let modePercent = false;
let modeGroup = true;
let excludeUncached = true;
let smoothError = false;
let hadExplicitWindow = false; // URL or saved-state provided window
function saveState() { function initReport(rep, base) {
try { fetch(base + '/' + rep.json)
localStorage.setItem(STATE_KEY, JSON.stringify({
tab: currentTab,
interval: currentInterval,
domain: currentDomain,
window: currentWindow,
percent: modePercent ? 1 : 0,
group: modeGroup ? 1 : 0,
exclude_dash: excludeUncached ? 1 : 0,
smooth: smoothError ? 1 : 0,
}));
} catch {}
}
function loadSavedState() {
try {
const s = JSON.parse(localStorage.getItem(STATE_KEY) || '{}');
if (s.tab) currentTab = s.tab;
if (s.interval) currentInterval = s.interval;
if (s.domain !== undefined) currentDomain = s.domain;
if (s.window) { currentWindow = s.window; hadExplicitWindow = true; }
if (s.percent !== undefined) modePercent = !!Number(s.percent);
if (s.group !== undefined) modeGroup = !!Number(s.group);
if (s.exclude_dash !== undefined) excludeUncached = !!Number(s.exclude_dash);
if (s.smooth !== undefined) smoothError = !!Number(s.smooth);
} catch {}
}
function applyURLParams() {
const params = new URLSearchParams(location.search);
if (params.get('tab')) currentTab = params.get('tab');
if (params.get('interval')) currentInterval = params.get('interval');
if (params.get('domain') !== null) currentDomain = params.get('domain') || '';
if (params.get('window')) { currentWindow = params.get('window'); hadExplicitWindow = true; }
if (params.get('percent') !== null) modePercent = params.get('percent') === '1';
if (params.get('group') !== null) modeGroup = params.get('group') === '1';
if (params.get('exclude_dash') !== null) excludeUncached = params.get('exclude_dash') === '1';
if (params.get('smooth') !== null) smoothError = params.get('smooth') === '1';
}
function updateURL() {
const params = new URLSearchParams();
params.set('tab', currentTab);
params.set('interval', currentInterval);
if (currentDomain) params.set('domain', currentDomain);
params.set('window', currentWindow);
params.set('percent', modePercent ? '1' : '0');
params.set('group', modeGroup ? '1' : '0');
params.set('exclude_dash', excludeUncached ? '1' : '0');
params.set('smooth', smoothError ? '1' : '0');
const newUrl = `${location.pathname}?${params.toString()}`;
history.replaceState(null, '', newUrl);
saveState();
}
function bucketsForWindow(win, interval) {
switch (win) {
case '1h': return interval === 'hourly' ? 1 : 'all';
case '24h': return interval === 'hourly' ? 24 : 'all';
case '7d': return interval === 'daily' ? 7 : 'all';
case '30d': return interval === 'daily' ? 30 : 'all';
case '12w': return interval === 'weekly' ? 12 : 'all';
case '12m': return interval === 'monthly' ? 12 : 'all';
default: return 'all';
}
}
function availableIntervals() {
try {
return Array.from(intervalSelect ? intervalSelect.options : []).map(o => o.value);
} catch { return []; }
}
function pickIntervalForWindow(win) {
const avail = availableIntervals();
const pref = (list) => list.find(x => avail.includes(x));
switch (win) {
case '1h':
case '24h':
return pref(['hourly','daily','weekly','monthly']) || (avail[0] || 'daily');
case '7d':
case '30d':
return pref(['daily','weekly','monthly','hourly']) || (avail[0] || 'daily');
case '12w':
return pref(['weekly','daily','monthly']) || (avail[0] || 'weekly');
case '12m':
return pref(['monthly','weekly','daily']) || (avail[0] || 'monthly');
default:
// all time: favor coarser buckets if available
return pref(['monthly','weekly','daily','hourly']) || (avail[0] || 'weekly');
}
}
function applyTimePreset(win) {
currentWindow = win;
currentInterval = pickIntervalForWindow(win);
if (intervalSelect) intervalSelect.value = currentInterval;
const winSel = document.getElementById('window-select');
if (winSel) winSel.value = currentWindow;
}
function initReport(token, rep, base) {
fetch(base + '/' + rep.json, { signal: token.controller.signal })
.then(r => r.json()) .then(r => r.json())
.then(data => { .then(data => {
if (token !== currentLoad) return; const bucketField = rep.bucket || 'bucket';
const bucketFields = rep.buckets || [rep.bucket || 'bucket'];
const labels = Array.isArray(rep.bucket_label)
? rep.bucket_label
: [rep.bucket_label || 'Bucket'];
if (rep.chart === 'table') { if (rep.chart === 'table') {
const rows = data.map(x => bucketFields.map(f => x[f]).concat(x.value)); const rows = data.map(x => [x[bucketField], x.value]);
const columns = labels.map(l => ({ title: l })); new DataTable('#table-' + rep.name, {
columns.push({ title: 'Value' });
const table = new DataTable('#table-' + rep.name, {
data: rows, data: rows,
columns: columns columns: [
{ title: rep.bucket_label || 'Bucket' },
{ title: 'Value' }
]
}); });
registerChart(token, rep.name, table);
return; return;
} }
// Transform pipeline (client-only) const labels = data.map(x => x[bucketField]);
let transformed = data.slice(); const values = data.map(x => x.value);
const bucketField = bucketFields[0];
const isTimeSeries = bucketField === 'time_bucket';
// Exclusions (per-report) and explicit uncached toggle for cache_status
if (rep.exclude_values && rep.exclude_values.length) {
transformed = excludeValues(transformed, bucketField, rep.exclude_values);
}
if (excludeUncached && bucketField === 'cache_status') {
transformed = excludeValues(transformed, bucketField, ['-']);
}
// Windowing for time series
if (isTimeSeries) {
// Only apply windowing if report supports current window (if constrained)
const supported = Array.isArray(rep.windows_supported) ? rep.windows_supported : null;
const canWindow = !supported || supported.includes(currentWindow);
if (canWindow) {
const n = bucketsForWindow(currentWindow, currentInterval);
transformed = sliceWindow(transformed, n);
}
}
// Distributions: percent + group small
const isDistribution = ['pie', 'polarArea', 'doughnut', 'donut'].includes(rep.chart);
if (isDistribution) {
if (modeGroup) {
const thr = (typeof rep.group_others_threshold === 'number') ? rep.group_others_threshold : 0.03;
transformed = groupOthers(transformed, bucketField, 'value', thr, 'Other');
}
if (modePercent) {
transformed = toPercent(transformed, 'value');
}
}
// Relabel '-' to 'Uncached' for cache_status distributions
if (bucketField === 'cache_status') {
transformed = transformed.map(row => ({
...row,
[bucketField]: row[bucketField] === '-' ? 'Uncached' : row[bucketField]
}));
}
const labelsArr = transformed.map(x => x[bucketField]);
let values = transformed.map(x => x.value);
const chartType = rep.chart === 'stackedBar' ? 'bar' : rep.chart; const chartType = rep.chart === 'stackedBar' ? 'bar' : rep.chart;
const options = { scales: { y: { beginAtZero: true } } }; const options = { scales: { y: { beginAtZero: true } } };
let datasets = [];
if (rep.chart === 'stackedBar') { if (rep.chart === 'stackedBar') {
options.scales.x = { stacked: true }; options.scales.x = { stacked: true };
options.scales.y = options.scales.y || {};
options.scales.y.stacked = true; options.scales.y.stacked = true;
// Build multiple series from columns (exclude bucket & total)
const keys = transformed.length ? Object.keys(transformed[0]).filter(k => k !== bucketField && k !== 'total') : [];
const palette = rep.colors || rep.palette || [
'#3273dc', '#23d160', '#ffdd57', '#ff3860', '#7957d5', '#363636'
];
datasets = keys.map((k, i) => ({
label: k,
data: transformed.map(r => Number(r[k]) || 0),
backgroundColor: palette[i % palette.length],
borderColor: palette[i % palette.length],
borderWidth: 1,
fill: false,
}));
} else {
const dataset = {
label: rep.label,
data: values,
borderWidth: 1,
fill: rep.chart !== 'bar' && rep.chart !== 'stackedBar'
};
if (rep.colors) {
dataset.backgroundColor = rep.colors;
dataset.borderColor = rep.colors;
} else if (rep.palette) {
dataset.backgroundColor = rep.palette;
dataset.borderColor = rep.palette;
} else if (rep.color) {
dataset.backgroundColor = rep.color;
dataset.borderColor = rep.color;
} else {
dataset.backgroundColor = 'rgba(54, 162, 235, 0.5)';
dataset.borderColor = 'rgba(54, 162, 235, 1)';
}
// Optional smoothing for error_rate
if (rep.name === 'error_rate' && smoothError) {
dataset.data = movingAverage(values, 3);
dataset.label = rep.label + ' (smoothed)';
}
datasets = [dataset];
} }
const chart = new Chart(document.getElementById('chart-' + rep.name), { const dataset = {
label: rep.label,
data: values,
borderWidth: 1,
fill: rep.chart !== 'bar' && rep.chart !== 'stackedBar'
};
if (rep.colors) {
dataset.backgroundColor = rep.colors;
dataset.borderColor = rep.colors;
} else if (rep.color) {
dataset.backgroundColor = rep.color;
dataset.borderColor = rep.color;
} else {
dataset.backgroundColor = 'rgba(54, 162, 235, 0.5)';
dataset.borderColor = 'rgba(54, 162, 235, 1)';
}
new Chart(document.getElementById('chart-' + rep.name), {
type: chartType, type: chartType,
data: { data: {
labels: labelsArr, labels: labels,
datasets datasets: [dataset]
}, },
options: options options: options
}); });
registerChart(token, rep.name, chart);
}); });
} }
@ -439,68 +161,40 @@
startElem.textContent = stats.start_date; startElem.textContent = stats.start_date;
endElem.textContent = stats.end_date; endElem.textContent = stats.end_date;
domainsElem.textContent = stats.unique_domains; domainsElem.textContent = stats.unique_domains;
generatedElem.textContent = stats.generated_at || '-';
elapsedElem.textContent =
stats.generation_seconds !== undefined ? stats.generation_seconds : '-';
}); });
} }
// Reset helpers managed by chartManager
function loadReports() { function loadReports() {
let path; let path;
let container = containers[currentTab]; let container;
if (currentTab === 'recent') { if (currentTab === 'overview') {
path = 'global'; path = 'global';
container = containers.overview;
} else if (currentTab === 'all') {
path = currentInterval;
container = containers.all;
} else { } else {
path = currentDomain ? ('domains/' + encodeURIComponent(currentDomain) + '/' + currentInterval) : currentInterval; container = containers.domain;
if (!currentDomain) {
container.innerHTML = '<p>Select a domain</p>';
return;
}
path = 'domains/' + encodeURIComponent(currentDomain) + '/' + currentInterval;
} }
// Clear the top row on each load of Recent fetch(path + '/reports.json')
if (currentTab === 'recent' && recentRow) { .then(r => r.json())
recentRow.innerHTML = ''; .then(reports => {
} container.innerHTML = '';
reports.forEach(rep => {
const token = newLoad(container); fetch(path + '/' + rep.html)
.then(r => r.text())
fetch(path + '/reports.json', { signal: token.controller.signal }) .then(html => {
.then(r => r.json())
.then(reports => {
if (token !== currentLoad) return;
const isDistributionType = t => ['pie','polarArea','doughnut','donut'].includes(t);
const filtered = reports.filter(rep => {
if (currentTab === 'recent') return true;
if (currentTab === 'trends') return rep.chart !== 'table' && !isDistributionType(rep.chart);
if (currentTab === 'breakdown') return isDistributionType(rep.chart) || rep.chart === 'table';
return true;
});
// If no explicit window was given (URL or saved state), honor first report's default
if (!hadExplicitWindow) {
const withDefault = filtered.find(r => r.window_default);
if (withDefault && typeof withDefault.window_default === 'string') {
currentWindow = withDefault.window_default;
windowSelect.value = currentWindow;
updateURL();
}
}
filtered.forEach(rep => {
fetch(path + '/' + rep.html, { signal: token.controller.signal })
.then(r => r.text())
.then(html => {
if (token !== currentLoad) return;
// On Recent tab, render Cache Status and HTTP Statuses side-by-side
const inTopRow = currentTab === 'recent' &&
(rep.name === 'cache_status_breakdown' || rep.name === 'status_distribution');
if (inTopRow && recentRow) {
const wrapped = `<div class="column is-half">${html}</div>`;
recentRow.insertAdjacentHTML('beforeend', wrapped);
} else {
container.insertAdjacentHTML('beforeend', html); container.insertAdjacentHTML('beforeend', html);
} initReport(rep, path);
initReport(token, rep, path); });
}); });
}); });
});
} }
function loadAnalysis() { function loadAnalysis() {
@ -584,8 +278,6 @@
} }
function switchTab(name) { function switchTab(name) {
abortLoad(currentLoad);
Object.values(containers).forEach(reset);
currentTab = name; currentTab = name;
tabs.forEach(tab => { tabs.forEach(tab => {
tab.classList.toggle('is-active', tab.dataset.tab === name); tab.classList.toggle('is-active', tab.dataset.tab === name);
@ -593,20 +285,9 @@
Object.entries(sections).forEach(([key, section]) => { Object.entries(sections).forEach(([key, section]) => {
section.classList.toggle('is-hidden', key !== name); section.classList.toggle('is-hidden', key !== name);
}); });
const showTime = name !== 'recent' && name !== 'analysis'; intervalControl.classList.toggle('is-hidden', name === 'overview' || name === 'analysis');
const showDomain = showTime; domainControl.classList.toggle('is-hidden', name !== 'domain');
// Always keep legacy interval control hidden; use unified time control if (name === 'overview') {
intervalControl.classList.add('is-hidden');
domainControl.classList.toggle('is-hidden', !showDomain);
timeControl.classList.toggle('is-hidden', !showTime);
// Only show percent/group/exclude toggles on Breakdown tab,
// and smoothing only on Trends tab
modePercentControl.classList.toggle('is-hidden', name !== 'breakdown');
modeGroupControl.classList.toggle('is-hidden', name !== 'breakdown');
excludeUncachedControl.classList.toggle('is-hidden', name !== 'breakdown');
smoothControl.classList.toggle('is-hidden', name !== 'trends');
updateURL();
if (name === 'recent') {
loadStats(); loadStats();
} }
if (name === 'analysis') { if (name === 'analysis') {
@ -616,103 +297,21 @@
} }
} }
if (intervalSelect) { intervalSelect.addEventListener('change', () => {
intervalSelect.addEventListener('change', () => { currentInterval = intervalSelect.value;
currentInterval = intervalSelect.value; loadReports();
abortLoad(currentLoad); });
Object.values(containers).forEach(reset);
updateURL();
loadReports();
});
}
domainSelect.addEventListener('change', () => { domainSelect.addEventListener('change', () => {
currentDomain = domainSelect.value; currentDomain = domainSelect.value;
abortLoad(currentLoad);
Object.values(containers).forEach(reset);
updateURL();
loadReports();
});
if (timeSelect) {
timeSelect.addEventListener('change', () => {
applyTimePreset(timeSelect.value);
abortLoad(currentLoad);
updateURL();
loadReports();
});
}
percentToggle.addEventListener('change', () => {
modePercent = percentToggle.checked;
abortLoad(currentLoad);
updateURL();
loadReports();
});
groupToggle.addEventListener('change', () => {
modeGroup = groupToggle.checked;
abortLoad(currentLoad);
updateURL();
loadReports();
});
excludeUncachedToggle.addEventListener('change', () => {
excludeUncached = excludeUncachedToggle.checked;
abortLoad(currentLoad);
updateURL();
loadReports();
});
smoothToggle.addEventListener('change', () => {
smoothError = smoothToggle.checked;
abortLoad(currentLoad);
updateURL();
loadReports(); loadReports();
}); });
tabs.forEach(tab => { tabs.forEach(tab => {
tab.addEventListener('click', () => { tab.addEventListener('click', () => switchTab(tab.dataset.tab));
switchTab(tab.dataset.tab);
});
}); });
resetButton.addEventListener('click', () => {
try { switchTab('overview');
localStorage.removeItem('ngxstat-state'); // clear legacy
localStorage.removeItem(STATE_KEY);
} catch {}
// Reset to hard defaults
currentTab = 'recent';
currentInterval = intervalSelect ? (intervalSelect.value = intervalSelect.options[0]?.value || currentInterval) : currentInterval;
currentDomain = domainSelect.value = '';
applyTimePreset('7d');
if (timeSelect) timeSelect.value = '7d';
modePercent = percentToggle.checked = false;
modeGroup = groupToggle.checked = true;
excludeUncached = excludeUncachedToggle.checked = true;
smoothError = smoothToggle.checked = false;
hadExplicitWindow = false;
switchTab(currentTab);
});
// Initialize state (URL -> localStorage -> defaults)
loadSavedState();
applyURLParams();
// Sync controls
if (intervalSelect) intervalSelect.value = currentInterval;
domainSelect.value = currentDomain;
// Sync unified time select based on state
if (timeSelect) {
const known = new Set(['1h','24h','7d','30d','12w','12m','all']);
const pick = known.has(currentWindow) ? currentWindow : 'all';
timeSelect.value = pick;
applyTimePreset(pick);
}
percentToggle.checked = modePercent;
groupToggle.checked = modeGroup;
excludeUncachedToggle.checked = excludeUncached;
smoothToggle.checked = smoothError;
// Show/hide controls based on active tab
switchTab(currentTab);
</script> </script>
</body> </body>
</html> </html>

View file

@ -1,6 +1,12 @@
import sys
import json import json
import sqlite3 import sqlite3
from pathlib import Path from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[1]
sys.path.append(str(REPO_ROOT))
from scripts import analyze from scripts import analyze
from scripts import generate_reports as gr from scripts import generate_reports as gr

View file

@ -15,10 +15,10 @@ def sample_logs(tmp_path):
log_dir.mkdir(parents=True, exist_ok=True) log_dir.mkdir(parents=True, exist_ok=True)
(log_dir / "access.log.1").write_text( (log_dir / "access.log.1").write_text(
'127.0.0.1 - example.com [01/Jan/2024:10:00:00 +0000] "GET / HTTP/1.1" 200 123 "-" "curl" MISS\n' "127.0.0.1 - example.com [01/Jan/2024:10:00:00 +0000] \"GET / HTTP/1.1\" 200 123 \"-\" \"curl\" MISS\n"
) )
(log_dir / "access.log").write_text( (log_dir / "access.log").write_text(
'127.0.0.1 - example.com [01/Jan/2024:10:05:00 +0000] "GET /about HTTP/1.1" 200 123 "-" "curl" MISS\n' "127.0.0.1 - example.com [01/Jan/2024:10:05:00 +0000] \"GET /about HTTP/1.1\" 200 123 \"-\" \"curl\" MISS\n"
) )
yield log_dir yield log_dir
@ -59,3 +59,4 @@ def test_idempotent_import(sample_logs, tmp_path):
assert first_count == 2 assert first_count == 2
assert second_count == first_count assert second_count == first_count

View file

@ -1,3 +1,9 @@
import sys
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[1]
sys.path.append(str(REPO_ROOT))
from scripts import nginx_config as nc from scripts import nginx_config as nc
@ -61,3 +67,4 @@ server {
assert servers[1]["server_name"] == "example.org" assert servers[1]["server_name"] == "example.org"
assert servers[1]["listen"] == "443 ssl" assert servers[1]["listen"] == "443 ssl"
assert "proxy_cache" not in servers[1] assert "proxy_cache" not in servers[1]

View file

@ -1,10 +1,12 @@
import sqlite3 import sqlite3
from pathlib import Path from pathlib import Path
import json import json
from datetime import datetime import sys
import pytest import pytest
from typer.testing import CliRunner
REPO_ROOT = Path(__file__).resolve().parents[1]
sys.path.append(str(REPO_ROOT))
from scripts import generate_reports as gr from scripts import generate_reports as gr
@ -146,19 +148,20 @@ def test_generate_interval_domain_filter(tmp_path, sample_reports, monkeypatch):
gr._generate_interval("hourly", "example.com") gr._generate_interval("hourly", "example.com")
hits = json.loads( hits = json.loads(
( (tmp_path / "output" / "domains" / "example.com" / "hourly" / "hits.json").read_text()
tmp_path / "output" / "domains" / "example.com" / "hourly" / "hits.json"
).read_text()
) )
assert hits[0]["value"] == 2 assert hits[0]["value"] == 2
reports = json.loads( reports = json.loads(
( (tmp_path / "output" / "domains" / "example.com" / "hourly" / "reports.json").read_text()
tmp_path / "output" / "domains" / "example.com" / "hourly" / "reports.json"
).read_text()
) )
assert {r["name"] for r in reports} == {"hits", "error_rate"} assert {r["name"] for r in reports} == {"hits", "error_rate"}
assert not ( assert not (
tmp_path / "output" / "domains" / "example.com" / "hourly" / "skip_report.json" tmp_path
/ "output"
/ "domains"
/ "example.com"
/ "hourly"
/ "skip_report.json"
).exists() ).exists()
@ -197,25 +200,9 @@ def test_generate_root_index(tmp_path, sample_reports, monkeypatch):
assert '<option value="Global">' not in content assert '<option value="Global">' not in content
assert '<option value="analysis">' not in content assert '<option value="analysis">' not in content
# check for domain options
def test_generated_marker_written(tmp_path, monkeypatch):
    """Running the `index` CLI command writes a parseable timestamp marker file."""
    out_dir = tmp_path / "output"
    templates = Path(__file__).resolve().parents[1] / "templates"
    # Point the report generator at the temp workspace and stub out icon copying.
    monkeypatch.setattr(gr, "OUTPUT_DIR", out_dir)
    monkeypatch.setattr(gr, "TEMPLATE_DIR", templates)
    monkeypatch.setattr(gr, "GENERATED_MARKER", out_dir / "generated.txt")
    monkeypatch.setattr(gr, "_copy_icons", lambda: None)
    (out_dir / "hourly").mkdir(parents=True)

    outcome = CliRunner().invoke(gr.app, ["index"])
    assert outcome.exit_code == 0, outcome.output

    marker_path = out_dir / "generated.txt"
    assert marker_path.exists()
    # strptime raises ValueError if the marker content is not a valid timestamp.
    datetime.strptime(marker_path.read_text().strip(), "%Y-%m-%d %H:%M:%S")
def test_global_reports_once(tmp_path, sample_reports, monkeypatch): def test_global_reports_once(tmp_path, sample_reports, monkeypatch):
@ -258,158 +245,9 @@ def test_global_stats_file(tmp_path, sample_reports, monkeypatch):
"start_date", "start_date",
"end_date", "end_date",
"unique_domains", "unique_domains",
"generated_at",
"generation_seconds",
} }
assert stats["total_logs"] == 2 assert stats["total_logs"] == 2
assert stats["start_date"] == "2024-01-01 10:00:00" assert stats["start_date"] == "2024-01-01 10:00:00"
assert stats["end_date"] == "2024-01-01 10:05:00" assert stats["end_date"] == "2024-01-01 10:05:00"
assert stats["unique_domains"] == 1 assert stats["unique_domains"] == 1
assert isinstance(stats["generated_at"], str)
assert stats["generation_seconds"] >= 0
def test_multi_bucket_table(tmp_path, monkeypatch):
    """Multi-bucket table reports expose their bucket metadata in reports.json."""
    db_path = tmp_path / "database" / "ngxstat.db"
    setup_db(db_path)

    # Insert a row for a second domain so the host/agent grouping is exercised.
    extra_row = (
        "127.0.0.1",
        "foo.com",
        "2024-01-01 10:10:00",
        "GET /foo HTTP/1.1",
        200,
        100,
        "-",
        "curl",
        "MISS",
    )
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
            extra_row,
        )
        conn.commit()
    finally:
        conn.close()

    cfg = tmp_path / "reports.yml"
    cfg.write_text(
        """
- name: multi
  chart: table
  global: true
  buckets: [domain, agent]
  bucket_label: [Domain, Agent]
  query: |
    SELECT host AS domain, user_agent AS agent, COUNT(*) AS value
    FROM logs
    GROUP BY host, agent
"""
    )

    # Redirect the generator's module-level paths into the temp workspace.
    for attr, value in [
        ("DB_PATH", db_path),
        ("OUTPUT_DIR", tmp_path / "output"),
        ("REPORT_CONFIG", cfg),
        ("TEMPLATE_DIR", Path(__file__).resolve().parents[1] / "templates"),
    ]:
        monkeypatch.setattr(gr, attr, value)

    gr._generate_global()
    gr._generate_interval("hourly")

    out_global = tmp_path / "output" / "global"
    rows = json.loads((out_global / "multi.json").read_text())
    assert {"domain", "agent", "value"} <= rows[0].keys()

    report_meta = json.loads((out_global / "reports.json").read_text())
    multi_entry = next(r for r in report_meta if r["name"] == "multi")
    assert multi_entry["buckets"] == ["domain", "agent"]
    assert multi_entry["bucket_label"] == ["Domain", "Agent"]
def test_top_n_limit_applied(tmp_path, monkeypatch):
    """A report's `top_n` setting caps the number of rows written to its JSON."""
    db_path = tmp_path / "database" / "ngxstat.db"
    setup_db(db_path)

    # Seed the database with many distinct user agents so the limit matters.
    conn = sqlite3.connect(db_path)
    try:
        for idx in range(10):
            conn.execute(
                "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    "127.0.0.1",
                    "example.com",
                    f"2024-01-01 11:{idx:02d}:00",
                    "GET /x HTTP/1.1",
                    200,
                    100,
                    "-",
                    f"ua-{idx}",
                    "MISS",
                ),
            )
        conn.commit()
    finally:
        conn.close()

    cfg = tmp_path / "reports.yml"
    cfg.write_text(
        """
- name: agents
  chart: table
  global: true
  top_n: 3
  query: |
    SELECT user_agent AS agent, COUNT(*) AS value
    FROM logs
    GROUP BY user_agent
    ORDER BY value DESC
"""
    )

    for attr, value in [
        ("DB_PATH", db_path),
        ("OUTPUT_DIR", tmp_path / "output"),
        ("REPORT_CONFIG", cfg),
        ("TEMPLATE_DIR", Path(__file__).resolve().parents[1] / "templates"),
    ]:
        monkeypatch.setattr(gr, attr, value)

    gr._generate_global()

    rows = json.loads((tmp_path / "output" / "global" / "agents.json").read_text())
    # The configured top_n of 3 must bound the emitted row count.
    assert len(rows) <= 3
def test_metadata_passthrough(tmp_path, monkeypatch):
    """Optional report metadata (window, supported windows, palette) is copied
    verbatim from the YAML config into the generated reports.json entry."""
    db_path = tmp_path / "database" / "ngxstat.db"
    setup_db(db_path)

    cfg = tmp_path / "reports.yml"
    cfg.write_text(
        """
- name: custom_ts
  label: Custom TS
  chart: line
  window_default: 24h
  windows_supported: [1h, 24h, 7d]
  palette: ["#111111", "#222222"]
  query: |
    SELECT {bucket} AS time_bucket, COUNT(*) AS value
    FROM logs
    GROUP BY time_bucket
    ORDER BY time_bucket
"""
    )

    for attr, value in [
        ("DB_PATH", db_path),
        ("OUTPUT_DIR", tmp_path / "output"),
        ("REPORT_CONFIG", cfg),
        ("TEMPLATE_DIR", Path(__file__).resolve().parents[1] / "templates"),
    ]:
        monkeypatch.setattr(gr, attr, value)

    gr._generate_interval("hourly")

    report_meta = json.loads(
        (tmp_path / "output" / "hourly" / "reports.json").read_text()
    )
    custom = next(r for r in report_meta if r["name"] == "custom_ts")
    assert custom["window_default"] == "24h"
    assert custom["windows_supported"] == ["1h", "24h", "7d"]
    assert custom["palette"] == ["#111111", "#222222"]

View file

@ -14,9 +14,9 @@ def test_script_invokes_commands(tmp_path):
calls = tmp_path / "calls.txt" calls = tmp_path / "calls.txt"
python_stub = tmp_path / "python" python_stub = tmp_path / "python"
python_stub.write_text(f'#!/usr/bin/env bash\necho "$*" >> "{calls}"\n') python_stub.write_text(f"#!/usr/bin/env bash\necho \"$*\" >> \"{calls}\"\n")
python_stub.chmod(0o755) python_stub.chmod(0o755)
(tmp_path / "python3").write_text("#!/usr/bin/env bash\nexit 0\n") (tmp_path / "python3").write_text(f"#!/usr/bin/env bash\nexit 0\n")
(tmp_path / "python3").chmod(0o755) (tmp_path / "python3").chmod(0o755)
env = os.environ.copy() env = os.environ.copy()