diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..95f9808 --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +exclude = .git, .venv, output, static/icons +max-line-length = 160 diff --git a/.forgejo/workflows/ci.yml b/.forgejo/workflows/ci.yml new file mode 100644 index 0000000..5cf26be --- /dev/null +++ b/.forgejo/workflows/ci.yml @@ -0,0 +1,151 @@ +name: CI + +on: + push: + pull_request: + workflow_dispatch: + +jobs: + ci: + name: Lint, test, and build + # This label must match your Forgejo runner's label + runs-on: docker + # Use a clean Debian container so tools are predictable + container: debian:stable-slim + env: + PYTHONDONTWRITEBYTECODE: "1" + PIP_DISABLE_PIP_VERSION_CHECK: "1" + UV_SYSTEM_PYTHON: "1" + steps: + - name: Install build tooling + run: | + set -euo pipefail + apt-get update + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + git ca-certificates python3 python3-venv python3-pip python3-setuptools \ + python3-wheel sqlite3 + update-ca-certificates || true + + - name: Checkout repository (manual) + run: | + set -euo pipefail + if [ -f Makefile ] || [ -d .git ]; then + echo "Repository present in workspace; skipping clone" + exit 0 + fi + REMOTE_URL="${CI_REPOSITORY_URL:-}" + if [ -z "$REMOTE_URL" ]; then + if [ -n "${GITHUB_SERVER_URL:-}" ] && [ -n "${GITHUB_REPOSITORY:-}" ]; then + REMOTE_URL="${GITHUB_SERVER_URL%/}/${GITHUB_REPOSITORY}.git" + elif [ -n "${GITHUB_REPOSITORY:-}" ]; then + REMOTE_URL="https://git.jordanwages.com/${GITHUB_REPOSITORY}.git" + else + echo "Unable to determine repository URL from CI environment" >&2 + exit 1 + fi + fi + AUTH_URL="$REMOTE_URL" + if [ -n "${GITHUB_TOKEN:-}" ]; then + ACTOR="${GITHUB_ACTOR:-oauth2}" + AUTH_URL=$(printf '%s' "$REMOTE_URL" | sed -E "s#^https://#https://${ACTOR}:${GITHUB_TOKEN}@#") + fi + echo "Cloning from: $REMOTE_URL" + if ! git clone --depth 1 "$AUTH_URL" .; then + echo "Auth clone failed; trying anonymous clone..." >&2 + git clone --depth 1 "$REMOTE_URL" . + fi + if [ -n "${GITHUB_SHA:-}" ]; then + git fetch --depth 1 origin "$GITHUB_SHA" || true + git checkout -q "$GITHUB_SHA" || true + elif [ -n "${GITHUB_REF_NAME:-}" ]; then + git fetch --depth 1 origin "$GITHUB_REF_NAME" || true + git checkout -q "$GITHUB_REF_NAME" || true + fi + + - name: Set up venv and install deps + run: | + set -euo pipefail + # Prefer persistent cache if runner provides /cache + USE_CACHE=0 + if [ -d /cache ] && [ -w /cache ]; then + export PIP_CACHE_DIR=/cache/pip + mkdir -p "$PIP_CACHE_DIR" + REQ_HASH=$(sha256sum requirements.txt | awk '{print $1}') + PYVER=$(python3 -c 'import sys;print(".".join(map(str, sys.version_info[:2])))') + CACHE_VENV="/cache/venv-${REQ_HASH}-py${PYVER}" + if [ ! -f "$CACHE_VENV/bin/activate" ]; then + echo "Preparing cached virtualenv: $CACHE_VENV" + rm -rf "$CACHE_VENV" || true + python3 -m venv "$CACHE_VENV" + fi + ln -sfn "$CACHE_VENV" .venv + USE_CACHE=1 + else + # Fallback to local venv + python3 -m venv .venv + fi + + # If the link didn't produce an activate file, fallback to local venv + if [ ! -f .venv/bin/activate ]; then + echo "Cached venv missing; creating local .venv" + rm -f .venv + python3 -m venv .venv + USE_CACHE=0 + fi + + . 
.venv/bin/activate + python -m pip install --upgrade pip + if [ "$USE_CACHE" = "1" ]; then + # Ensure required packages are present; pip will use cache + pip install -r requirements.txt pytest || pip install -r requirements.txt pytest + else + pip install -r requirements.txt pytest + fi + + - name: Format check (black) + run: | + . .venv/bin/activate + black --check . + + - name: Lint (flake8) + run: | + . .venv/bin/activate + flake8 . + + - name: Run tests (pytest) + run: | + . .venv/bin/activate + export PYTHONPATH="$(pwd)${PYTHONPATH:+:$PYTHONPATH}" + pytest -q --maxfail=1 + + - name: Build sample reports (no artifact upload) + run: | + set -euo pipefail + . .venv/bin/activate + python - <<'PY' + import sqlite3, pathlib + db = pathlib.Path('database/ngxstat.db') + db.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(db) + cur = conn.cursor() + cur.execute('''CREATE TABLE IF NOT EXISTS logs ( + id INTEGER PRIMARY KEY, + ip TEXT, + host TEXT, + time TEXT, + request TEXT, + status INTEGER, + bytes_sent INTEGER, + referer TEXT, + user_agent TEXT, + cache_status TEXT + )''') + cur.execute("INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES ('127.0.0.1','example.com','2024-01-01 10:00:00','GET / HTTP/1.1',200,100,'-','curl','MISS')") + cur.execute("INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES ('127.0.0.1','example.com','2024-01-01 10:05:00','GET /about HTTP/1.1',200,100,'-','curl','MISS')") + conn.commit(); conn.close() + PY + python scripts/generate_reports.py global + python scripts/generate_reports.py hourly + python scripts/generate_reports.py index + tar -czf ngxstat-reports.tar.gz -C output . + echo "Built sample reports archive: ngxstat-reports.tar.gz" diff --git a/AGENTS.md b/AGENTS.md index ab65e99..7e7d3c5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -21,8 +21,11 @@ This document outlines general practices and expectations for AI agents assistin source .venv/bin/activate pip install -r requirements.txt ``` - The `init.sh` script can create this environment automatically. Always - activate it before running scripts or tests. + The `run-import.sh` script can initialize this environment automatically. + Always activate the virtual environment before running scripts or tests. + +* Before committing code run `black` for consistent formatting and execute + the test suite with `pytest`. All tests should pass. * Dependency management: Use `requirements.txt` or `pip-tools` * Use standard libraries where feasible (e.g., `sqlite3`, `argparse`, `datetime`) @@ -39,13 +42,19 @@ This document outlines general practices and expectations for AI agents assistin * Use latest CDN version for embedded dashboards * Charts should be rendered from pre-generated JSON blobs in `/json/` +### Tables: DataTables + +* Use DataTables via CDN for reports with `chart: table` +* Requires jQuery from a CDN +* Table data comes from the same `/json/` files as charts + ### Styling: Bulma CSS * Use via CDN or vendored minified copy (to keep reports fully static) * Stick to default components (columns, cards, buttons, etc.) * No JS dependencies from Bulma -### Icon Set: [Feather Icons (CC0)](https://feathericons.com/) +### Icon Set: [Free CC0 Icons (CC0)](https://cc0-icons.jonh.eu/) * License: MIT / CC0-like * Use SVG versions @@ -83,6 +92,14 @@ ngxstat/ If uncertain, the agent should prompt the human for clarification before making architectural assumptions. 
+## Testing + +Use `pytest` for automated tests. Run the suite from an activated virtual environment and ensure all tests pass before committing: + +```bash +pytest -q +``` + --- ## Future Capabilities @@ -100,3 +117,4 @@ As the project matures, agents may also: * **2025-07-17**: Initial version by Jordan + ChatGPT * **2025-07-17**: Expanded virtual environment usage guidance + diff --git a/README.md b/README.md index bc2db2d..ac601fc 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,16 @@ # ngxstat -Per-domain Nginx log analytics with hybrid static reports and live insights. -## Generating Reports +`ngxstat` is a lightweight log analytics toolkit for Nginx. It imports access +logs into an SQLite database and renders static dashboards so you can explore +per-domain metrics without running a heavy backend service. -Use the `generate_reports.py` script to build aggregated JSON and HTML files from `database/ngxstat.db`. +## Requirements -Create a virtual environment and install dependencies: +* Python 3.10+ +* Access to the Nginx log files (default: `/var/log/nginx`) + +The helper scripts create a virtual environment on first run, but you can also +set one up manually: ```bash python3 -m venv .venv @@ -13,13 +18,95 @@ source .venv/bin/activate pip install -r requirements.txt ``` -Then run one or more of the interval commands: +## Importing Logs + +Run the importer to ingest new log entries into `database/ngxstat.db`: + +```bash +./run-import.sh +``` + +Rotated logs are processed in order and only entries newer than the last +imported timestamp are added. + +## Generating Reports + +To build the HTML dashboard and JSON data files, use `run-reports.sh`, which runs +all intervals in one go: + +```bash +./run-reports.sh +``` + +The script calls `scripts/generate_reports.py` internally to create hourly, +daily, weekly and monthly reports, then writes analysis JSON files used by the +"Analysis" tab. Per-domain reports are written under `output/domains/` +alongside the aggregate data. Open `output/index.html` in a browser to view the +dashboard. + +If you prefer to run individual commands, you can invoke the generator directly: ```bash python scripts/generate_reports.py hourly -python scripts/generate_reports.py daily -python scripts/generate_reports.py weekly -python scripts/generate_reports.py monthly +python scripts/generate_reports.py daily --all-domains ``` -Reports are written under the `output/` directory. Each command updates the corresponding `.json` file and produces an HTML dashboard using Chart.js. +## Analysis Helpers + +`run-analysis.sh` executes additional utilities that examine the database for +missing domains, caching opportunities and potential threats. The JSON output is +saved under `output/analysis` and appears in the "Analysis" tab. The +`run-reports.sh` script also generates these JSON files as part of the build. + +```bash +./run-analysis.sh +``` + +## UX Controls + +The dashboard defaults to a 7‑day window for time series. Your view preferences +persist locally in the browser under the `ngxstat-state-v2` key. Use the +"Reset view" button to clear saved state and restore defaults. + +## Serving the Reports + +The generated files are static. You can serve them with a simple Nginx block: + +```nginx +server { + listen 80; + server_name example.com; + root /path/to/ngxstat/output; + + location / { + try_files $uri $uri/ =404; + } +} +``` + +Restrict access if the reports should not be public.
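+ +For example, one option is to replace the `location` block above with an +allow/deny list so the dashboard stays private to a trusted network (the +`192.168.1.0/24` range is only a placeholder; substitute your own network, or +use `auth_basic` instead): + +```nginx +location / { + allow 192.168.1.0/24; # placeholder network; adjust to your environment + deny all; + try_files $uri $uri/ =404; +} +```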
+ +## Running Tests + +Install the development dependencies and execute the suite with `pytest`: + +```bash +pip install -r requirements.txt +pytest -q +``` + +All tests must pass before submitting changes. + +## Acknowledgements + +ngxstat uses the following third‑party resources: + +* [Chart.js](https://www.chartjs.org/) for charts +* [DataTables](https://datatables.net/) and [jQuery](https://jquery.com/) for table views +* [Bulma CSS](https://bulma.io/) for styling +* Icons from [Free CC0 Icons](https://cc0-icons.jonh.eu/) by Jon Hicks (CC0 / MIT) +* [Typer](https://typer.tiangolo.com/) for the command-line interface +* [Jinja2](https://palletsprojects.com/p/jinja/) for templating + +The project is licensed under the GPLv3. Icon assets remain in the public domain +via the CC0 license. diff --git a/init.sh b/init.sh deleted file mode 100755 index d951d8d..0000000 --- a/init.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -set -e - -echo "[INFO] Creating virtual environment..." -python3 -m venv .venv -source .venv/bin/activate - -echo "[INFO] Installing dependencies..." -pip install --upgrade pip -pip install -r requirements.txt || echo "[WARN] requirements.txt not found, skipping." - -echo "[INFO] Running database setup..." -python scripts/init_db.py diff --git a/reports.yml b/reports.yml new file mode 100644 index 0000000..709d686 --- /dev/null +++ b/reports.yml @@ -0,0 +1,213 @@ +- name: hits + label: Hits + icon: pulse + chart: line + bucket: time_bucket + bucket_label: Time + query: | + SELECT {bucket} AS time_bucket, + COUNT(*) AS value + FROM logs + GROUP BY time_bucket + ORDER BY time_bucket + +- name: error_rate + label: Error Rate (%) + icon: file-alert + chart: line + bucket: time_bucket + bucket_label: Time + query: | + SELECT {bucket} AS time_bucket, + SUM(CASE WHEN status BETWEEN 400 AND 599 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value + FROM logs + GROUP BY time_bucket + ORDER BY time_bucket + +- name: cache_status_breakdown + label: Cache Status + icon: archive + chart: polarArea + bucket: cache_status + bucket_label: Cache Status + query: | + SELECT cache_status AS cache_status, + COUNT(*) AS value + FROM logs + GROUP BY cache_status + ORDER BY value DESC + colors: + - "#3273dc" + - "#23d160" + - "#ffdd57" + - "#ff3860" + - "#7957d5" + - "#363636" + +- name: domain_traffic + label: Top Domains + icon: globe + chart: table + top_n: 50 + per_domain: false + bucket: domain + bucket_label: Domain + query: | + SELECT host AS domain, + COUNT(*) AS value + FROM logs + GROUP BY domain + ORDER BY value DESC + +- name: bytes_sent + label: Bytes Sent + icon: upload + chart: line + bucket: time_bucket + bucket_label: Time + query: | + SELECT {bucket} AS time_bucket, + SUM(bytes_sent) AS value + FROM logs + GROUP BY time_bucket + ORDER BY time_bucket + +- name: top_paths + label: Top Paths + icon: map + chart: table + top_n: 50 + buckets: + - domain + - path + bucket_label: + - Domain + - Path + query: | + WITH paths AS ( + SELECT host AS domain, + substr(substr(request, instr(request, ' ') + 1), 1, + instr(substr(request, instr(request, ' ') + 1), ' ') - 1) AS path + FROM logs + ), ranked AS ( + SELECT domain, path, COUNT(*) AS value, + ROW_NUMBER() OVER (PARTITION BY domain ORDER BY COUNT(*) DESC) AS rn + FROM paths + GROUP BY domain, path + ) + SELECT domain, path, value + FROM ranked + WHERE rn <= 20 + ORDER BY domain, value DESC + +- name: user_agents + label: User Agents + icon: user + chart: table + top_n: 50 + buckets: + - domain + - user_agent + bucket_label: + - Domain + - 
User Agent + query: | + WITH ua AS ( + SELECT host AS domain, user_agent + FROM logs + ), ranked AS ( + SELECT domain, user_agent, COUNT(*) AS value, + ROW_NUMBER() OVER (PARTITION BY domain ORDER BY COUNT(*) DESC) AS rn + FROM ua + GROUP BY domain, user_agent + ) + SELECT domain, user_agent, value + FROM ranked + WHERE rn <= 20 + ORDER BY domain, value DESC + +- name: referrers + label: Referrers + icon: link + chart: table + top_n: 50 + buckets: + - domain + - referrer + bucket_label: + - Domain + - Referrer + query: | + WITH ref AS ( + SELECT host AS domain, referer AS referrer + FROM logs + ), ranked AS ( + SELECT domain, referrer, COUNT(*) AS value, + ROW_NUMBER() OVER (PARTITION BY domain ORDER BY COUNT(*) DESC) AS rn + FROM ref + GROUP BY domain, referrer + ) + SELECT domain, referrer, value + FROM ranked + WHERE rn <= 20 + ORDER BY domain, value DESC + +- name: status_distribution + label: HTTP Statuses + icon: server + chart: pie + bucket: status_group + bucket_label: Status + query: | + SELECT CASE + WHEN status BETWEEN 200 AND 299 THEN '2xx' + WHEN status BETWEEN 300 AND 399 THEN '3xx' + WHEN status BETWEEN 400 AND 499 THEN '4xx' + ELSE '5xx' + END AS status_group, + COUNT(*) AS value + FROM logs + GROUP BY status_group + ORDER BY status_group + colors: + - "#48c78e" + - "#209cee" + - "#ffdd57" + - "#f14668" + +# New time-series: status classes over time (stacked) +- name: status_classes_timeseries + label: Status Classes Over Time + icon: server + chart: stackedBar + bucket: time_bucket + bucket_label: Time + stacked: true + query: | + SELECT {bucket} AS time_bucket, + SUM(CASE WHEN status BETWEEN 200 AND 299 THEN 1 ELSE 0 END) AS "2xx", + SUM(CASE WHEN status BETWEEN 300 AND 399 THEN 1 ELSE 0 END) AS "3xx", + SUM(CASE WHEN status BETWEEN 400 AND 499 THEN 1 ELSE 0 END) AS "4xx", + SUM(CASE WHEN status BETWEEN 500 AND 599 THEN 1 ELSE 0 END) AS "5xx", + COUNT(*) AS total + FROM logs + GROUP BY time_bucket + ORDER BY time_bucket + +# New time-series: cache status over time (compact Hit/Miss; exclude '-' by default) +- name: cache_status_timeseries + label: Cache Status Over Time + icon: archive + chart: stackedBar + bucket: time_bucket + bucket_label: Time + stacked: true + exclude_values: ["-"] + query: | + SELECT {bucket} AS time_bucket, + SUM(CASE WHEN cache_status = 'HIT' THEN 1 ELSE 0 END) AS hit, + SUM(CASE WHEN cache_status = 'MISS' THEN 1 ELSE 0 END) AS miss, + COUNT(*) AS total + FROM logs + GROUP BY time_bucket + ORDER BY time_bucket diff --git a/requirements.txt b/requirements.txt index 221e3c8..2678f7b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ Flask # For optional lightweight API server # Linting / formatting (optional but recommended) black flake8 +PyYAML diff --git a/run-analysis.sh b/run-analysis.sh new file mode 100755 index 0000000..4149b9a --- /dev/null +++ b/run-analysis.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +set -e + +# Prevent concurrent executions of this script. +LOCK_FILE="/tmp/$(basename "$0").lock" +if [ -e "$LOCK_FILE" ]; then + echo "[WARN] $(basename "$0") is already running (lock file present)." >&2 + exit 0 +fi +touch "$LOCK_FILE" +trap 'rm -f "$LOCK_FILE"' EXIT + +# Ensure virtual environment exists +if [ ! -d ".venv" ]; then + echo "[INFO] Creating virtual environment..." + python3 -m venv .venv + source .venv/bin/activate + echo "[INFO] Installing dependencies..." 
+ pip install --upgrade pip + if [ -f requirements.txt ]; then + pip install -r requirements.txt + else + echo "[WARN] requirements.txt not found, skipping." + fi +else + echo "[INFO] Activating virtual environment..." + source .venv/bin/activate +fi + +# Run analysis helpers +echo "[INFO] Checking for missing domains..." +python -m scripts.analyze check-missing-domains + +echo "[INFO] Suggesting cache improvements..." +python -m scripts.analyze suggest-cache + +echo "[INFO] Detecting threats..." +python -m scripts.analyze detect-threats + +# Deactivate to keep cron environment clean +if type deactivate >/dev/null 2>&1; then + deactivate +fi diff --git a/run-import.sh b/run-import.sh index d951d8d..3c79d35 100755 --- a/run-import.sh +++ b/run-import.sh @@ -1,13 +1,39 @@ -#!/bin/bash +#!/usr/bin/env bash set -e -echo "[INFO] Creating virtual environment..." -python3 -m venv .venv -source .venv/bin/activate +# Prevent multiple simultaneous runs by using a lock file specific to this +# script. If the lock already exists, assume another instance is running and +# exit gracefully. +LOCK_FILE="/tmp/$(basename "$0").lock" +if [ -e "$LOCK_FILE" ]; then + echo "[WARN] $(basename "$0") is already running (lock file present)." >&2 + exit 0 +fi +touch "$LOCK_FILE" +trap 'rm -f "$LOCK_FILE"' EXIT -echo "[INFO] Installing dependencies..." -pip install --upgrade pip -pip install -r requirements.txt || echo "[WARN] requirements.txt not found, skipping." +# Ensure virtual environment exists +if [ ! -d ".venv" ]; then + echo "[INFO] Creating virtual environment..." + python3 -m venv .venv + source .venv/bin/activate + echo "[INFO] Installing dependencies..." + pip install --upgrade pip + if [ -f requirements.txt ]; then + pip install -r requirements.txt + else + echo "[WARN] requirements.txt not found, skipping." + fi +else + echo "[INFO] Activating virtual environment..." + source .venv/bin/activate +fi -echo "[INFO] Running database setup..." +# Run log import +echo "[INFO] Importing logs..." python scripts/init_db.py + +# Deactivate to keep cron environment clean +if type deactivate >/dev/null 2>&1; then + deactivate +fi diff --git a/run-reports.sh b/run-reports.sh new file mode 100755 index 0000000..f7cffba --- /dev/null +++ b/run-reports.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +set -e + +# Prevent concurrent executions of this script. +LOCK_FILE="/tmp/$(basename "$0").lock" +if [ -e "$LOCK_FILE" ]; then + echo "[WARN] $(basename "$0") is already running (lock file present)." >&2 + exit 0 +fi +touch "$LOCK_FILE" +trap 'rm -f "$LOCK_FILE"' EXIT + +# Ensure virtual environment exists +if [ ! -d ".venv" ]; then + echo "[INFO] Creating virtual environment..." + python3 -m venv .venv + source .venv/bin/activate + echo "[INFO] Installing dependencies..." + pip install --upgrade pip + if [ -f requirements.txt ]; then + pip install -r requirements.txt + else + echo "[WARN] requirements.txt not found, skipping." + fi +else + echo "[INFO] Activating virtual environment..." + source .venv/bin/activate +fi + +# Generate reports for all domains combined +echo "[INFO] Generating aggregate reports..." +python -m scripts.generate_reports hourly +python -m scripts.generate_reports daily +python -m scripts.generate_reports weekly +python -m scripts.generate_reports monthly +python -m scripts.generate_reports global + +# Generate reports for each individual domain +echo "[INFO] Generating per-domain reports..." 
+python -m scripts.generate_reports hourly --all-domains +python -m scripts.generate_reports daily --all-domains +python -m scripts.generate_reports weekly --all-domains +python -m scripts.generate_reports monthly --all-domains + +# Generate analysis JSON +echo "[INFO] Generating analysis files..." +python -m scripts.generate_reports analysis + +# Generate root index +python -m scripts.generate_reports index + +# Deactivate to keep cron environment clean +if type deactivate >/dev/null 2>&1; then + deactivate +fi diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..f4c57a1 --- /dev/null +++ b/scripts/__init__.py @@ -0,0 +1 @@ +"Utility package for ngxstat scripts" diff --git a/scripts/analyze.py b/scripts/analyze.py new file mode 100644 index 0000000..9f49978 --- /dev/null +++ b/scripts/analyze.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +"""Utility helpers for ad-hoc log analysis. + +This module exposes small helper functions to inspect the ``ngxstat`` SQLite +database. The intent is to allow quick queries from the command line or other +scripts without rewriting SQL each time. + +Examples +-------- +To list all domains present in the database:: + + python scripts/analyze.py domains + +The CLI is powered by :mod:`typer` and currently only offers a couple of +commands. More analysis routines can be added over time. +""" +from __future__ import annotations + +import sqlite3 +from pathlib import Path +from typing import List, Optional, Set +from datetime import datetime, timedelta + +import json + +import typer + +from scripts import nginx_config # noqa: F401 # imported for side effects/usage + +DB_PATH = Path("database/ngxstat.db") +ANALYSIS_DIR = Path("output/analysis") + +app = typer.Typer(help="Ad-hoc statistics queries") + + +def _connect() -> sqlite3.Connection: + """Return a new SQLite connection to :data:`DB_PATH`.""" + return sqlite3.connect(DB_PATH) + + +def load_domains_from_db() -> List[str]: + """Return a sorted list of distinct domains from the ``logs`` table.""" + conn = _connect() + cur = conn.cursor() + cur.execute("SELECT DISTINCT host FROM logs ORDER BY host") + domains = [row[0] for row in cur.fetchall()] + conn.close() + return domains + + +def get_hit_count(domain: Optional[str] = None) -> int: + """Return total request count. + + Parameters + ---------- + domain: + Optional domain to filter on. If ``None`` the count includes all logs. 
+ """ + conn = _connect() + cur = conn.cursor() + if domain: + cur.execute("SELECT COUNT(*) FROM logs WHERE host = ?", (domain,)) + else: + cur.execute("SELECT COUNT(*) FROM logs") + count = cur.fetchone()[0] or 0 + conn.close() + return count + + +def get_cache_ratio(domain: Optional[str] = None) -> float: + """Return the percentage of requests served from cache.""" + conn = _connect() + cur = conn.cursor() + if domain: + cur.execute( + "SELECT SUM(CASE WHEN cache_status = 'HIT' THEN 1 ELSE 0 END) * 1.0 / " + "COUNT(*) FROM logs WHERE host = ?", + (domain,), + ) + else: + cur.execute( + "SELECT SUM(CASE WHEN cache_status = 'HIT' THEN 1 ELSE 0 END) * 1.0 / " + "COUNT(*) FROM logs" + ) + result = cur.fetchone()[0] + conn.close() + return float(result or 0.0) + + +@app.command() +def domains() -> None: + """Print the list of domains discovered in the database.""" + for d in load_domains_from_db(): + typer.echo(d) + + +@app.command() +def hits(domain: Optional[str] = typer.Option(None, help="Filter by domain")) -> None: + """Show request count.""" + count = get_hit_count(domain) + if domain: + typer.echo(f"{domain}: {count} hits") + else: + typer.echo(f"Total hits: {count}") + + +@app.command("cache-ratio") +def cache_ratio_cmd( + domain: Optional[str] = typer.Option(None, help="Filter by domain") +) -> None: + """Display cache hit ratio as a percentage.""" + ratio = get_cache_ratio(domain) * 100 + if domain: + typer.echo(f"{domain}: {ratio:.2f}% cached") + else: + typer.echo(f"Cache hit ratio: {ratio:.2f}%") + + +@app.command("check-missing-domains") +def check_missing_domains( + json_output: bool = typer.Option( + False, "--json", help="Output missing domains as JSON" + ) +) -> None: + """Show domains present in the database but absent from Nginx config.""" + try: + from scripts.generate_reports import _get_domains as _db_domains + except Exception: # pragma: no cover - fallback if import fails + _db_domains = load_domains_from_db + + if not isinstance(json_output, bool): + json_output = False + + db_domains = set(_db_domains()) + + paths = nginx_config.discover_configs() + servers = nginx_config.parse_servers(paths) + config_domains: Set[str] = set() + for server in servers: + names = server.get("server_name", "") + for name in names.split(): + if name: + config_domains.add(name) + + missing = sorted(db_domains - config_domains) + + ANALYSIS_DIR.mkdir(parents=True, exist_ok=True) + out_path = ANALYSIS_DIR / "missing_domains.json" + out_path.write_text(json.dumps(missing, indent=2)) + + if json_output: + typer.echo(json.dumps(missing)) + else: + for d in missing: + typer.echo(d) + + +def suggest_cache( + threshold: int = 10, + json_output: bool = False, +) -> None: + """Suggest domain/path pairs that could benefit from caching. + + Paths with at least ``threshold`` ``MISS`` entries are shown for domains + whose server blocks lack a ``proxy_cache`` directive. + """ + + # Discover domains without explicit proxy_cache + paths = nginx_config.discover_configs() + servers = nginx_config.parse_servers(paths) + no_cache: Set[str] = set() + for server in servers: + if "proxy_cache" in server: + continue + for name in server.get("server_name", "").split(): + if name: + no_cache.add(name) + + conn = _connect() + cur = conn.cursor() + cur.execute( + """ + SELECT host, + substr(request, instr(request, ' ')+1, + instr(request, ' HTTP') - instr(request, ' ') - 1) AS path, + COUNT(*) AS miss_count + FROM logs + WHERE cache_status = 'MISS' + GROUP BY host, path + HAVING miss_count >= ? 
+ ORDER BY miss_count DESC + """, + (int(threshold),), + ) + + rows = [r for r in cur.fetchall() if r[0] in no_cache] + conn.close() + + result = [ + {"host": host, "path": path, "misses": count} for host, path, count in rows + ] + + ANALYSIS_DIR.mkdir(parents=True, exist_ok=True) + out_path = ANALYSIS_DIR / "cache_suggestions.json" + out_path.write_text(json.dumps(result, indent=2)) + + if json_output: + typer.echo(json.dumps(result)) + else: + for item in result: + typer.echo(f"{item['host']} {item['path']} {item['misses']}") + +@app.command("suggest-cache") +def suggest_cache_cli( + threshold: int = typer.Option(10, help="Minimum number of MISS entries to report"), + json_output: bool = typer.Option(False, "--json", help="Output results as JSON"), +) -> None: + """CLI wrapper for suggest_cache.""" + suggest_cache(threshold=threshold, json_output=json_output) + + +def detect_threats( + hours: int = 1, + ip_threshold: int = 100, +) -> None: + """Detect potential security threats from recent logs.""" + + conn = _connect() + cur = conn.cursor() + + cur.execute("SELECT MAX(time) FROM logs") + row = cur.fetchone() + if not row or not row[0]: + typer.echo("No logs found") + conn.close() + return + + max_dt = datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S") + recent_end = max_dt + recent_start = recent_end - timedelta(hours=int(hours)) + prev_start = recent_start - timedelta(hours=int(hours)) + prev_end = recent_start + + fmt = "%Y-%m-%d %H:%M:%S" + recent_start_s = recent_start.strftime(fmt) + recent_end_s = recent_end.strftime(fmt) + prev_start_s = prev_start.strftime(fmt) + prev_end_s = prev_end.strftime(fmt) + + cur.execute( + """ + SELECT host, + SUM(CASE WHEN status >= 400 THEN 1 ELSE 0 END) AS errors, + COUNT(*) AS total + FROM logs + WHERE time >= ? AND time < ? + GROUP BY host + """, + (recent_start_s, recent_end_s), + ) + recent_rows = {r[0]: (r[1], r[2]) for r in cur.fetchall()} + + cur.execute( + """ + SELECT host, + SUM(CASE WHEN status >= 400 THEN 1 ELSE 0 END) AS errors, + COUNT(*) AS total + FROM logs + WHERE time >= ? AND time < ? + GROUP BY host + """, + (prev_start_s, prev_end_s), + ) + prev_rows = {r[0]: (r[1], r[2]) for r in cur.fetchall()} + + error_spikes = [] + for host in set(recent_rows) | set(prev_rows): + r_err, r_total = recent_rows.get(host, (0, 0)) + p_err, p_total = prev_rows.get(host, (0, 0)) + r_rate = r_err * 100.0 / r_total if r_total else 0.0 + p_rate = p_err * 100.0 / p_total if p_total else 0.0 + if r_rate >= 10 and r_rate >= p_rate * 2: + error_spikes.append( + { + "host": host, + "recent_error_rate": round(r_rate, 2), + "previous_error_rate": round(p_rate, 2), + } + ) + + cur.execute( + """ + SELECT DISTINCT user_agent FROM logs + WHERE time >= ? AND time < ? + """, + (prev_start_s, prev_end_s), + ) + prev_agents = {r[0] for r in cur.fetchall()} + + cur.execute( + """ + SELECT user_agent, COUNT(*) AS c + FROM logs + WHERE time >= ? AND time < ? + GROUP BY user_agent + HAVING c >= 10 + """, + (recent_start_s, recent_end_s), + ) + suspicious_agents = [ + {"user_agent": ua, "requests": cnt} + for ua, cnt in cur.fetchall() + if ua not in prev_agents + ] + + cur.execute( + """ + SELECT ip, COUNT(*) AS c + FROM logs + WHERE time >= ? AND time < ? + GROUP BY ip + HAVING c >= ? 
+ ORDER BY c DESC + """, + (recent_start_s, recent_end_s, ip_threshold), + ) + high_ip_requests = [{"ip": ip, "requests": cnt} for ip, cnt in cur.fetchall()] + + conn.close() + + report = { + "time_range": { + "recent_start": recent_start_s, + "recent_end": recent_end_s, + "previous_start": prev_start_s, + "previous_end": prev_end_s, + }, + "error_spikes": error_spikes, + "suspicious_agents": suspicious_agents, + "high_ip_requests": high_ip_requests, + } + + ANALYSIS_DIR.mkdir(parents=True, exist_ok=True) + out_path = ANALYSIS_DIR / "threat_report.json" + out_path.write_text(json.dumps(report, indent=2)) + typer.echo(json.dumps(report)) + +@app.command("detect-threats") +def detect_threats_cli( + hours: int = typer.Option(1, help="Number of recent hours to analyze"), + ip_threshold: int = typer.Option(100, help="Requests from a single IP to flag"), +) -> None: + """CLI wrapper for detect_threats.""" + detect_threats(hours=hours, ip_threshold=ip_threshold) + + +if __name__ == "__main__": + app() diff --git a/scripts/download_icons.py b/scripts/download_icons.py new file mode 100644 index 0000000..6f4675a --- /dev/null +++ b/scripts/download_icons.py @@ -0,0 +1,28 @@ +import json +from urllib.request import urlopen, Request +from pathlib import Path + +ICON_LIST_URL = "https://cc0-icons.jonh.eu/icons.json" +BASE_URL = "https://cc0-icons.jonh.eu/" + +OUTPUT_DIR = Path(__file__).resolve().parent.parent / "static" / "icons" + + +def main() -> None: + OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + req = Request(ICON_LIST_URL, headers={"User-Agent": "Mozilla/5.0"}) + with urlopen(req) as resp: + data = json.load(resp) + icons = data.get("icons", []) + for icon in icons: + slug = icon.get("slug") + url = BASE_URL + icon.get("url") + path = OUTPUT_DIR / f"{slug}.svg" + req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) + with urlopen(req) as resp: + path.write_bytes(resp.read()) + print(f"Downloaded {len(icons)} icons to {OUTPUT_DIR}") + + +if __name__ == "__main__": + main() diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index b244075..d3c2f8a 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -1,79 +1,466 @@ import json +import sys import sqlite3 from pathlib import Path -from typing import List, Dict +import shutil +from typing import List, Dict, Optional +from datetime import datetime, timezone +import time + +import yaml import typer from jinja2 import Environment, FileSystemLoader +# Ensure project root is importable when running as a script (python scripts/generate_reports.py) +PROJECT_ROOT = Path(__file__).resolve().parent.parent +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + DB_PATH = Path("database/ngxstat.db") OUTPUT_DIR = Path("output") TEMPLATE_DIR = Path("templates") +REPORT_CONFIG = Path("reports.yml") +GENERATED_MARKER = OUTPUT_DIR / "generated.txt" + +# Mapping of interval names to SQLite strftime formats. These strings are +# substituted into report queries whenever the special ``{bucket}`` token is +# present so that a single report definition can be reused for multiple +# intervals. 
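+# For example, the ``daily`` interval expands ``{bucket}`` to +# ``strftime('%Y-%m-%d', datetime(time))``; see ``_bucket_expr`` below.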
+INTERVAL_FORMATS = { + "hourly": "%Y-%m-%d %H:00:00", + "daily": "%Y-%m-%d", + "weekly": "%Y-%W", + "monthly": "%Y-%m", +} app = typer.Typer(help="Generate aggregated log reports") -def _load_existing(path: Path) -> List[Dict]: - if path.exists(): - try: - return json.loads(path.read_text()) - except Exception: - return [] - return [] + +@app.callback() +def _cli_callback(ctx: typer.Context) -> None: + """Register post-command hook to note generation time.""" + + def _write_marker() -> None: + OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + # Use timezone-aware UTC to avoid deprecation warnings and ambiguity + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") + GENERATED_MARKER.write_text(f"{timestamp}\n") + + ctx.call_on_close(_write_marker) + + +def _get_domains() -> List[str]: + """Return a sorted list of unique domains from the logs table.""" + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + cur.execute("SELECT DISTINCT host FROM logs ORDER BY host") + domains = [row[0] for row in cur.fetchall()] + conn.close() + return domains + + +def _load_config() -> List[Dict]: + if not REPORT_CONFIG.exists(): + typer.echo(f"Config file not found: {REPORT_CONFIG}") + raise typer.Exit(1) + with REPORT_CONFIG.open("r") as fh: + data = yaml.safe_load(fh) or [] + if not isinstance(data, list): + typer.echo("reports.yml must contain a list of report definitions") + raise typer.Exit(1) + return data + def _save_json(path: Path, data: List[Dict]) -> None: path.parent.mkdir(parents=True, exist_ok=True) path.write_text(json.dumps(data, indent=2)) -def _render_html(interval: str, json_name: str, out_path: Path) -> None: + +def _copy_icons() -> None: + """Copy vendored icons and scripts to the output directory.""" + src_dir = Path("static/icons") + dst_dir = OUTPUT_DIR / "icons" + if src_dir.is_dir(): + dst_dir.mkdir(parents=True, exist_ok=True) + for icon in src_dir.glob("*.svg"): + shutil.copy(icon, dst_dir / icon.name) + + js_src = Path("static/chartManager.js") + if js_src.is_file(): + shutil.copy(js_src, OUTPUT_DIR / js_src.name) + + +def _render_snippet(report: Dict, out_dir: Path) -> None: + """Render a single report snippet to ``.html`` inside ``out_dir``.""" env = Environment(loader=FileSystemLoader(TEMPLATE_DIR)) - template = env.get_template("report.html") - out_path.write_text(template.render(interval=interval, json_path=json_name)) + template = env.get_template("report_snippet.html") + snippet_path = out_dir / f"{report['name']}.html" + snippet_path.write_text(template.render(report=report)) -def _aggregate(interval: str, fmt: str) -> None: - json_path = OUTPUT_DIR / f"{interval}.json" - html_path = OUTPUT_DIR / f"{interval}.html" - existing = _load_existing(json_path) - last_bucket = existing[-1]["bucket"] if existing else None +def _write_stats( + generated_at: Optional[str] = None, generation_seconds: Optional[float] = None +) -> None: + """Query basic dataset stats and write them to ``output/global/stats.json``.""" + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + cur.execute("SELECT COUNT(*) FROM logs") + total_logs = cur.fetchone()[0] or 0 + + cur.execute("SELECT MIN(time), MAX(time) FROM logs") + row = cur.fetchone() or (None, None) + start_date = row[0] or "" + end_date = row[1] or "" + + cur.execute("SELECT COUNT(DISTINCT host) FROM logs") + unique_domains = cur.fetchone()[0] or 0 + + conn.close() + + stats = { + "total_logs": total_logs, + "start_date": start_date, + "end_date": end_date, + "unique_domains": unique_domains, + } + if generated_at: + 
stats["generated_at"] = generated_at + if generation_seconds is not None: + stats["generation_seconds"] = generation_seconds + + out_path = OUTPUT_DIR / "global" / "stats.json" + _save_json(out_path, stats) + + +def _bucket_expr(interval: str) -> str: + """Return the SQLite strftime expression for the given interval.""" + fmt = INTERVAL_FORMATS.get(interval) + if not fmt: + typer.echo(f"Unsupported interval: {interval}") + raise typer.Exit(1) + return f"strftime('{fmt}', datetime(time))" + + +def _generate_interval(interval: str, domain: Optional[str] = None) -> None: + cfg = _load_config() + if not cfg: + typer.echo("No report definitions found") + return + + _copy_icons() + + bucket = _bucket_expr(interval) conn = sqlite3.connect(DB_PATH) cur = conn.cursor() - query = f"SELECT strftime('{fmt}', datetime(time)) as bucket, COUNT(*) as hits FROM logs" - params = [] - if last_bucket: - query += " WHERE datetime(time) > datetime(?)" - params.append(last_bucket) - query += " GROUP BY bucket ORDER BY bucket" + # Create a temporary view so queries can easily be filtered by domain + cur.execute("DROP VIEW IF EXISTS logs_view") + if domain: + # Parameters are not allowed in CREATE VIEW statements, so we must + # safely interpolate the domain value ourselves. Escape any single + # quotes to prevent malformed queries. + safe_domain = domain.replace("'", "''") + cur.execute( + f"CREATE TEMP VIEW logs_view AS SELECT * FROM logs WHERE host = '{safe_domain}'" + ) + out_dir = OUTPUT_DIR / "domains" / domain / interval + else: + cur.execute("CREATE TEMP VIEW logs_view AS SELECT * FROM logs") + out_dir = OUTPUT_DIR / interval - rows = cur.execute(query, params).fetchall() - for bucket, hits in rows: - existing.append({"bucket": bucket, "hits": hits}) + out_dir.mkdir(parents=True, exist_ok=True) + + report_list = [] + for definition in cfg: + if "{bucket}" not in definition["query"] or definition.get("global"): + # Global reports are generated separately + continue + if domain and not definition.get("per_domain", True): + # Skip reports marked as not applicable to per-domain runs + continue + + name = definition["name"] + query = definition["query"].replace("{bucket}", bucket) + query = query.replace("FROM logs", "FROM logs_view") + # Apply top_n limit for tables (performance-friendly), if configured + top_n = definition.get("top_n") + chart_type = definition.get("chart", "line") + if top_n and chart_type == "table": + try: + n = int(top_n) + if "LIMIT" not in query.upper(): + query = f"{query}\nLIMIT {n}" + except Exception: + pass + cur.execute(query) + rows = cur.fetchall() + headers = [c[0] for c in cur.description] + data = [dict(zip(headers, row)) for row in rows] + json_path = out_dir / f"{name}.json" + _save_json(json_path, data) + entry = { + "name": name, + "label": definition.get("label", name.title()), + "chart": definition.get("chart", "line"), + "json": f"{name}.json", + "html": f"{name}.html", + } + if "icon" in definition: + entry["icon"] = definition["icon"] + if "bucket" in definition: + entry["bucket"] = definition["bucket"] + if "buckets" in definition: + entry["buckets"] = definition["buckets"] + if "bucket_label" in definition: + entry["bucket_label"] = definition["bucket_label"] + if "color" in definition: + entry["color"] = definition["color"] + if "colors" in definition: + entry["colors"] = definition["colors"] + # Optional UX metadata passthrough for frontend-only transforms + for key in ( + "windows_supported", + "window_default", + "group_others_threshold", + "exclude_values", + 
"top_n", + "stacked", + "palette", + ): + if key in definition: + entry[key] = definition[key] + _render_snippet(entry, out_dir) + report_list.append(entry) + + _save_json(out_dir / "reports.json", report_list) + if domain: + typer.echo(f"Generated {interval} reports for {domain}") + else: + typer.echo(f"Generated {interval} reports") + + +def _generate_all_domains(interval: str) -> None: + """Generate reports for each unique domain.""" + for domain in _get_domains(): + _generate_interval(interval, domain) + + +def _generate_root_index() -> None: + """Render the top-level index listing all intervals and domains.""" + _copy_icons() + intervals = sorted( + [name for name in INTERVAL_FORMATS if (OUTPUT_DIR / name).is_dir()] + ) + + domains_dir = OUTPUT_DIR / "domains" + domains: List[str] = [] + if domains_dir.is_dir(): + domains = [p.name for p in domains_dir.iterdir() if p.is_dir()] + domains.sort() + + env = Environment(loader=FileSystemLoader(TEMPLATE_DIR)) + template = env.get_template("index.html") + + OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + out_path = OUTPUT_DIR / "index.html" + out_path.write_text(template.render(intervals=intervals, domains=domains)) + typer.echo(f"Generated root index at {out_path}") + + +def _generate_global() -> None: + """Generate reports that do not depend on an interval.""" + cfg = _load_config() + if not cfg: + typer.echo("No report definitions found") + return + + start_time = time.time() + # Use timezone-aware UTC for generated_at (string remains unchanged format) + generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") + + _copy_icons() + + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + out_dir = OUTPUT_DIR / "global" + out_dir.mkdir(parents=True, exist_ok=True) + + report_list = [] + for definition in cfg: + if "{bucket}" in definition["query"] and not definition.get("global"): + continue + + name = definition["name"] + query = definition["query"] + # Apply top_n limit for tables (performance-friendly), if configured + top_n = definition.get("top_n") + chart_type = definition.get("chart", "line") + if top_n and chart_type == "table": + try: + n = int(top_n) + if "LIMIT" not in query.upper(): + query = f"{query}\nLIMIT {n}" + except Exception: + pass + cur.execute(query) + rows = cur.fetchall() + headers = [c[0] for c in cur.description] + data = [dict(zip(headers, row)) for row in rows] + json_path = out_dir / f"{name}.json" + _save_json(json_path, data) + entry = { + "name": name, + "label": definition.get("label", name.title()), + "chart": definition.get("chart", "line"), + "json": f"{name}.json", + "html": f"{name}.html", + } + if "icon" in definition: + entry["icon"] = definition["icon"] + if "bucket" in definition: + entry["bucket"] = definition["bucket"] + if "buckets" in definition: + entry["buckets"] = definition["buckets"] + if "bucket_label" in definition: + entry["bucket_label"] = definition["bucket_label"] + if "color" in definition: + entry["color"] = definition["color"] + if "colors" in definition: + entry["colors"] = definition["colors"] + # Optional UX metadata passthrough for frontend-only transforms + for key in ( + "windows_supported", + "window_default", + "group_others_threshold", + "exclude_values", + "top_n", + "stacked", + "palette", + ): + if key in definition: + entry[key] = definition[key] + _render_snippet(entry, out_dir) + report_list.append(entry) + + _save_json(out_dir / "reports.json", report_list) + elapsed = round(time.time() - start_time, 2) + _write_stats(generated_at, elapsed) + 
typer.echo("Generated global reports") + + +def _generate_analysis() -> None: + """Generate analysis JSON files consumed by the Analysis tab.""" + try: + # Import lazily to avoid circulars and keep dependencies optional + from scripts import analyze + except Exception as exc: # pragma: no cover - defensive + typer.echo(f"Failed to import analysis module: {exc}") + return + + # Ensure output root and icons present for parity + _copy_icons() + + # These commands write JSON files under output/analysis/ + try: + analyze.check_missing_domains(json_output=True) + except Exception as exc: # pragma: no cover - continue best-effort + typer.echo(f"check_missing_domains failed: {exc}") + try: + analyze.suggest_cache(json_output=True) + except Exception as exc: # pragma: no cover + typer.echo(f"suggest_cache failed: {exc}") + try: + analyze.detect_threats() + except Exception as exc: # pragma: no cover + typer.echo(f"detect_threats failed: {exc}") + typer.echo("Generated analysis JSON files") - existing.sort(key=lambda x: x["bucket"]) - _save_json(json_path, existing) - _render_html(interval, json_path.name, html_path) - typer.echo(f"Generated {json_path} and {html_path}") @app.command() -def hourly() -> None: - """Aggregate logs into hourly buckets.""" - _aggregate("hourly", "%Y-%m-%d %H:00:00") +def hourly( + domain: Optional[str] = typer.Option( + None, help="Generate reports for a specific domain" + ), + all_domains: bool = typer.Option( + False, "--all-domains", help="Generate reports for each domain" + ), +) -> None: + """Generate hourly reports.""" + if all_domains: + _generate_all_domains("hourly") + else: + _generate_interval("hourly", domain) + @app.command() -def daily() -> None: - """Aggregate logs into daily buckets.""" - _aggregate("daily", "%Y-%m-%d") +def daily( + domain: Optional[str] = typer.Option( + None, help="Generate reports for a specific domain" + ), + all_domains: bool = typer.Option( + False, "--all-domains", help="Generate reports for each domain" + ), +) -> None: + """Generate daily reports.""" + if all_domains: + _generate_all_domains("daily") + else: + _generate_interval("daily", domain) + @app.command() -def weekly() -> None: - """Aggregate logs into weekly buckets.""" - _aggregate("weekly", "%Y-%W") +def weekly( + domain: Optional[str] = typer.Option( + None, help="Generate reports for a specific domain" + ), + all_domains: bool = typer.Option( + False, "--all-domains", help="Generate reports for each domain" + ), +) -> None: + """Generate weekly reports.""" + if all_domains: + _generate_all_domains("weekly") + else: + _generate_interval("weekly", domain) + @app.command() -def monthly() -> None: - """Aggregate logs into monthly buckets.""" - _aggregate("monthly", "%Y-%m") +def monthly( + domain: Optional[str] = typer.Option( + None, help="Generate reports for a specific domain" + ), + all_domains: bool = typer.Option( + False, "--all-domains", help="Generate reports for each domain" + ), +) -> None: + """Generate monthly reports.""" + if all_domains: + _generate_all_domains("monthly") + else: + _generate_interval("monthly", domain) + + +@app.command("global") +def global_reports() -> None: + """Generate global reports.""" + _generate_global() + + +@app.command() +def analysis() -> None: + """Generate analysis JSON files for the Analysis tab.""" + _generate_analysis() + + +@app.command() +def index() -> None: + """Generate the root index page linking all reports.""" + _generate_root_index() + if __name__ == "__main__": app() diff --git a/scripts/init_db.py 
b/scripts/init_db.py index 8a3a89c..b9ea07d 100644 --- a/scripts/init_db.py +++ b/scripts/init_db.py @@ -1,23 +1,25 @@ #!/usr/bin/env python3 import os -import sqlite3 import re -from pathlib import Path +import sqlite3 +from datetime import datetime, timezone LOG_DIR = "/var/log/nginx" DB_FILE = "database/ngxstat.db" -LOG_FILE_PATTERN = re.compile(r'access\.log(\.\d+)?$') +LOG_FILE_PATTERN = re.compile(r"access\.log(\.\d+)?$") LOG_FORMAT_REGEX = re.compile( r'(?P\S+) - (?P\S+) \[(?P