diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 95f9808..0000000 --- a/.flake8 +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -exclude = .git, .venv, output, static/icons -max-line-length = 160 diff --git a/.forgejo/workflows/ci.yml b/.forgejo/workflows/ci.yml deleted file mode 100644 index 5cf26be..0000000 --- a/.forgejo/workflows/ci.yml +++ /dev/null @@ -1,151 +0,0 @@ -name: CI - -on: - push: - pull_request: - workflow_dispatch: - -jobs: - ci: - name: Lint, test, and build - # This label must match your Forgejo runner's label - runs-on: docker - # Use a clean Debian container so tools are predictable - container: debian:stable-slim - env: - PYTHONDONTWRITEBYTECODE: "1" - PIP_DISABLE_PIP_VERSION_CHECK: "1" - UV_SYSTEM_PYTHON: "1" - steps: - - name: Install build tooling - run: | - set -euo pipefail - apt-get update - DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - git ca-certificates python3 python3-venv python3-pip python3-setuptools \ - python3-wheel sqlite3 - update-ca-certificates || true - - - name: Checkout repository (manual) - run: | - set -euo pipefail - if [ -f Makefile ] || [ -d .git ]; then - echo "Repository present in workspace; skipping clone" - exit 0 - fi - REMOTE_URL="${CI_REPOSITORY_URL:-}" - if [ -z "$REMOTE_URL" ]; then - if [ -n "${GITHUB_SERVER_URL:-}" ] && [ -n "${GITHUB_REPOSITORY:-}" ]; then - REMOTE_URL="${GITHUB_SERVER_URL%/}/${GITHUB_REPOSITORY}.git" - elif [ -n "${GITHUB_REPOSITORY:-}" ]; then - REMOTE_URL="https://git.jordanwages.com/${GITHUB_REPOSITORY}.git" - else - echo "Unable to determine repository URL from CI environment" >&2 - exit 1 - fi - fi - AUTH_URL="$REMOTE_URL" - if [ -n "${GITHUB_TOKEN:-}" ]; then - ACTOR="${GITHUB_ACTOR:-oauth2}" - AUTH_URL=$(printf '%s' "$REMOTE_URL" | sed -E "s#^https://#https://${ACTOR}:${GITHUB_TOKEN}@#") - fi - echo "Cloning from: $REMOTE_URL" - if ! git clone --depth 1 "$AUTH_URL" .; then - echo "Auth clone failed; trying anonymous clone..." >&2 - git clone --depth 1 "$REMOTE_URL" . - fi - if [ -n "${GITHUB_SHA:-}" ]; then - git fetch --depth 1 origin "$GITHUB_SHA" || true - git checkout -q "$GITHUB_SHA" || true - elif [ -n "${GITHUB_REF_NAME:-}" ]; then - git fetch --depth 1 origin "$GITHUB_REF_NAME" || true - git checkout -q "$GITHUB_REF_NAME" || true - fi - - - name: Set up venv and install deps - run: | - set -euo pipefail - # Prefer persistent cache if runner provides /cache - USE_CACHE=0 - if [ -d /cache ] && [ -w /cache ]; then - export PIP_CACHE_DIR=/cache/pip - mkdir -p "$PIP_CACHE_DIR" - REQ_HASH=$(sha256sum requirements.txt | awk '{print $1}') - PYVER=$(python3 -c 'import sys;print(".".join(map(str, sys.version_info[:2])))') - CACHE_VENV="/cache/venv-${REQ_HASH}-py${PYVER}" - if [ ! -f "$CACHE_VENV/bin/activate" ]; then - echo "Preparing cached virtualenv: $CACHE_VENV" - rm -rf "$CACHE_VENV" || true - python3 -m venv "$CACHE_VENV" - fi - ln -sfn "$CACHE_VENV" .venv - USE_CACHE=1 - else - # Fallback to local venv - python3 -m venv .venv - fi - - # If the link didn't produce an activate file, fallback to local venv - if [ ! -f .venv/bin/activate ]; then - echo "Cached venv missing; creating local .venv" - rm -f .venv - python3 -m venv .venv - USE_CACHE=0 - fi - - . .venv/bin/activate - python -m pip install --upgrade pip - if [ "$USE_CACHE" = "1" ]; then - # Ensure required packages are present; pip will use cache - pip install -r requirements.txt pytest || pip install -r requirements.txt pytest - else - pip install -r requirements.txt pytest - fi - - - name: Format check (black) - run: | - . .venv/bin/activate - black --check . - - - name: Lint (flake8) - run: | - . .venv/bin/activate - flake8 . - - - name: Run tests (pytest) - run: | - . .venv/bin/activate - export PYTHONPATH="$(pwd)${PYTHONPATH:+:$PYTHONPATH}" - pytest -q --maxfail=1 - - - name: Build sample reports (no artifact upload) - run: | - set -euo pipefail - . .venv/bin/activate - python - <<'PY' - import sqlite3, pathlib - db = pathlib.Path('database/ngxstat.db') - db.parent.mkdir(parents=True, exist_ok=True) - conn = sqlite3.connect(db) - cur = conn.cursor() - cur.execute('''CREATE TABLE IF NOT EXISTS logs ( - id INTEGER PRIMARY KEY, - ip TEXT, - host TEXT, - time TEXT, - request TEXT, - status INTEGER, - bytes_sent INTEGER, - referer TEXT, - user_agent TEXT, - cache_status TEXT - )''') - cur.execute("INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES ('127.0.0.1','example.com','2024-01-01 10:00:00','GET / HTTP/1.1',200,100,'-','curl','MISS')") - cur.execute("INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status) VALUES ('127.0.0.1','example.com','2024-01-01 10:05:00','GET /about HTTP/1.1',200,100,'-','curl','MISS')") - conn.commit(); conn.close() - PY - python scripts/generate_reports.py global - python scripts/generate_reports.py hourly - python scripts/generate_reports.py index - tar -czf ngxstat-reports.tar.gz -C output . - echo "Built sample reports archive: ngxstat-reports.tar.gz" diff --git a/AGENTS.md b/AGENTS.md index 7e7d3c5..ab65e99 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -21,11 +21,8 @@ This document outlines general practices and expectations for AI agents assistin source .venv/bin/activate pip install -r requirements.txt ``` - The `run-import.sh` script can initialize this environment automatically. - Always activate the virtual environment before running scripts or tests. - -* Before committing code run `black` for consistent formatting and execute - the test suite with `pytest`. All tests should pass. + The `init.sh` script can create this environment automatically. Always + activate it before running scripts or tests. * Dependency management: Use `requirements.txt` or `pip-tools` * Use standard libraries where feasible (e.g., `sqlite3`, `argparse`, `datetime`) @@ -42,19 +39,13 @@ This document outlines general practices and expectations for AI agents assistin * Use latest CDN version for embedded dashboards * Charts should be rendered from pre-generated JSON blobs in `/json/` -### Tables: DataTables - -* Use DataTables via CDN for reports with `chart: table` -* Requires jQuery from a CDN -* Table data comes from the same `/json/` files as charts - ### Styling: Bulma CSS * Use via CDN or vendored minified copy (to keep reports fully static) * Stick to default components (columns, cards, buttons, etc.) * No JS dependencies from Bulma -### Icon Set: [Free CC0 Icons (CC0)](https://cc0-icons.jonh.eu/) +### Icon Set: [Feather Icons (CC0)](https://feathericons.com/) * License: MIT / CC0-like * Use SVG versions @@ -92,14 +83,6 @@ ngxstat/ If uncertain, the agent should prompt the human for clarification before making architectural assumptions. -## Testing - -Use `pytest` for automated tests. Run the suite from an activated virtual environment and ensure all tests pass before committing: - -```bash -pytest -q -``` - --- ## Future Capabilities @@ -117,4 +100,3 @@ As the project matures, agents may also: * **2025-07-17**: Initial version by Jordan + ChatGPT * **2025-07-17**: Expanded virtual environment usage guidance - diff --git a/README.md b/README.md index ac601fc..bc2db2d 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,11 @@ # ngxstat +Per-domain Nginx log analytics with hybrid static reports and live insights. -`ngxstat` is a lightweight log analytics toolkit for Nginx. It imports access -logs into an SQLite database and renders static dashboards so you can explore -per-domain metrics without running a heavy backend service. +## Generating Reports -## Requirements +Use the `generate_reports.py` script to build aggregated JSON and HTML files from `database/ngxstat.db`. -* Python 3.10+ -* Access to the Nginx log files (default: `/var/log/nginx`) - -The helper scripts create a virtual environment on first run, but you can also -set one up manually: +Create a virtual environment and install dependencies: ```bash python3 -m venv .venv @@ -18,95 +13,13 @@ source .venv/bin/activate pip install -r requirements.txt ``` -## Importing Logs - -Run the importer to ingest new log entries into `database/ngxstat.db`: - -```bash -./run-import.sh -``` - -Rotated logs are processed in order and only entries newer than the last -imported timestamp are added. - -## Generating Reports - -To build the HTML dashboard and JSON data files use `run-reports.sh` which runs -all intervals in one go: - -```bash -./run-reports.sh -``` - -The script calls `scripts/generate_reports.py` internally to create hourly, -daily, weekly and monthly reports, then writes analysis JSON files used by the -"Analysis" tab. Per-domain reports are written under `output/domains/` -alongside the aggregate data. Open `output/index.html` in a browser to view the -dashboard. - -If you prefer to run individual commands you can invoke the generator directly: +Then run one or more of the interval commands: ```bash python scripts/generate_reports.py hourly -python scripts/generate_reports.py daily --all-domains +python scripts/generate_reports.py daily +python scripts/generate_reports.py weekly +python scripts/generate_reports.py monthly ``` -## Analysis Helpers - -`run-analysis.sh` executes additional utilities that examine the database for -missing domains, caching opportunities and potential threats. The JSON output is -saved under `output/analysis` and appears in the "Analysis" tab. The -`run-reports.sh` script also generates these JSON files as part of the build. - -## UX Controls - -The dashboard defaults to a 7‑day window for time series. Your view preferences -persist locally in the browser under the `ngxstat-state-v2` key. Use the -"Reset view" button to clear saved state and restore defaults. - -```bash -./run-analysis.sh -``` - -## Serving the Reports - -The generated files are static. You can serve them with a simple Nginx block: - -```nginx -server { - listen 80; - server_name example.com; - root /path/to/ngxstat/output; - - location / { - try_files $uri $uri/ =404; - } -} -``` - -Restrict access if the reports should not be public. - -## Running Tests - -Install the development dependencies and execute the suite with `pytest`: - -```bash -pip install -r requirements.txt -pytest -q -``` - -All tests must pass before submitting changes. - -## Acknowledgements - -ngxstat uses the following third‑party resources: - -* [Chart.js](https://www.chartjs.org/) for charts -* [DataTables](https://datatables.net/) and [jQuery](https://jquery.com/) for table views -* [Bulma CSS](https://bulma.io/) for styling -* Icons from [Free CC0 Icons](https://cc0-icons.jonh.eu/) by Jon Hicks (CC0 / MIT) -* [Typer](https://typer.tiangolo.com/) for the command-line interface -* [Jinja2](https://palletsprojects.com/p/jinja/) for templating - -The project is licensed under the GPLv3. Icon assets remain in the public domain -via the CC0 license. +Reports are written under the `output/` directory. Each command updates the corresponding `.json` file and produces an HTML dashboard using Chart.js. diff --git a/init.sh b/init.sh new file mode 100755 index 0000000..d951d8d --- /dev/null +++ b/init.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -e + +echo "[INFO] Creating virtual environment..." +python3 -m venv .venv +source .venv/bin/activate + +echo "[INFO] Installing dependencies..." +pip install --upgrade pip +pip install -r requirements.txt || echo "[WARN] requirements.txt not found, skipping." + +echo "[INFO] Running database setup..." +python scripts/init_db.py diff --git a/reports.yml b/reports.yml deleted file mode 100644 index 709d686..0000000 --- a/reports.yml +++ /dev/null @@ -1,213 +0,0 @@ -- name: hits - label: Hits - icon: pulse - chart: line - bucket: time_bucket - bucket_label: Time - query: | - SELECT {bucket} AS time_bucket, - COUNT(*) AS value - FROM logs - GROUP BY time_bucket - ORDER BY time_bucket - -- name: error_rate - label: Error Rate (%) - icon: file-alert - chart: line - bucket: time_bucket - bucket_label: Time - query: | - SELECT {bucket} AS time_bucket, - SUM(CASE WHEN status BETWEEN 400 AND 599 THEN 1 ELSE 0 END) * 100.0 / COUNT(*) AS value - FROM logs - GROUP BY time_bucket - ORDER BY time_bucket - -- name: cache_status_breakdown - label: Cache Status - icon: archive - chart: polarArea - bucket: cache_status - bucket_label: Cache Status - query: | - SELECT cache_status AS cache_status, - COUNT(*) AS value - FROM logs - GROUP BY cache_status - ORDER BY value DESC - colors: - - "#3273dc" - - "#23d160" - - "#ffdd57" - - "#ff3860" - - "#7957d5" - - "#363636" - -- name: domain_traffic - label: Top Domains - icon: globe - chart: table - top_n: 50 - per_domain: false - bucket: domain - bucket_label: Domain - query: | - SELECT host AS domain, - COUNT(*) AS value - FROM logs - GROUP BY domain - ORDER BY value DESC - -- name: bytes_sent - label: Bytes Sent - icon: upload - chart: line - bucket: time_bucket - bucket_label: Time - query: | - SELECT {bucket} AS time_bucket, - SUM(bytes_sent) AS value - FROM logs - GROUP BY time_bucket - ORDER BY time_bucket - -- name: top_paths - label: Top Paths - icon: map - chart: table - top_n: 50 - buckets: - - domain - - path - bucket_label: - - Domain - - Path - query: | - WITH paths AS ( - SELECT host AS domain, - substr(substr(request, instr(request, ' ') + 1), 1, - instr(substr(request, instr(request, ' ') + 1), ' ') - 1) AS path - FROM logs - ), ranked AS ( - SELECT domain, path, COUNT(*) AS value, - ROW_NUMBER() OVER (PARTITION BY domain ORDER BY COUNT(*) DESC) AS rn - FROM paths - GROUP BY domain, path - ) - SELECT domain, path, value - FROM ranked - WHERE rn <= 20 - ORDER BY domain, value DESC - -- name: user_agents - label: User Agents - icon: user - chart: table - top_n: 50 - buckets: - - domain - - user_agent - bucket_label: - - Domain - - User Agent - query: | - WITH ua AS ( - SELECT host AS domain, user_agent - FROM logs - ), ranked AS ( - SELECT domain, user_agent, COUNT(*) AS value, - ROW_NUMBER() OVER (PARTITION BY domain ORDER BY COUNT(*) DESC) AS rn - FROM ua - GROUP BY domain, user_agent - ) - SELECT domain, user_agent, value - FROM ranked - WHERE rn <= 20 - ORDER BY domain, value DESC - -- name: referrers - label: Referrers - icon: link - chart: table - top_n: 50 - buckets: - - domain - - referrer - bucket_label: - - Domain - - Referrer - query: | - WITH ref AS ( - SELECT host AS domain, referer AS referrer - FROM logs - ), ranked AS ( - SELECT domain, referrer, COUNT(*) AS value, - ROW_NUMBER() OVER (PARTITION BY domain ORDER BY COUNT(*) DESC) AS rn - FROM ref - GROUP BY domain, referrer - ) - SELECT domain, referrer, value - FROM ranked - WHERE rn <= 20 - ORDER BY domain, value DESC - -- name: status_distribution - label: HTTP Statuses - icon: server - chart: pie - bucket: status_group - bucket_label: Status - query: | - SELECT CASE - WHEN status BETWEEN 200 AND 299 THEN '2xx' - WHEN status BETWEEN 300 AND 399 THEN '3xx' - WHEN status BETWEEN 400 AND 499 THEN '4xx' - ELSE '5xx' - END AS status_group, - COUNT(*) AS value - FROM logs - GROUP BY status_group - ORDER BY status_group - colors: - - "#48c78e" - - "#209cee" - - "#ffdd57" - - "#f14668" - -# New time-series: status classes over time (stacked) -- name: status_classes_timeseries - label: Status Classes Over Time - icon: server - chart: stackedBar - bucket: time_bucket - bucket_label: Time - stacked: true - query: | - SELECT {bucket} AS time_bucket, - SUM(CASE WHEN status BETWEEN 200 AND 299 THEN 1 ELSE 0 END) AS "2xx", - SUM(CASE WHEN status BETWEEN 300 AND 399 THEN 1 ELSE 0 END) AS "3xx", - SUM(CASE WHEN status BETWEEN 400 AND 499 THEN 1 ELSE 0 END) AS "4xx", - SUM(CASE WHEN status BETWEEN 500 AND 599 THEN 1 ELSE 0 END) AS "5xx", - COUNT(*) AS total - FROM logs - GROUP BY time_bucket - ORDER BY time_bucket - -# New time-series: cache status over time (compact Hit/Miss; exclude '-' by default) -- name: cache_status_timeseries - label: Cache Status Over Time - icon: archive - chart: stackedBar - bucket: time_bucket - bucket_label: Time - stacked: true - exclude_values: ["-"] - query: | - SELECT {bucket} AS time_bucket, - SUM(CASE WHEN cache_status = 'HIT' THEN 1 ELSE 0 END) AS hit, - SUM(CASE WHEN cache_status = 'MISS' THEN 1 ELSE 0 END) AS miss, - COUNT(*) AS total - FROM logs - GROUP BY time_bucket - ORDER BY time_bucket diff --git a/requirements.txt b/requirements.txt index 2678f7b..221e3c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,3 @@ Flask # For optional lightweight API server # Linting / formatting (optional but recommended) black flake8 -PyYAML diff --git a/run-analysis.sh b/run-analysis.sh deleted file mode 100755 index 4149b9a..0000000 --- a/run-analysis.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash -set -e - -# Prevent concurrent executions of this script. -LOCK_FILE="/tmp/$(basename "$0").lock" -if [ -e "$LOCK_FILE" ]; then - echo "[WARN] $(basename "$0") is already running (lock file present)." >&2 - exit 0 -fi -touch "$LOCK_FILE" -trap 'rm -f "$LOCK_FILE"' EXIT - -# Ensure virtual environment exists -if [ ! -d ".venv" ]; then - echo "[INFO] Creating virtual environment..." - python3 -m venv .venv - source .venv/bin/activate - echo "[INFO] Installing dependencies..." - pip install --upgrade pip - if [ -f requirements.txt ]; then - pip install -r requirements.txt - else - echo "[WARN] requirements.txt not found, skipping." - fi -else - echo "[INFO] Activating virtual environment..." - source .venv/bin/activate -fi - -# Run analysis helpers -echo "[INFO] Checking for missing domains..." -python -m scripts.analyze check-missing-domains - -echo "[INFO] Suggesting cache improvements..." -python -m scripts.analyze suggest-cache - -echo "[INFO] Detecting threats..." -python -m scripts.analyze detect-threats - -# Deactivate to keep cron environment clean -if type deactivate >/dev/null 2>&1; then - deactivate -fi diff --git a/run-import.sh b/run-import.sh index 3c79d35..d951d8d 100755 --- a/run-import.sh +++ b/run-import.sh @@ -1,39 +1,13 @@ -#!/usr/bin/env bash +#!/bin/bash set -e -# Prevent multiple simultaneous runs by using a lock file specific to this -# script. If the lock already exists, assume another instance is running and -# exit gracefully. -LOCK_FILE="/tmp/$(basename "$0").lock" -if [ -e "$LOCK_FILE" ]; then - echo "[WARN] $(basename "$0") is already running (lock file present)." >&2 - exit 0 -fi -touch "$LOCK_FILE" -trap 'rm -f "$LOCK_FILE"' EXIT +echo "[INFO] Creating virtual environment..." +python3 -m venv .venv +source .venv/bin/activate -# Ensure virtual environment exists -if [ ! -d ".venv" ]; then - echo "[INFO] Creating virtual environment..." - python3 -m venv .venv - source .venv/bin/activate - echo "[INFO] Installing dependencies..." - pip install --upgrade pip - if [ -f requirements.txt ]; then - pip install -r requirements.txt - else - echo "[WARN] requirements.txt not found, skipping." - fi -else - echo "[INFO] Activating virtual environment..." - source .venv/bin/activate -fi +echo "[INFO] Installing dependencies..." +pip install --upgrade pip +pip install -r requirements.txt || echo "[WARN] requirements.txt not found, skipping." -# Run log import -echo "[INFO] Importing logs..." +echo "[INFO] Running database setup..." python scripts/init_db.py - -# Deactivate to keep cron environment clean -if type deactivate >/dev/null 2>&1; then - deactivate -fi diff --git a/run-reports.sh b/run-reports.sh deleted file mode 100755 index f7cffba..0000000 --- a/run-reports.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env bash -set -e - -# Prevent concurrent executions of this script. -LOCK_FILE="/tmp/$(basename "$0").lock" -if [ -e "$LOCK_FILE" ]; then - echo "[WARN] $(basename "$0") is already running (lock file present)." >&2 - exit 0 -fi -touch "$LOCK_FILE" -trap 'rm -f "$LOCK_FILE"' EXIT - -# Ensure virtual environment exists -if [ ! -d ".venv" ]; then - echo "[INFO] Creating virtual environment..." - python3 -m venv .venv - source .venv/bin/activate - echo "[INFO] Installing dependencies..." - pip install --upgrade pip - if [ -f requirements.txt ]; then - pip install -r requirements.txt - else - echo "[WARN] requirements.txt not found, skipping." - fi -else - echo "[INFO] Activating virtual environment..." - source .venv/bin/activate -fi - -# Generate reports for all domains combined -echo "[INFO] Generating aggregate reports..." -python -m scripts.generate_reports hourly -python -m scripts.generate_reports daily -python -m scripts.generate_reports weekly -python -m scripts.generate_reports monthly -python -m scripts.generate_reports global - -# Generate reports for each individual domain -echo "[INFO] Generating per-domain reports..." -python -m scripts.generate_reports hourly --all-domains -python -m scripts.generate_reports daily --all-domains -python -m scripts.generate_reports weekly --all-domains -python -m scripts.generate_reports monthly --all-domains - -# Generate analysis JSON -echo "[INFO] Generating analysis files..." -python -m scripts.generate_reports analysis - -# Generate root index -python -m scripts.generate_reports index - -# Deactivate to keep cron environment clean -if type deactivate >/dev/null 2>&1; then - deactivate -fi diff --git a/scripts/__init__.py b/scripts/__init__.py deleted file mode 100644 index f4c57a1..0000000 --- a/scripts/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"Utility package for ngxstat scripts" diff --git a/scripts/analyze.py b/scripts/analyze.py deleted file mode 100644 index 9f49978..0000000 --- a/scripts/analyze.py +++ /dev/null @@ -1,358 +0,0 @@ -#!/usr/bin/env python3 -"""Utility helpers for ad-hoc log analysis. - -This module exposes small helper functions to inspect the ``ngxstat`` SQLite -database. The intent is to allow quick queries from the command line or other -scripts without rewriting SQL each time. - -Examples --------- -To list all domains present in the database:: - - python scripts/analyze.py domains - -The CLI is powered by :mod:`typer` and currently only offers a couple of -commands. More analysis routines can be added over time. -""" -from __future__ import annotations - -import sqlite3 -from pathlib import Path -from typing import List, Optional, Set -from datetime import datetime, timedelta - -import json - -import typer - -from scripts import nginx_config # noqa: F401 # imported for side effects/usage - -DB_PATH = Path("database/ngxstat.db") -ANALYSIS_DIR = Path("output/analysis") - -app = typer.Typer(help="Ad-hoc statistics queries") - - -def _connect() -> sqlite3.Connection: - """Return a new SQLite connection to :data:`DB_PATH`.""" - return sqlite3.connect(DB_PATH) - - -def load_domains_from_db() -> List[str]: - """Return a sorted list of distinct domains from the ``logs`` table.""" - conn = _connect() - cur = conn.cursor() - cur.execute("SELECT DISTINCT host FROM logs ORDER BY host") - domains = [row[0] for row in cur.fetchall()] - conn.close() - return domains - - -def get_hit_count(domain: Optional[str] = None) -> int: - """Return total request count. - - Parameters - ---------- - domain: - Optional domain to filter on. If ``None`` the count includes all logs. - """ - conn = _connect() - cur = conn.cursor() - if domain: - cur.execute("SELECT COUNT(*) FROM logs WHERE host = ?", (domain,)) - else: - cur.execute("SELECT COUNT(*) FROM logs") - count = cur.fetchone()[0] or 0 - conn.close() - return count - - -def get_cache_ratio(domain: Optional[str] = None) -> float: - """Return the percentage of requests served from cache.""" - conn = _connect() - cur = conn.cursor() - if domain: - cur.execute( - "SELECT SUM(CASE WHEN cache_status = 'HIT' THEN 1 ELSE 0 END) * 1.0 / " - "COUNT(*) FROM logs WHERE host = ?", - (domain,), - ) - else: - cur.execute( - "SELECT SUM(CASE WHEN cache_status = 'HIT' THEN 1 ELSE 0 END) * 1.0 / " - "COUNT(*) FROM logs" - ) - result = cur.fetchone()[0] - conn.close() - return float(result or 0.0) - - -@app.command() -def domains() -> None: - """Print the list of domains discovered in the database.""" - for d in load_domains_from_db(): - typer.echo(d) - - -@app.command() -def hits(domain: Optional[str] = typer.Option(None, help="Filter by domain")) -> None: - """Show request count.""" - count = get_hit_count(domain) - if domain: - typer.echo(f"{domain}: {count} hits") - else: - typer.echo(f"Total hits: {count}") - - -@app.command("cache-ratio") -def cache_ratio_cmd( - domain: Optional[str] = typer.Option(None, help="Filter by domain") -) -> None: - """Display cache hit ratio as a percentage.""" - ratio = get_cache_ratio(domain) * 100 - if domain: - typer.echo(f"{domain}: {ratio:.2f}% cached") - else: - typer.echo(f"Cache hit ratio: {ratio:.2f}%") - - -@app.command("check-missing-domains") -def check_missing_domains( - json_output: bool = typer.Option( - False, "--json", help="Output missing domains as JSON" - ) -) -> None: - """Show domains present in the database but absent from Nginx config.""" - try: - from scripts.generate_reports import _get_domains as _db_domains - except Exception: # pragma: no cover - fallback if import fails - _db_domains = load_domains_from_db - - if not isinstance(json_output, bool): - json_output = False - - db_domains = set(_db_domains()) - - paths = nginx_config.discover_configs() - servers = nginx_config.parse_servers(paths) - config_domains: Set[str] = set() - for server in servers: - names = server.get("server_name", "") - for name in names.split(): - if name: - config_domains.add(name) - - missing = sorted(db_domains - config_domains) - - ANALYSIS_DIR.mkdir(parents=True, exist_ok=True) - out_path = ANALYSIS_DIR / "missing_domains.json" - out_path.write_text(json.dumps(missing, indent=2)) - - if json_output: - typer.echo(json.dumps(missing)) - else: - for d in missing: - typer.echo(d) - - -def suggest_cache( - threshold: int = 10, - json_output: bool = False, -) -> None: - """Suggest domain/path pairs that could benefit from caching. - - Paths with at least ``threshold`` ``MISS`` entries are shown for domains - whose server blocks lack a ``proxy_cache`` directive. - """ - - # Discover domains without explicit proxy_cache - paths = nginx_config.discover_configs() - servers = nginx_config.parse_servers(paths) - no_cache: Set[str] = set() - for server in servers: - if "proxy_cache" in server: - continue - for name in server.get("server_name", "").split(): - if name: - no_cache.add(name) - - conn = _connect() - cur = conn.cursor() - cur.execute( - """ - SELECT host, - substr(request, instr(request, ' ')+1, - instr(request, ' HTTP') - instr(request, ' ') - 1) AS path, - COUNT(*) AS miss_count - FROM logs - WHERE cache_status = 'MISS' - GROUP BY host, path - HAVING miss_count >= ? - ORDER BY miss_count DESC - """, - (int(threshold),), - ) - - rows = [r for r in cur.fetchall() if r[0] in no_cache] - conn.close() - - result = [ - {"host": host, "path": path, "misses": count} for host, path, count in rows - ] - - ANALYSIS_DIR.mkdir(parents=True, exist_ok=True) - out_path = ANALYSIS_DIR / "cache_suggestions.json" - out_path.write_text(json.dumps(result, indent=2)) - - if json_output: - typer.echo(json.dumps(result)) - else: - for item in result: - typer.echo(f"{item['host']} {item['path']} {item['misses']}") - -@app.command("suggest-cache") -def suggest_cache_cli( - threshold: int = typer.Option(10, help="Minimum number of MISS entries to report"), - json_output: bool = typer.Option(False, "--json", help="Output results as JSON"), -) -> None: - """CLI wrapper for suggest_cache.""" - suggest_cache(threshold=threshold, json_output=json_output) - - -def detect_threats( - hours: int = 1, - ip_threshold: int = 100, -) -> None: - """Detect potential security threats from recent logs.""" - - conn = _connect() - cur = conn.cursor() - - cur.execute("SELECT MAX(time) FROM logs") - row = cur.fetchone() - if not row or not row[0]: - typer.echo("No logs found") - conn.close() - return - - max_dt = datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S") - recent_end = max_dt - recent_start = recent_end - timedelta(hours=int(hours)) - prev_start = recent_start - timedelta(hours=int(hours)) - prev_end = recent_start - - fmt = "%Y-%m-%d %H:%M:%S" - recent_start_s = recent_start.strftime(fmt) - recent_end_s = recent_end.strftime(fmt) - prev_start_s = prev_start.strftime(fmt) - prev_end_s = prev_end.strftime(fmt) - - cur.execute( - """ - SELECT host, - SUM(CASE WHEN status >= 400 THEN 1 ELSE 0 END) AS errors, - COUNT(*) AS total - FROM logs - WHERE time >= ? AND time < ? - GROUP BY host - """, - (recent_start_s, recent_end_s), - ) - recent_rows = {r[0]: (r[1], r[2]) for r in cur.fetchall()} - - cur.execute( - """ - SELECT host, - SUM(CASE WHEN status >= 400 THEN 1 ELSE 0 END) AS errors, - COUNT(*) AS total - FROM logs - WHERE time >= ? AND time < ? - GROUP BY host - """, - (prev_start_s, prev_end_s), - ) - prev_rows = {r[0]: (r[1], r[2]) for r in cur.fetchall()} - - error_spikes = [] - for host in set(recent_rows) | set(prev_rows): - r_err, r_total = recent_rows.get(host, (0, 0)) - p_err, p_total = prev_rows.get(host, (0, 0)) - r_rate = r_err * 100.0 / r_total if r_total else 0.0 - p_rate = p_err * 100.0 / p_total if p_total else 0.0 - if r_rate >= 10 and r_rate >= p_rate * 2: - error_spikes.append( - { - "host": host, - "recent_error_rate": round(r_rate, 2), - "previous_error_rate": round(p_rate, 2), - } - ) - - cur.execute( - """ - SELECT DISTINCT user_agent FROM logs - WHERE time >= ? AND time < ? - """, - (prev_start_s, prev_end_s), - ) - prev_agents = {r[0] for r in cur.fetchall()} - - cur.execute( - """ - SELECT user_agent, COUNT(*) AS c - FROM logs - WHERE time >= ? AND time < ? - GROUP BY user_agent - HAVING c >= 10 - """, - (recent_start_s, recent_end_s), - ) - suspicious_agents = [ - {"user_agent": ua, "requests": cnt} - for ua, cnt in cur.fetchall() - if ua not in prev_agents - ] - - cur.execute( - """ - SELECT ip, COUNT(*) AS c - FROM logs - WHERE time >= ? AND time < ? - GROUP BY ip - HAVING c >= ? - ORDER BY c DESC - """, - (recent_start_s, recent_end_s, ip_threshold), - ) - high_ip_requests = [{"ip": ip, "requests": cnt} for ip, cnt in cur.fetchall()] - - conn.close() - - report = { - "time_range": { - "recent_start": recent_start_s, - "recent_end": recent_end_s, - "previous_start": prev_start_s, - "previous_end": prev_end_s, - }, - "error_spikes": error_spikes, - "suspicious_agents": suspicious_agents, - "high_ip_requests": high_ip_requests, - } - - ANALYSIS_DIR.mkdir(parents=True, exist_ok=True) - out_path = ANALYSIS_DIR / "threat_report.json" - out_path.write_text(json.dumps(report, indent=2)) - typer.echo(json.dumps(report)) - -@app.command("detect-threats") -def detect_threats_cli( - hours: int = typer.Option(1, help="Number of recent hours to analyze"), - ip_threshold: int = typer.Option(100, help="Requests from a single IP to flag"), -) -> None: - """CLI wrapper for detect_threats.""" - detect_threats(hours=hours, ip_threshold=ip_threshold) - - -if __name__ == "__main__": - app() diff --git a/scripts/download_icons.py b/scripts/download_icons.py deleted file mode 100644 index 6f4675a..0000000 --- a/scripts/download_icons.py +++ /dev/null @@ -1,28 +0,0 @@ -import json -from urllib.request import urlopen, Request -from pathlib import Path - -ICON_LIST_URL = "https://cc0-icons.jonh.eu/icons.json" -BASE_URL = "https://cc0-icons.jonh.eu/" - -OUTPUT_DIR = Path(__file__).resolve().parent.parent / "static" / "icons" - - -def main() -> None: - OUTPUT_DIR.mkdir(parents=True, exist_ok=True) - req = Request(ICON_LIST_URL, headers={"User-Agent": "Mozilla/5.0"}) - with urlopen(req) as resp: - data = json.load(resp) - icons = data.get("icons", []) - for icon in icons: - slug = icon.get("slug") - url = BASE_URL + icon.get("url") - path = OUTPUT_DIR / f"{slug}.svg" - req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) - with urlopen(req) as resp: - path.write_bytes(resp.read()) - print(f"Downloaded {len(icons)} icons to {OUTPUT_DIR}") - - -if __name__ == "__main__": - main() diff --git a/scripts/generate_reports.py b/scripts/generate_reports.py index d3c2f8a..b244075 100644 --- a/scripts/generate_reports.py +++ b/scripts/generate_reports.py @@ -1,466 +1,79 @@ import json -import sys import sqlite3 from pathlib import Path -import shutil -from typing import List, Dict, Optional -from datetime import datetime, timezone -import time - -import yaml +from typing import List, Dict import typer from jinja2 import Environment, FileSystemLoader -# Ensure project root is importable when running as a script (python scripts/generate_reports.py) -PROJECT_ROOT = Path(__file__).resolve().parent.parent -if str(PROJECT_ROOT) not in sys.path: - sys.path.insert(0, str(PROJECT_ROOT)) - DB_PATH = Path("database/ngxstat.db") OUTPUT_DIR = Path("output") TEMPLATE_DIR = Path("templates") -REPORT_CONFIG = Path("reports.yml") -GENERATED_MARKER = OUTPUT_DIR / "generated.txt" - -# Mapping of interval names to SQLite strftime formats. These strings are -# substituted into report queries whenever the special ``{bucket}`` token is -# present so that a single report definition can be reused for multiple -# intervals. -INTERVAL_FORMATS = { - "hourly": "%Y-%m-%d %H:00:00", - "daily": "%Y-%m-%d", - "weekly": "%Y-%W", - "monthly": "%Y-%m", -} app = typer.Typer(help="Generate aggregated log reports") - -@app.callback() -def _cli_callback(ctx: typer.Context) -> None: - """Register post-command hook to note generation time.""" - - def _write_marker() -> None: - OUTPUT_DIR.mkdir(parents=True, exist_ok=True) - # Use timezone-aware UTC to avoid deprecation warnings and ambiguity - timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") - GENERATED_MARKER.write_text(f"{timestamp}\n") - - ctx.call_on_close(_write_marker) - - -def _get_domains() -> List[str]: - """Return a sorted list of unique domains from the logs table.""" - conn = sqlite3.connect(DB_PATH) - cur = conn.cursor() - cur.execute("SELECT DISTINCT host FROM logs ORDER BY host") - domains = [row[0] for row in cur.fetchall()] - conn.close() - return domains - - -def _load_config() -> List[Dict]: - if not REPORT_CONFIG.exists(): - typer.echo(f"Config file not found: {REPORT_CONFIG}") - raise typer.Exit(1) - with REPORT_CONFIG.open("r") as fh: - data = yaml.safe_load(fh) or [] - if not isinstance(data, list): - typer.echo("reports.yml must contain a list of report definitions") - raise typer.Exit(1) - return data - +def _load_existing(path: Path) -> List[Dict]: + if path.exists(): + try: + return json.loads(path.read_text()) + except Exception: + return [] + return [] def _save_json(path: Path, data: List[Dict]) -> None: path.parent.mkdir(parents=True, exist_ok=True) path.write_text(json.dumps(data, indent=2)) - -def _copy_icons() -> None: - """Copy vendored icons and scripts to the output directory.""" - src_dir = Path("static/icons") - dst_dir = OUTPUT_DIR / "icons" - if src_dir.is_dir(): - dst_dir.mkdir(parents=True, exist_ok=True) - for icon in src_dir.glob("*.svg"): - shutil.copy(icon, dst_dir / icon.name) - - js_src = Path("static/chartManager.js") - if js_src.is_file(): - shutil.copy(js_src, OUTPUT_DIR / js_src.name) - - -def _render_snippet(report: Dict, out_dir: Path) -> None: - """Render a single report snippet to ``.html`` inside ``out_dir``.""" +def _render_html(interval: str, json_name: str, out_path: Path) -> None: env = Environment(loader=FileSystemLoader(TEMPLATE_DIR)) - template = env.get_template("report_snippet.html") - snippet_path = out_dir / f"{report['name']}.html" - snippet_path.write_text(template.render(report=report)) + template = env.get_template("report.html") + out_path.write_text(template.render(interval=interval, json_path=json_name)) +def _aggregate(interval: str, fmt: str) -> None: + json_path = OUTPUT_DIR / f"{interval}.json" + html_path = OUTPUT_DIR / f"{interval}.html" -def _write_stats( - generated_at: Optional[str] = None, generation_seconds: Optional[float] = None -) -> None: - """Query basic dataset stats and write them to ``output/global/stats.json``.""" - conn = sqlite3.connect(DB_PATH) - cur = conn.cursor() - - cur.execute("SELECT COUNT(*) FROM logs") - total_logs = cur.fetchone()[0] or 0 - - cur.execute("SELECT MIN(time), MAX(time) FROM logs") - row = cur.fetchone() or (None, None) - start_date = row[0] or "" - end_date = row[1] or "" - - cur.execute("SELECT COUNT(DISTINCT host) FROM logs") - unique_domains = cur.fetchone()[0] or 0 - - conn.close() - - stats = { - "total_logs": total_logs, - "start_date": start_date, - "end_date": end_date, - "unique_domains": unique_domains, - } - if generated_at: - stats["generated_at"] = generated_at - if generation_seconds is not None: - stats["generation_seconds"] = generation_seconds - - out_path = OUTPUT_DIR / "global" / "stats.json" - _save_json(out_path, stats) - - -def _bucket_expr(interval: str) -> str: - """Return the SQLite strftime expression for the given interval.""" - fmt = INTERVAL_FORMATS.get(interval) - if not fmt: - typer.echo(f"Unsupported interval: {interval}") - raise typer.Exit(1) - return f"strftime('{fmt}', datetime(time))" - - -def _generate_interval(interval: str, domain: Optional[str] = None) -> None: - cfg = _load_config() - if not cfg: - typer.echo("No report definitions found") - return - - _copy_icons() - - bucket = _bucket_expr(interval) + existing = _load_existing(json_path) + last_bucket = existing[-1]["bucket"] if existing else None conn = sqlite3.connect(DB_PATH) cur = conn.cursor() - # Create a temporary view so queries can easily be filtered by domain - cur.execute("DROP VIEW IF EXISTS logs_view") - if domain: - # Parameters are not allowed in CREATE VIEW statements, so we must - # safely interpolate the domain value ourselves. Escape any single - # quotes to prevent malformed queries. - safe_domain = domain.replace("'", "''") - cur.execute( - f"CREATE TEMP VIEW logs_view AS SELECT * FROM logs WHERE host = '{safe_domain}'" - ) - out_dir = OUTPUT_DIR / "domains" / domain / interval - else: - cur.execute("CREATE TEMP VIEW logs_view AS SELECT * FROM logs") - out_dir = OUTPUT_DIR / interval + query = f"SELECT strftime('{fmt}', datetime(time)) as bucket, COUNT(*) as hits FROM logs" + params = [] + if last_bucket: + query += " WHERE datetime(time) > datetime(?)" + params.append(last_bucket) + query += " GROUP BY bucket ORDER BY bucket" - out_dir.mkdir(parents=True, exist_ok=True) - - report_list = [] - for definition in cfg: - if "{bucket}" not in definition["query"] or definition.get("global"): - # Global reports are generated separately - continue - if domain and not definition.get("per_domain", True): - # Skip reports marked as not applicable to per-domain runs - continue - - name = definition["name"] - query = definition["query"].replace("{bucket}", bucket) - query = query.replace("FROM logs", "FROM logs_view") - # Apply top_n limit for tables (performance-friendly), if configured - top_n = definition.get("top_n") - chart_type = definition.get("chart", "line") - if top_n and chart_type == "table": - try: - n = int(top_n) - if "LIMIT" not in query.upper(): - query = f"{query}\nLIMIT {n}" - except Exception: - pass - cur.execute(query) - rows = cur.fetchall() - headers = [c[0] for c in cur.description] - data = [dict(zip(headers, row)) for row in rows] - json_path = out_dir / f"{name}.json" - _save_json(json_path, data) - entry = { - "name": name, - "label": definition.get("label", name.title()), - "chart": definition.get("chart", "line"), - "json": f"{name}.json", - "html": f"{name}.html", - } - if "icon" in definition: - entry["icon"] = definition["icon"] - if "bucket" in definition: - entry["bucket"] = definition["bucket"] - if "buckets" in definition: - entry["buckets"] = definition["buckets"] - if "bucket_label" in definition: - entry["bucket_label"] = definition["bucket_label"] - if "color" in definition: - entry["color"] = definition["color"] - if "colors" in definition: - entry["colors"] = definition["colors"] - # Optional UX metadata passthrough for frontend-only transforms - for key in ( - "windows_supported", - "window_default", - "group_others_threshold", - "exclude_values", - "top_n", - "stacked", - "palette", - ): - if key in definition: - entry[key] = definition[key] - _render_snippet(entry, out_dir) - report_list.append(entry) - - _save_json(out_dir / "reports.json", report_list) - if domain: - typer.echo(f"Generated {interval} reports for {domain}") - else: - typer.echo(f"Generated {interval} reports") - - -def _generate_all_domains(interval: str) -> None: - """Generate reports for each unique domain.""" - for domain in _get_domains(): - _generate_interval(interval, domain) - - -def _generate_root_index() -> None: - """Render the top-level index listing all intervals and domains.""" - _copy_icons() - intervals = sorted( - [name for name in INTERVAL_FORMATS if (OUTPUT_DIR / name).is_dir()] - ) - - domains_dir = OUTPUT_DIR / "domains" - domains: List[str] = [] - if domains_dir.is_dir(): - domains = [p.name for p in domains_dir.iterdir() if p.is_dir()] - domains.sort() - - env = Environment(loader=FileSystemLoader(TEMPLATE_DIR)) - template = env.get_template("index.html") - - OUTPUT_DIR.mkdir(parents=True, exist_ok=True) - out_path = OUTPUT_DIR / "index.html" - out_path.write_text(template.render(intervals=intervals, domains=domains)) - typer.echo(f"Generated root index at {out_path}") - - -def _generate_global() -> None: - """Generate reports that do not depend on an interval.""" - cfg = _load_config() - if not cfg: - typer.echo("No report definitions found") - return - - start_time = time.time() - # Use timezone-aware UTC for generated_at (string remains unchanged format) - generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") - - _copy_icons() - - conn = sqlite3.connect(DB_PATH) - cur = conn.cursor() - - out_dir = OUTPUT_DIR / "global" - out_dir.mkdir(parents=True, exist_ok=True) - - report_list = [] - for definition in cfg: - if "{bucket}" in definition["query"] and not definition.get("global"): - continue - - name = definition["name"] - query = definition["query"] - # Apply top_n limit for tables (performance-friendly), if configured - top_n = definition.get("top_n") - chart_type = definition.get("chart", "line") - if top_n and chart_type == "table": - try: - n = int(top_n) - if "LIMIT" not in query.upper(): - query = f"{query}\nLIMIT {n}" - except Exception: - pass - cur.execute(query) - rows = cur.fetchall() - headers = [c[0] for c in cur.description] - data = [dict(zip(headers, row)) for row in rows] - json_path = out_dir / f"{name}.json" - _save_json(json_path, data) - entry = { - "name": name, - "label": definition.get("label", name.title()), - "chart": definition.get("chart", "line"), - "json": f"{name}.json", - "html": f"{name}.html", - } - if "icon" in definition: - entry["icon"] = definition["icon"] - if "bucket" in definition: - entry["bucket"] = definition["bucket"] - if "buckets" in definition: - entry["buckets"] = definition["buckets"] - if "bucket_label" in definition: - entry["bucket_label"] = definition["bucket_label"] - if "color" in definition: - entry["color"] = definition["color"] - if "colors" in definition: - entry["colors"] = definition["colors"] - # Optional UX metadata passthrough for frontend-only transforms - for key in ( - "windows_supported", - "window_default", - "group_others_threshold", - "exclude_values", - "top_n", - "stacked", - "palette", - ): - if key in definition: - entry[key] = definition[key] - _render_snippet(entry, out_dir) - report_list.append(entry) - - _save_json(out_dir / "reports.json", report_list) - elapsed = round(time.time() - start_time, 2) - _write_stats(generated_at, elapsed) - typer.echo("Generated global reports") - - -def _generate_analysis() -> None: - """Generate analysis JSON files consumed by the Analysis tab.""" - try: - # Import lazily to avoid circulars and keep dependencies optional - from scripts import analyze - except Exception as exc: # pragma: no cover - defensive - typer.echo(f"Failed to import analysis module: {exc}") - return - - # Ensure output root and icons present for parity - _copy_icons() - - # These commands write JSON files under output/analysis/ - try: - analyze.check_missing_domains(json_output=True) - except Exception as exc: # pragma: no cover - continue best-effort - typer.echo(f"check_missing_domains failed: {exc}") - try: - analyze.suggest_cache(json_output=True) - except Exception as exc: # pragma: no cover - typer.echo(f"suggest_cache failed: {exc}") - try: - analyze.detect_threats() - except Exception as exc: # pragma: no cover - typer.echo(f"detect_threats failed: {exc}") - typer.echo("Generated analysis JSON files") + rows = cur.execute(query, params).fetchall() + for bucket, hits in rows: + existing.append({"bucket": bucket, "hits": hits}) + existing.sort(key=lambda x: x["bucket"]) + _save_json(json_path, existing) + _render_html(interval, json_path.name, html_path) + typer.echo(f"Generated {json_path} and {html_path}") @app.command() -def hourly( - domain: Optional[str] = typer.Option( - None, help="Generate reports for a specific domain" - ), - all_domains: bool = typer.Option( - False, "--all-domains", help="Generate reports for each domain" - ), -) -> None: - """Generate hourly reports.""" - if all_domains: - _generate_all_domains("hourly") - else: - _generate_interval("hourly", domain) - +def hourly() -> None: + """Aggregate logs into hourly buckets.""" + _aggregate("hourly", "%Y-%m-%d %H:00:00") @app.command() -def daily( - domain: Optional[str] = typer.Option( - None, help="Generate reports for a specific domain" - ), - all_domains: bool = typer.Option( - False, "--all-domains", help="Generate reports for each domain" - ), -) -> None: - """Generate daily reports.""" - if all_domains: - _generate_all_domains("daily") - else: - _generate_interval("daily", domain) - +def daily() -> None: + """Aggregate logs into daily buckets.""" + _aggregate("daily", "%Y-%m-%d") @app.command() -def weekly( - domain: Optional[str] = typer.Option( - None, help="Generate reports for a specific domain" - ), - all_domains: bool = typer.Option( - False, "--all-domains", help="Generate reports for each domain" - ), -) -> None: - """Generate weekly reports.""" - if all_domains: - _generate_all_domains("weekly") - else: - _generate_interval("weekly", domain) - +def weekly() -> None: + """Aggregate logs into weekly buckets.""" + _aggregate("weekly", "%Y-%W") @app.command() -def monthly( - domain: Optional[str] = typer.Option( - None, help="Generate reports for a specific domain" - ), - all_domains: bool = typer.Option( - False, "--all-domains", help="Generate reports for each domain" - ), -) -> None: - """Generate monthly reports.""" - if all_domains: - _generate_all_domains("monthly") - else: - _generate_interval("monthly", domain) - - -@app.command("global") -def global_reports() -> None: - """Generate global reports.""" - _generate_global() - - -@app.command() -def analysis() -> None: - """Generate analysis JSON files for the Analysis tab.""" - _generate_analysis() - - -@app.command() -def index() -> None: - """Generate the root index page linking all reports.""" - _generate_root_index() - +def monthly() -> None: + """Aggregate logs into monthly buckets.""" + _aggregate("monthly", "%Y-%m") if __name__ == "__main__": app() diff --git a/scripts/init_db.py b/scripts/init_db.py index b9ea07d..8a3a89c 100644 --- a/scripts/init_db.py +++ b/scripts/init_db.py @@ -1,25 +1,23 @@ #!/usr/bin/env python3 import os -import re import sqlite3 -from datetime import datetime, timezone +import re +from pathlib import Path LOG_DIR = "/var/log/nginx" DB_FILE = "database/ngxstat.db" -LOG_FILE_PATTERN = re.compile(r"access\.log(\.\d+)?$") +LOG_FILE_PATTERN = re.compile(r'access\.log(\.\d+)?$') LOG_FORMAT_REGEX = re.compile( r'(?P\S+) - (?P\S+) \[(?P