From 3a8d73edcdeb17fe8ad7cd5ec666e483fb5a4d37 Mon Sep 17 00:00:00 2001 From: codex-bot Date: Tue, 26 Aug 2025 22:35:32 -0500 Subject: [PATCH] ci(forgejo): generate banlist history graph\n\n- Add scripts/banlist_metrics.py to compute counts from git history and render chart\n- Add Forgejo workflow using demisto/matplotlib image + warm-workspace checkout\n- Keep GitHub workflow for portability; remove old .gitea workflow\n- Update README with embedded SVG and CSV link --- .forgejo/workflows/generate-banlist-graph.yml | 135 ++++++++++++++++ .github/workflows/generate-banlist-graph.yml | 53 +++++++ README.md | 15 ++ scripts/banlist_metrics.py | 149 ++++++++++++++++++ 4 files changed, 352 insertions(+) create mode 100644 .forgejo/workflows/generate-banlist-graph.yml create mode 100644 .github/workflows/generate-banlist-graph.yml create mode 100644 scripts/banlist_metrics.py diff --git a/.forgejo/workflows/generate-banlist-graph.yml b/.forgejo/workflows/generate-banlist-graph.yml new file mode 100644 index 0000000..1a2e33e --- /dev/null +++ b/.forgejo/workflows/generate-banlist-graph.yml @@ -0,0 +1,135 @@ +name: Generate banlist history graph + +on: + push: + branches: [ main ] + paths: + - 'banned.txt' + - 'scripts/banlist_metrics.py' + - '.forgejo/workflows/generate-banlist-graph.yml' + schedule: + - cron: '17 3 * * *' + workflow_dispatch: {} + +jobs: + build: + # Match your Forgejo runner label + runs-on: docker + # Use a prebuilt image with matplotlib preinstalled (cached on runner) + container: demisto/matplotlib:latest + steps: + - name: Ensure git is available (minimal) + run: | + set -euo pipefail + if ! command -v git >/dev/null 2>&1; then + apt-get update + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + git ca-certificates + fi + + - name: Checkout repository (warm or clone) + run: | + set -euo pipefail + + # Determine remote URL + REMOTE_URL="${CI_REPOSITORY_URL:-}" + if [ -z "$REMOTE_URL" ]; then + if [ -n "${GITHUB_SERVER_URL:-}" ] && [ -n "${GITHUB_REPOSITORY:-}" ]; then + REMOTE_URL="${GITHUB_SERVER_URL%/}/${GITHUB_REPOSITORY}.git" + elif [ -n "${GITHUB_REPOSITORY:-}" ]; then + # Fallback host; adjust to your Forgejo host if needed + REMOTE_URL="https://git.jordanwages.com/${GITHUB_REPOSITORY}.git" + else + echo "Unable to determine repository URL from CI environment" >&2 + exit 1 + fi + fi + + # Try with token if available by embedding basic auth in the URL + AUTH_URL="$REMOTE_URL" + if [ -n "${GITHUB_TOKEN:-}" ]; then + ACTOR="${GITHUB_ACTOR:-oauth2}" + AUTH_URL=$(printf '%s' "$REMOTE_URL" | sed -E "s#^https://#https://${ACTOR}:${GITHUB_TOKEN}@#") + fi + + if [ -d .git ]; then + echo "Reusing existing workspace (.git found)" + # Ensure origin exists and points to AUTH_URL + if git remote get-url origin >/dev/null 2>&1; then + git remote set-url origin "$AUTH_URL" + else + git remote add origin "$AUTH_URL" + fi + git config --global --add safe.directory "$(pwd)" + # Fetch updates and tags, prune deleted refs + git fetch --all --tags --prune + + # Decide target + if [ -n "${GITHUB_SHA:-}" ]; then + TARGET="$GITHUB_SHA" + git checkout -q "$TARGET" || true + git reset --hard "$TARGET" + elif [ -n "${GITHUB_REF_NAME:-}" ]; then + BRANCH="$GITHUB_REF_NAME" + git checkout -q -B "$BRANCH" "origin/$BRANCH" || git checkout -q "$BRANCH" || true + git reset --hard "origin/$BRANCH" || true + else + # Fallback to main + git checkout -q -B main origin/main || git checkout -q main || true + git reset --hard origin/main || true + fi + # Clean untracked files + git clean -fdx + else + echo "Cloning from: $REMOTE_URL" + if ! git clone "$AUTH_URL" .; then + echo "Auth clone failed; trying anonymous clone..." >&2 + git clone "$REMOTE_URL" . + fi + git config --global --add safe.directory "$(pwd)" + + # Checkout the requested ref if provided + if [ -n "${GITHUB_SHA:-}" ]; then + git fetch origin "$GITHUB_SHA" || true + git checkout -q "$GITHUB_SHA" || true + elif [ -n "${GITHUB_REF_NAME:-}" ]; then + git fetch origin "$GITHUB_REF_NAME" || true + git checkout -q "$GITHUB_REF_NAME" || true + fi + fi + + - name: Generate metrics and chart + env: + MPLBACKEND: Agg + TZ: UTC + run: | + set -euo pipefail + python scripts/banlist_metrics.py \ + --file banned.txt \ + --csv metrics/banlist_counts.csv \ + --image assets/banlist_history.svg + + - name: Commit and push changes (if any) + run: | + set -euo pipefail + git config user.name "forgejo-actions-bot" + git config user.email "actions@noreply.local" + if [ -n "$(git status --porcelain -- metrics assets)" ]; then + git add -A metrics assets + git commit -m "chore: update banlist history graph [skip ci]" + # Push to the same remote we cloned from; token is in the origin URL if present + git push || { + echo "First push failed; trying to embed token in remote..." >&2 + if [ -n "${GITHUB_TOKEN:-}" ]; then + ACTOR="${GITHUB_ACTOR:-oauth2}" + ORIGIN_URL=$(git remote get-url origin) + AUTH_URL=$(printf '%s' "$ORIGIN_URL" | sed -E "s#^https://#https://${ACTOR}:${GITHUB_TOKEN}@#") + git remote set-url origin "$AUTH_URL" + git push + else + false + fi + } + else + echo "No changes to commit." + fi diff --git a/.github/workflows/generate-banlist-graph.yml b/.github/workflows/generate-banlist-graph.yml new file mode 100644 index 0000000..e8ccbf1 --- /dev/null +++ b/.github/workflows/generate-banlist-graph.yml @@ -0,0 +1,53 @@ +name: Generate banlist history graph + +on: + push: + branches: [ main ] + paths: + - 'banned.txt' + - 'scripts/banlist_metrics.py' + - '.github/workflows/generate-banlist-graph.yml' + schedule: + - cron: '17 3 * * *' + workflow_dispatch: {} + +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install matplotlib + + - name: Generate metrics and chart + run: | + python scripts/banlist_metrics.py \ + --file banned.txt \ + --csv metrics/banlist_counts.csv \ + --image assets/banlist_history.svg + + - name: Commit changes (if any) + run: | + git config user.name "actions-bot" + git config user.email "actions@users.noreply.github.com" + if [ -n "$(git status --porcelain -- metrics assets)" ]; then + git add metrics assets + git commit -m "chore: update banlist history graph [skip ci]" + git push + else + echo "No changes to commit." + fi + diff --git a/README.md b/README.md index 9aa1dca..ab86de1 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,12 @@ A small repository that publishes the current WageNet IP blocklist. The list is - `update.sh`: helper script that copies a source list into the repo, commits, and pushes updates. - `LICENSE`: license for this repository (GPL‑3.0). +## Ban List History +- History chart: `assets/banlist_history.svg` +- CSV data: `metrics/banlist_counts.csv` + +![Ban list size over time](assets/banlist_history.svg) + ## File format - Plain text, UTF‑8. - Lines beginning with `#` are comments/metadata. @@ -52,6 +58,15 @@ https://git.jordanwages.com/wagesj45/wagenet-ip-ban-list/raw/branch/main/banned. Note: Always validate the list for your environment and merge with any allowlists you maintain. +### CI‑generated graph + +This repository includes a workflow that scans git history to count the number of non‑comment lines in `banned.txt` at each change and generates: + +- A CSV at `metrics/banlist_counts.csv` +- A chart at `assets/banlist_history.svg` (embedded above) + +The workflow runs on pushes to `main` that modify `banned.txt` and nightly on a schedule. Commits from the workflow are marked with `[skip ci]` to avoid loops. + ## Appealing If you feel your IP is in this list by mistake, please [file an issue](https://git.jordanwages.com/wagesj45/wagenet-ip-ban-list/issues) to appeal. diff --git a/scripts/banlist_metrics.py b/scripts/banlist_metrics.py new file mode 100644 index 0000000..25fe41b --- /dev/null +++ b/scripts/banlist_metrics.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +""" +Generate historical metrics for banned.txt by walking git history. + +Outputs: +- CSV with (date_iso, timestamp, commit, count) +- Optional SVG/PNG line chart of counts over time + +Usage: + python scripts/banlist_metrics.py \ + --file banned.txt \ + --csv metrics/banlist_counts.csv \ + --image assets/banlist_history.svg +""" + +from __future__ import annotations + +import argparse +import csv +import os +import subprocess +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import List, Optional + + +@dataclass +class Snapshot: + commit: str + timestamp: int # Unix epoch seconds + count: int + + @property + def date_iso(self) -> str: + return datetime.fromtimestamp(self.timestamp, tz=timezone.utc).isoformat() + + +def run(cmd: List[str], cwd: Optional[str] = None) -> str: + out = subprocess.check_output(cmd, cwd=cwd) + return out.decode().strip() + + +def git_file_commits(path: str) -> List[str]: + # List commits (oldest -> newest) that touched the file + revs = run(["git", "rev-list", "--reverse", "HEAD", "--", path]) + return [r for r in revs.splitlines() if r] + + +def git_commit_timestamp(commit: str) -> int: + return int(run(["git", "show", "-s", "--format=%ct", commit])) + + +def git_show_file_at(commit: str, path: str) -> str: + return run(["git", "show", f"{commit}:{path}"]) + + +def count_ips(text: str) -> int: + count = 0 + for line in text.splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + count += 1 + return count + + +def collect_snapshots(target_file: str) -> List[Snapshot]: + commits = git_file_commits(target_file) + snaps: List[Snapshot] = [] + for c in commits: + try: + content = git_show_file_at(c, target_file) + except subprocess.CalledProcessError: + # File may not exist in this commit (renames, etc.) + continue + cnt = count_ips(content) + ts = git_commit_timestamp(c) + snaps.append(Snapshot(commit=c, timestamp=ts, count=cnt)) + return snaps + + +def write_csv(snaps: List[Snapshot], csv_path: str) -> None: + os.makedirs(os.path.dirname(csv_path), exist_ok=True) + with open(csv_path, "w", newline="", encoding="utf-8") as f: + w = csv.writer(f) + w.writerow(["date_iso", "timestamp", "commit", "count"]) # header + for s in snaps: + w.writerow([s.date_iso, s.timestamp, s.commit, s.count]) + + +def write_chart(snaps: List[Snapshot], image_path: str) -> None: + try: + import matplotlib + matplotlib.use("Agg") # headless + import matplotlib.pyplot as plt + except Exception as e: + print(f"Skipping chart generation (matplotlib unavailable): {e}") + return + + if not snaps: + print("No snapshots to chart.") + return + + xs = [datetime.fromtimestamp(s.timestamp, tz=timezone.utc) for s in snaps] + ys = [s.count for s in snaps] + + plt.figure(figsize=(8, 3)) + plt.plot(xs, ys, marker="o", linewidth=1.5, markersize=2) + plt.title("WageNet IP Ban List Size Over Time") + plt.xlabel("Date (UTC)") + plt.ylabel("IP count") + plt.grid(True, linestyle=":", linewidth=0.5) + plt.tight_layout() + + os.makedirs(os.path.dirname(image_path), exist_ok=True) + plt.savefig(image_path) + plt.close() + + +def main() -> int: + ap = argparse.ArgumentParser() + ap.add_argument("--file", default="banned.txt", help="Path to ban list in repo") + ap.add_argument("--csv", default="metrics/banlist_counts.csv", help="Output CSV path") + ap.add_argument( + "--image", + default="assets/banlist_history.svg", + help="Output image path (SVG/PNG)", + ) + args = ap.parse_args() + + # Ensure we are in a git repo + try: + run(["git", "rev-parse", "--is-inside-work-tree"]) + except subprocess.CalledProcessError: + raise SystemExit("This script must run inside a git repository.") + + snaps = collect_snapshots(args.file) + write_csv(snaps, args.csv) + if args.image: + write_chart(snaps, args.image) + print(f"Wrote {len(snaps)} snapshots to {args.csv}") + if args.image: + print(f"Updated chart at {args.image}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) +