#!/usr/bin/env python3
"""
Generate historical metrics for banned.txt by walking git history.

Outputs:
- CSV with (date_iso, timestamp, commit, count)
- Optional SVG/PNG line chart of counts over time

Usage:
  python scripts/banlist_metrics.py \
    --file banned.txt \
    --csv metrics/banlist_counts.csv \
    --image assets/banlist_history.svg
"""
from __future__ import annotations

import argparse
import csv
import os
import subprocess
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import List, Optional


@dataclass
class Snapshot:
    """Size of the tracked file at one commit."""

    commit: str
    timestamp: int  # Unix epoch seconds
    count: int

    @property
    def date_iso(self) -> str:
        """Return the commit timestamp as an ISO-8601 string in UTC."""
        return datetime.fromtimestamp(self.timestamp, tz=timezone.utc).isoformat()


def run(cmd: List[str], cwd: Optional[str] = None) -> str:
    """Run *cmd* and return its stdout, decoded and stripped of outer whitespace.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero status.
        FileNotFoundError: if the executable cannot be found.
    """
    out = subprocess.check_output(cmd, cwd=cwd)
    # Historical revisions may contain stray non-UTF-8 bytes; replacing them
    # keeps the line structure intact instead of aborting the whole walk.
    return out.decode("utf-8", errors="replace").strip()


def git_file_commits(path: str) -> List[str]:
    """Return the commits reachable from HEAD that touched *path*, oldest first."""
    revs = run(["git", "rev-list", "--reverse", "HEAD", "--", path])
    return [r for r in revs.splitlines() if r]


def git_commit_timestamp(commit: str) -> int:
    """Return the committer timestamp (``%ct``, Unix epoch seconds) of *commit*."""
    return int(run(["git", "show", "-s", "--format=%ct", commit]))


def git_show_file_at(commit: str, path: str) -> str:
    """Return the content of *path* as stored in *commit*.

    Raises:
        subprocess.CalledProcessError: if git cannot show the path at that commit.
    """
    return run(["git", "show", f"{commit}:{path}"])


def count_ips(text: str) -> int:
    """Count the lines of *text* that are neither blank nor ``#`` comments."""
    stripped = (line.strip() for line in text.splitlines())
    return sum(1 for line in stripped if line and not line.startswith("#"))


def collect_snapshots(target_file: str) -> List[Snapshot]:
    """Build one Snapshot per commit that touched *target_file*, oldest first.

    Commits at which the file content cannot be read are skipped.
    """
    snaps: List[Snapshot] = []
    for c in git_file_commits(target_file):
        try:
            content = git_show_file_at(c, target_file)
        except subprocess.CalledProcessError:
            # File may not exist in this commit (renames, etc.)
            continue
        snaps.append(
            Snapshot(
                commit=c,
                timestamp=git_commit_timestamp(c),
                count=count_ips(content),
            )
        )
    return snaps


def _ensure_parent_dir(path: str) -> None:
    """Create the parent directory of *path* if the path has one."""
    parent = os.path.dirname(path)
    # dirname() is "" for a bare filename, and os.makedirs("") raises
    # FileNotFoundError, so only create a directory when there is one to create.
    if parent:
        os.makedirs(parent, exist_ok=True)


def write_csv(snaps: List[Snapshot], csv_path: str) -> None:
    """Write *snaps* to *csv_path* as rows of date_iso, timestamp, commit, count.

    A header row is always written, even when *snaps* is empty. Missing parent
    directories are created.
    """
    _ensure_parent_dir(csv_path)
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["date_iso", "timestamp", "commit", "count"])  # header
        w.writerows([s.date_iso, s.timestamp, s.commit, s.count] for s in snaps)


def write_chart(snaps: List[Snapshot], image_path: str) -> bool:
    """Render a line chart of count over time to *image_path*.

    Chart generation is best-effort: if matplotlib cannot be imported or there
    are no snapshots, a message is printed and nothing is written.

    Returns:
        True if the image was written, False if chart generation was skipped.
    """
    try:
        import matplotlib

        matplotlib.use("Agg")  # headless
        import matplotlib.pyplot as plt
        import matplotlib.dates as mdates
    except Exception as e:
        print(f"Skipping chart generation (matplotlib unavailable): {e}")
        return False

    if not snaps:
        print("No snapshots to chart.")
        return False

    xs = [datetime.fromtimestamp(s.timestamp, tz=timezone.utc) for s in snaps]
    ys = [s.count for s in snaps]

    fig, ax = plt.subplots(figsize=(9.5, 3.2))
    ax.plot(xs, ys, marker="o", linewidth=1.5, markersize=2)
    ax.set_title("WageNet IP Ban List Size Over Time")
    ax.set_xlabel("Date (UTC)")
    ax.set_ylabel("IP count")
    ax.grid(True, linestyle=":", linewidth=0.5)

    # Use full date labels on major ticks. Tick formatting is cosmetic, so a
    # failure here falls back to matplotlib's defaults rather than aborting.
    try:
        locator = mdates.AutoDateLocator(minticks=4, maxticks=8)
        formatter = mdates.DateFormatter('%Y-%m-%d')
        ax.xaxis.set_major_locator(locator)
        ax.xaxis.set_major_formatter(formatter)
    except Exception:
        pass

    # Always rotate for readability regardless of formatter availability
    try:
        fig.autofmt_xdate(rotation=45, ha="right")
    except Exception:
        for label in ax.get_xticklabels():
            label.set_rotation(45)
            label.set_horizontalalignment("right")

    # Add slight horizontal margins to prevent clipping at edges
    ax.margins(x=0.02)
    fig.tight_layout()

    _ensure_parent_dir(image_path)
    fig.savefig(image_path)
    plt.close(fig)
    return True


def main() -> int:
    """Parse arguments, walk the git history, and write the CSV and chart.

    Returns:
        0 on success.

    Raises:
        SystemExit: if git is unavailable or the current directory is not
            inside a git work tree.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--file", default="banned.txt", help="Path to ban list in repo")
    ap.add_argument("--csv", default="metrics/banlist_counts.csv", help="Output CSV path")
    ap.add_argument(
        "--image",
        default="assets/banlist_history.svg",
        help="Output image path (SVG/PNG)",
    )
    args = ap.parse_args()

    # Ensure we are in a git repo. A missing git executable raises
    # FileNotFoundError rather than CalledProcessError; report both cleanly.
    try:
        run(["git", "rev-parse", "--is-inside-work-tree"])
    except (subprocess.CalledProcessError, FileNotFoundError):
        raise SystemExit("This script must run inside a git repository.")

    snaps = collect_snapshots(args.file)
    write_csv(snaps, args.csv)
    # An empty --image value disables the chart entirely.
    chart_written = bool(args.image) and write_chart(snaps, args.image)

    print(f"Wrote {len(snaps)} snapshots to {args.csv}")
    # Only claim the chart was updated when it actually was.
    if chart_written:
        print(f"Updated chart at {args.image}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())