All checks were successful
		
		
	
	Generate banlist history graph / build (push) Successful in 6s
				
			
		
			
				
	
	
		
			169 lines
		
	
	
	
		
			4.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			169 lines
		
	
	
	
		
			4.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python3
 | |
| """
 | |
| Generate historical metrics for banned.txt by walking git history.
 | |
| 
 | |
| Outputs:
 | |
| - CSV with (date_iso, timestamp, commit, count)
 | |
| - Optional SVG/PNG line chart of counts over time
 | |
| 
 | |
| Usage:
 | |
|   python scripts/banlist_metrics.py \
 | |
|     --file banned.txt \
 | |
|     --csv metrics/banlist_counts.csv \
 | |
|     --image assets/banlist_history.svg
 | |
| """
 | |
| 
 | |
| from __future__ import annotations
 | |
| 
 | |
| import argparse
 | |
| import csv
 | |
| import os
 | |
| import subprocess
 | |
| from dataclasses import dataclass
 | |
| from datetime import datetime, timezone
 | |
| from typing import List, Optional
 | |
| 
 | |
| 
 | |
@dataclass
class Snapshot:
    """Size of the tracked file as measured at a single commit."""

    commit: str  # commit identifier the measurement was taken at
    timestamp: int  # Unix epoch seconds
    count: int  # number of counted entries in the file at that commit

    @property
    def date_iso(self) -> str:
        """Return ``timestamp`` as a timezone-aware ISO 8601 string in UTC."""
        moment = datetime.fromtimestamp(self.timestamp, tz=timezone.utc)
        return moment.isoformat()
 | |
| 
 | |
| 
 | |
def run(cmd: List[str], cwd: Optional[str] = None) -> str:
    """Run *cmd* and return its standard output as stripped text.

    Args:
        cmd: Program and arguments; executed directly, without a shell.
        cwd: Directory to run the command in; ``None`` keeps the current one.

    Returns:
        The captured stdout decoded as UTF-8 with leading and trailing
        whitespace removed.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    raw = subprocess.check_output(cmd, cwd=cwd)
    # The output may be the content of an arbitrary historical file revision,
    # which is not guaranteed to be valid UTF-8. Substitute undecodable bytes
    # instead of letting one bad byte abort the whole run.
    return raw.decode("utf-8", errors="replace").strip()
 | |
| 
 | |
| 
 | |
def git_file_commits(path: str) -> List[str]:
    """Return the commits reachable from HEAD that touched *path*, oldest first."""
    listing = run(["git", "rev-list", "--reverse", "HEAD", "--", path])
    # Discard empty lines so an empty listing produces an empty list.
    return list(filter(None, listing.splitlines()))
 | |
| 
 | |
| 
 | |
def git_commit_timestamp(commit: str) -> int:
    """Return the committer timestamp of *commit* as Unix epoch seconds."""
    # -s suppresses the diff; %ct prints the committer date as a Unix timestamp.
    raw_seconds = run(["git", "show", "-s", "--format=%ct", commit])
    return int(raw_seconds)
 | |
| 
 | |
| 
 | |
def git_show_file_at(commit: str, path: str) -> str:
    """Return the text of *path* as stored in *commit* (via ``git show``)."""
    blob_spec = f"{commit}:{path}"
    return run(["git", "show", blob_spec])
 | |
| 
 | |
| 
 | |
def count_ips(text: str) -> int:
    """Count the entry lines in *text*.

    A line counts as an entry when, after trimming surrounding whitespace, it
    is non-empty and does not begin with ``#``.
    """
    trimmed = (raw.strip() for raw in text.splitlines())
    return sum(1 for entry in trimmed if entry and not entry.startswith("#"))
 | |
| 
 | |
| 
 | |
def collect_snapshots(target_file: str) -> List[Snapshot]:
    """Build one Snapshot per commit that touched *target_file*, oldest first.

    Commits for which ``git show`` fails for the file are skipped.
    """
    history: List[Snapshot] = []
    for sha in git_file_commits(target_file):
        try:
            text = git_show_file_at(sha, target_file)
        except subprocess.CalledProcessError:
            # The file may not exist at this commit (renames, etc.).
            continue
        else:
            history.append(
                Snapshot(
                    commit=sha,
                    count=count_ips(text),
                    timestamp=git_commit_timestamp(sha),
                )
            )
    return history
 | |
| 
 | |
| 
 | |
def write_csv(snaps: List[Snapshot], csv_path: str) -> None:
    """Write *snaps* to *csv_path* as CSV, overwriting any existing file.

    The file starts with the header ``date_iso,timestamp,commit,count``
    followed by one row per snapshot, in the order given.

    Args:
        snaps: Snapshots to serialise.
        csv_path: Destination file. Missing parent directories are created.
    """
    # os.path.dirname() returns "" for a bare filename, and os.makedirs("")
    # raises FileNotFoundError, so only create the parent when there is one.
    parent = os.path.dirname(csv_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    # newline="" lets the csv module control line endings itself.
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["date_iso", "timestamp", "commit", "count"])  # header
        w.writerows([s.date_iso, s.timestamp, s.commit, s.count] for s in snaps)
 | |
| 
 | |
| 
 | |
def write_chart(snaps: List[Snapshot], image_path: str) -> None:
    """Render a line chart of snapshot counts over time to *image_path*.

    Chart generation is best-effort: if matplotlib cannot be imported, or
    *snaps* is empty, a message is printed and nothing is written.

    Args:
        snaps: Snapshots to plot; x is the commit time (UTC), y is the count.
        image_path: Output file passed to ``Figure.savefig``. Missing parent
            directories are created.
    """
    try:
        import matplotlib

        matplotlib.use("Agg")  # headless
        import matplotlib.pyplot as plt
        import matplotlib.dates as mdates
    except Exception as e:
        print(f"Skipping chart generation (matplotlib unavailable): {e}")
        return

    if not snaps:
        print("No snapshots to chart.")
        return

    xs = [datetime.fromtimestamp(s.timestamp, tz=timezone.utc) for s in snaps]
    ys = [s.count for s in snaps]

    fig, ax = plt.subplots(figsize=(9.5, 3.2))
    # Close the figure even when drawing or saving fails, so it is not left
    # registered with pyplot.
    try:
        ax.plot(xs, ys, marker="o", linewidth=1.5, markersize=2)
        ax.set_title("WageNet IP Ban List Size Over Time")
        ax.set_xlabel("Date (UTC)")
        ax.set_ylabel("IP count")
        ax.grid(True, linestyle=":", linewidth=0.5)

        # Use full date labels on major ticks; purely cosmetic, so a failure
        # here falls back to matplotlib's default tick formatting.
        try:
            locator = mdates.AutoDateLocator(minticks=4, maxticks=8)
            formatter = mdates.DateFormatter('%Y-%m-%d')
            ax.xaxis.set_major_locator(locator)
            ax.xaxis.set_major_formatter(formatter)
        except Exception:
            pass

        # Always rotate for readability regardless of formatter availability
        try:
            fig.autofmt_xdate(rotation=45, ha="right")
        except Exception:
            for label in ax.get_xticklabels():
                label.set_rotation(45)
                label.set_horizontalalignment("right")

        # Add slight horizontal margins to prevent clipping at edges
        ax.margins(x=0.02)
        fig.tight_layout()

        # os.path.dirname() returns "" for a bare filename, and
        # os.makedirs("") raises FileNotFoundError, so guard the call.
        out_dir = os.path.dirname(image_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        fig.savefig(image_path)
    finally:
        plt.close(fig)
 | |
| 
 | |
| 
 | |
def main() -> int:
    """Parse command-line options, then regenerate the CSV and optional chart.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: If the git work-tree check fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", default="banned.txt", help="Path to ban list in repo")
    parser.add_argument("--csv", default="metrics/banlist_counts.csv", help="Output CSV path")
    parser.add_argument(
        "--image",
        default="assets/banlist_history.svg",
        help="Output image path (SVG/PNG)",
    )
    opts = parser.parse_args()

    # Everything below shells out to git, so check for a work tree first.
    try:
        run(["git", "rev-parse", "--is-inside-work-tree"])
    except subprocess.CalledProcessError:
        raise SystemExit("This script must run inside a git repository.")

    history = collect_snapshots(opts.file)
    # An empty --image value disables chart generation.
    want_chart = bool(opts.image)

    write_csv(history, opts.csv)
    if want_chart:
        write_chart(history, opts.image)

    print(f"Wrote {len(history)} snapshots to {opts.csv}")
    if want_chart:
        print(f"Updated chart at {opts.image}")
    return 0
 | |
| 
 | |
| 
 | |
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
 |