149 lines
4.2 KiB
Python
149 lines
4.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Generate historical metrics for banned.txt by walking git history.
|
|
|
|
Outputs:
|
|
- CSV with (date_iso, timestamp, commit, count)
|
|
- Optional SVG/PNG line chart of counts over time
|
|
|
|
Usage:
|
|
python scripts/banlist_metrics.py \
|
|
--file banned.txt \
|
|
--csv metrics/banlist_counts.csv \
|
|
--image assets/banlist_history.svg
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import csv
|
|
import os
|
|
import subprocess
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timezone
|
|
from typing import List, Optional
|
|
|
|
|
|
@dataclass
class Snapshot:
    """Size of the tracked file as of one commit."""

    commit: str  # commit id the measurement was taken at
    timestamp: int  # Unix epoch seconds
    count: int  # number of entries counted in the file at this commit

    @property
    def date_iso(self) -> str:
        """Return ``timestamp`` rendered as an ISO 8601 string in UTC."""
        moment = datetime.fromtimestamp(self.timestamp, tz=timezone.utc)
        return moment.isoformat()
|
|
|
|
|
|
def run(cmd: List[str], cwd: Optional[str] = None) -> str:
    """Run *cmd* and return its standard output as stripped text.

    Args:
        cmd: Program and arguments; executed directly, without a shell.
        cwd: Directory to run the command in. ``None`` keeps the current one.

    Returns:
        The command's stdout decoded as UTF-8, with leading and trailing
        whitespace removed. Bytes that are not valid UTF-8 are replaced
        with U+FFFD instead of raising.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
        FileNotFoundError: If the program in ``cmd[0]`` cannot be found.
    """
    out = subprocess.check_output(cmd, cwd=cwd)
    # Output may be arbitrary file content from old revisions; a single
    # stray non-UTF-8 byte must not abort the whole run.
    return out.decode("utf-8", errors="replace").strip()
|
|
|
|
|
|
def git_file_commits(path: str) -> List[str]:
    """Return the commits reachable from HEAD that touched *path*, oldest first."""
    # --reverse flips rev-list's default newest-first ordering.
    listing = run(["git", "rev-list", "--reverse", "HEAD", "--", path])
    # Drop empty strings so blank lines never become commit ids.
    return list(filter(None, listing.splitlines()))
|
|
|
|
|
|
def git_commit_timestamp(commit: str) -> int:
    """Return the committer date of *commit* as Unix epoch seconds."""
    # -s suppresses the diff; %ct is the committer date as a Unix timestamp.
    epoch_text = run(["git", "show", "-s", "--format=%ct", commit])
    return int(epoch_text)
|
|
|
|
|
|
def git_show_file_at(commit: str, path: str) -> str:
    """Return the content of *path* as recorded in *commit*."""
    # "<commit>:<path>" is git's syntax for a file inside a given revision.
    blob_spec = f"{commit}:{path}"
    return run(["git", "show", blob_spec])
|
|
|
|
|
|
def count_ips(text: str) -> int:
    """Count the entry lines in *text*.

    A line counts as an entry when, after stripping surrounding whitespace,
    it is non-empty and does not start with ``#``.
    """
    stripped_lines = (raw.strip() for raw in text.splitlines())
    return sum(
        1 for entry in stripped_lines if entry and not entry.startswith("#")
    )
|
|
|
|
|
|
def collect_snapshots(target_file: str) -> List[Snapshot]:
    """Build one Snapshot per commit that touched *target_file*.

    Commits are visited in the order returned by ``git_file_commits``.
    Commits in which the file cannot be read are skipped.
    """
    history: List[Snapshot] = []
    for sha in git_file_commits(target_file):
        try:
            body = git_show_file_at(sha, target_file)
        except subprocess.CalledProcessError:
            # File may not exist in this commit (renames, etc.)
            continue
        entries = count_ips(body)
        when = git_commit_timestamp(sha)
        history.append(Snapshot(commit=sha, timestamp=when, count=entries))
    return history
|
|
|
|
|
|
def write_csv(snaps: List["Snapshot"], csv_path: str) -> None:
    """Write *snaps* to *csv_path* as CSV, overwriting any existing file.

    The file starts with the header ``date_iso,timestamp,commit,count``
    followed by one row per snapshot, in the order given.

    Args:
        snaps: Snapshots to serialise; may be empty (header only).
        csv_path: Destination file. Missing parent directories are created.
    """
    parent = os.path.dirname(csv_path)
    # A bare filename has an empty dirname, and os.makedirs("") raises.
    if parent:
        os.makedirs(parent, exist_ok=True)
    # newline="" lets the csv module control line endings itself.
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["date_iso", "timestamp", "commit", "count"])  # header
        w.writerows([s.date_iso, s.timestamp, s.commit, s.count] for s in snaps)
|
|
|
|
|
|
def write_chart(snaps: List["Snapshot"], image_path: str) -> None:
    """Plot snapshot counts over time and save the chart to *image_path*.

    Charting is best-effort: if there is nothing to plot, or matplotlib
    cannot be imported, a message is printed and no file is written.

    Args:
        snaps: Snapshots to plot; ``timestamp`` is the x value (as a UTC
            datetime) and ``count`` the y value.
        image_path: Destination image file. Missing parent directories are
            created.
    """
    # Check for data first so an empty history never pays for (or depends
    # on) the matplotlib import.
    if not snaps:
        print("No snapshots to chart.")
        return

    try:
        import matplotlib

        matplotlib.use("Agg")  # headless
        import matplotlib.pyplot as plt
    except Exception as e:
        # Deliberately broad: any import or backend failure means "no chart",
        # not a crash.
        print(f"Skipping chart generation (matplotlib unavailable): {e}")
        return

    xs = [datetime.fromtimestamp(s.timestamp, tz=timezone.utc) for s in snaps]
    ys = [s.count for s in snaps]

    plt.figure(figsize=(8, 3))
    try:
        plt.plot(xs, ys, marker="o", linewidth=1.5, markersize=2)
        plt.title("WageNet IP Ban List Size Over Time")
        plt.xlabel("Date (UTC)")
        plt.ylabel("IP count")
        plt.grid(True, linestyle=":", linewidth=0.5)
        plt.tight_layout()

        parent = os.path.dirname(image_path)
        # A bare filename has an empty dirname, and os.makedirs("") raises.
        if parent:
            os.makedirs(parent, exist_ok=True)
        plt.savefig(image_path)
    finally:
        # Release the figure even if plotting or saving failed.
        plt.close()
|
|
|
|
|
|
def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point: write the CSV and, optionally, the chart.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: If arguments are invalid, git is not installed, or the
            current directory is not inside a git work tree.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--file", default="banned.txt", help="Path to ban list in repo")
    ap.add_argument("--csv", default="metrics/banlist_counts.csv", help="Output CSV path")
    ap.add_argument(
        "--image",
        default="assets/banlist_history.svg",
        help="Output image path (SVG/PNG)",
    )
    args = ap.parse_args(argv)

    # Ensure we are in a git repo
    try:
        run(["git", "rev-parse", "--is-inside-work-tree"])
    except FileNotFoundError:
        # git itself is missing; report that instead of a raw traceback.
        raise SystemExit("git executable not found on PATH.") from None
    except subprocess.CalledProcessError:
        raise SystemExit("This script must run inside a git repository.") from None

    snaps = collect_snapshots(args.file)
    write_csv(snaps, args.csv)
    # An empty --image value disables charting.
    if args.image:
        write_chart(snaps, args.image)
    print(f"Wrote {len(snaps)} snapshots to {args.csv}")
    if args.image:
        # NOTE(review): write_chart can skip writing (no data, or matplotlib
        # missing) without signalling it, so this line may print even when
        # the image was not regenerated.
        print(f"Updated chart at {args.image}")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
|
|