Add threat detection analysis

2025-07-19 02:12:24 -05:00 · 2025-07-19 02:12:24 -05:00 · 350445b167
commit 350445b167
parent 0354185bb9
2 changed files with 253 additions and 0 deletions
--- a/tests/test_analyze.py
+++ b/tests/test_analyze.py
@@ -203,3 +203,122 @@ server {
    analyze.suggest_cache(threshold=2, json_output=True)
    out_json = json.loads(capsys.readouterr().out.strip())
    assert out_json == [{"host": "example.com", "path": "/foo", "misses": 3}]
+
+
+def setup_threat_db(path: Path) -> None:
+    """Create a SQLite ``logs`` table at *path* seeded with threat fixtures.
+
+    Missing parent directories of *path* are created. Rows are inserted in
+    four groups, all dated 2024-01-01 and in this order:
+
+    * 10 status-200 requests to ``example.com`` at 11:00-11:09
+      (previous hour, no errors);
+    * 10 status-500 requests to ``example.com`` at 12:00-12:09
+      (recent hour with errors);
+    * 101 requests from ``1.1.1.1`` to ``example.net``
+      (high traffic from a single IP);
+    * 15 requests with user agent ``newbot`` to ``example.org``
+      at 12:30-12:44 (new suspicious user agent).
+
+    The connection is always closed, even if a statement fails.
+    """
+    create_sql = """
+        CREATE TABLE logs (
+            id INTEGER PRIMARY KEY,
+            ip TEXT,
+            host TEXT,
+            time TEXT,
+            request TEXT,
+            status INTEGER,
+            bytes_sent INTEGER,
+            referer TEXT,
+            user_agent TEXT,
+            cache_status TEXT
+        )
+        """
+    insert_sql = (
+        "INSERT INTO logs (ip, host, time, request, status, bytes_sent, referer, user_agent, cache_status)"
+        " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
+    )
+
+    def fixture_rows(ip, host, times, request, status, user_agent="curl"):
+        # Every fixture row shares bytes_sent, referer and cache_status;
+        # only the timestamp varies within a group.
+        return [
+            (ip, host, stamp, request, status, 100, "-", user_agent, "MISS")
+            for stamp in times
+        ]
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    conn = sqlite3.connect(path)
+    try:
+        cur = conn.cursor()
+        cur.execute(create_sql)
+
+        # Previous hour traffic with no errors
+        cur.executemany(
+            insert_sql,
+            fixture_rows(
+                "2.2.2.2",
+                "example.com",
+                (f"2024-01-01 11:{i:02d}:00" for i in range(10)),
+                "GET /ok HTTP/1.1",
+                200,
+            ),
+        )
+
+        # Recent hour with errors
+        cur.executemany(
+            insert_sql,
+            fixture_rows(
+                "3.3.3.3",
+                "example.com",
+                (f"2024-01-01 12:{i:02d}:00" for i in range(10)),
+                "GET /fail HTTP/1.1",
+                500,
+            ),
+        )
+
+        # High traffic from single IP (minutes cycle through 00-09)
+        cur.executemany(
+            insert_sql,
+            fixture_rows(
+                "1.1.1.1",
+                "example.net",
+                (f"2024-01-01 12:{i % 10:02d}:30" for i in range(101)),
+                "GET /spam HTTP/1.1",
+                200,
+            ),
+        )
+
+        # New suspicious user agent
+        cur.executemany(
+            insert_sql,
+            fixture_rows(
+                "4.4.4.4",
+                "example.org",
+                (f"2024-01-01 12:{30 + i:02d}:45" for i in range(15)),
+                "GET /bot HTTP/1.1",
+                200,
+                user_agent="newbot",
+            ),
+        )
+
+        conn.commit()
+    finally:
+        # Release the database file handle even when seeding fails.
+        conn.close()
+
+
+def test_detect_threats(tmp_path, monkeypatch):
+    """detect_threats writes a JSON report that flags each seeded anomaly."""
+    database = tmp_path / "database" / "ngxstat.db"
+    analysis_dir = tmp_path / "analysis"
+    setup_threat_db(database)
+
+    # Point the analyzer at the throwaway database and output directory.
+    monkeypatch.setattr(analyze, "DB_PATH", database)
+    monkeypatch.setattr(analyze, "ANALYSIS_DIR", analysis_dir)
+
+    analyze.detect_threats(hours=1, ip_threshold=100)
+
+    report_file = analysis_dir / "threat_report.json"
+    report = json.loads(report_file.read_text())
+
+    def flagged(section, field):
+        # Collect one field from every entry of a report section;
+        # a missing section counts as empty.
+        return {entry[field] for entry in report.get(section, [])}
+
+    assert "example.com" in flagged("error_spikes", "host")
+    assert "1.1.1.1" in flagged("high_ip_requests", "ip")
+    assert "newbot" in flagged("suspicious_agents", "user_agent")