seeding with initial scripts.

Jordan Wages 2025-07-16 23:38:42 -05:00
commit e744276481
5 changed files with 112 additions and 0 deletions

.gitignore (vendored, 6 additions)

@@ -205,3 +205,9 @@ cython_debug/
marimo/_static/
marimo/_lsp/
__marimo__/
# SQLite database
database/*.db
database/*.sqlite
database/*.sqlite3

init.sh (new executable file, 13 additions)

@@ -0,0 +1,13 @@
#!/bin/bash
set -e
echo "[INFO] Creating virtual environment..."
python3 -m venv .venv
source .venv/bin/activate
echo "[INFO] Installing dependencies..."
pip install --upgrade pip
pip install -r requirements.txt || echo "[WARN] requirements.txt not found, skipping."
echo "[INFO] Running database setup..."
python scripts/init_db.py
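
Once init.sh has run, the import can be sanity-checked straight from the seeded database. The snippet below is not part of this commit; it only assumes the database path and logs table defined in scripts/init_db.py further down.

# Not part of this commit: a quick post-import sanity check.
# The database path and the logs table come from scripts/init_db.py.
import sqlite3

conn = sqlite3.connect("database/ngxstat.db")
total, = conn.execute("SELECT COUNT(*) FROM logs").fetchone()
by_status = conn.execute(
    "SELECT status, COUNT(*) FROM logs GROUP BY status ORDER BY COUNT(*) DESC"
).fetchall()
conn.close()

print(f"{total} rows imported")
for status, hits in by_status:
    print(f"  HTTP {status}: {hits} requests")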

requirements.txt (new file, 9 additions)

@@ -0,0 +1,9 @@
# Core tools
typer[all] # For CLI commands
Jinja2 # For static HTML generation
sqlite-utils # Optional: high-level SQLite handling
Flask # For optional lightweight API server
# Linting / formatting (optional but recommended)
black
flake8
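
The inline comments sketch the intended division of labour: Typer for the command-line surface, Jinja2 for static HTML output, with sqlite-utils and Flask as optional extras. None of that wiring exists yet in this commit; the sketch below is purely illustrative, and the command name, template, and output format are assumptions.

# Not part of this commit: an illustrative sketch of how typer and Jinja2
# might combine over the logs table; command name and template are assumed.
import sqlite3

import typer
from jinja2 import Template

app = typer.Typer()

# Assumed inline template; a real project would keep templates in their own files.
PAGE = Template(
    "<ul>{% for host, hits in rows %}<li>{{ host }}: {{ hits }}</li>{% endfor %}</ul>"
)

@app.command()
def hosts(db_file: str = "database/ngxstat.db") -> None:
    """Render a static HTML list of request counts per host."""
    conn = sqlite3.connect(db_file)
    rows = conn.execute(
        "SELECT host, COUNT(*) FROM logs GROUP BY host ORDER BY COUNT(*) DESC"
    ).fetchall()
    conn.close()
    print(PAGE.render(rows=rows))

if __name__ == "__main__":
    app()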

run-import.sh (new executable file, 13 additions)

@@ -0,0 +1,13 @@
#!/bin/bash
set -e
echo "[INFO] Creating virtual environment..."
python3 -m venv .venv
source .venv/bin/activate
echo "[INFO] Installing dependencies..."
pip install --upgrade pip
pip install -r requirements.txt || echo "[WARN] requirements.txt not found, skipping."
echo "[INFO] Running database setup..."
python scripts/init_db.py

scripts/init_db.py (new file, 71 additions)

@@ -0,0 +1,71 @@
#!/usr/bin/env python3
"""Seed the ngxstat SQLite database from the nginx access logs."""
import os
import sqlite3
import re
from pathlib import Path

LOG_DIR = "/var/log/nginx"
DB_FILE = "database/ngxstat.db"

# Match the current log and rotated copies: access.log, access.log.1, ...
LOG_FILE_PATTERN = re.compile(r'access\.log(\.\d+)?$')

# One named group per column that ends up in the logs table.
LOG_FORMAT_REGEX = re.compile(
    r'(?P<ip>\S+) - (?P<host>\S+) \[(?P<time>.*?)\] "(?P<request>.*?)" '
    r'(?P<status>\d{3}) (?P<bytes_sent>\d+) "(?P<referer>.*?)" "(?P<user_agent>.*?)" (?P<cache_status>\S+)'
)

# Create the database directory and schema if they do not exist yet.
os.makedirs("database", exist_ok=True)
conn = sqlite3.connect(DB_FILE)
cursor = conn.cursor()
cursor.execute('''
    CREATE TABLE IF NOT EXISTS logs (
        id INTEGER PRIMARY KEY,
        ip TEXT,
        host TEXT,
        time TEXT,
        request TEXT,
        status INTEGER,
        bytes_sent INTEGER,
        referer TEXT,
        user_agent TEXT,
        cache_status TEXT
    )
''')
conn.commit()

# Collect the access logs to import, sorted by filename.
try:
    log_files = sorted([
        os.path.join(LOG_DIR, f)
        for f in os.listdir(LOG_DIR)
        if LOG_FILE_PATTERN.match(f)
    ])
except FileNotFoundError:
    print(f"[ERROR] Log directory not found: {LOG_DIR}")
    exit(1)

print(f"[INFO] Found {len(log_files)} log files.")

# Parse each file line by line and insert only the lines that match the format.
inserted = 0
for log_file in log_files:
    print(f"[INFO] Parsing {log_file}...")
    with open(log_file, 'r', encoding='utf-8', errors='ignore') as f:
        for line in f:
            match = LOG_FORMAT_REGEX.match(line.strip())
            if match:
                data = match.groupdict()
                cursor.execute('''
                    INSERT INTO logs (
                        ip, host, time, request, status, bytes_sent,
                        referer, user_agent, cache_status
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    data["ip"], data["host"], data["time"], data["request"],
                    int(data["status"]), int(data["bytes_sent"]),
                    data["referer"], data["user_agent"], data["cache_status"]
                ))
                inserted += 1

conn.commit()
conn.close()
print(f"[DONE] Inserted {inserted} entries into {DB_FILE}.")
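
For reference, LOG_FORMAT_REGEX expects lines shaped roughly like the fabricated example below; the exact layout depends on the nginx log_format directive in use, which this commit does not include.

# Illustrative only: a made-up access-log line in the shape the regex expects.
import re

LOG_FORMAT_REGEX = re.compile(
    r'(?P<ip>\S+) - (?P<host>\S+) \[(?P<time>.*?)\] "(?P<request>.*?)" '
    r'(?P<status>\d{3}) (?P<bytes_sent>\d+) "(?P<referer>.*?)" "(?P<user_agent>.*?)" (?P<cache_status>\S+)'
)

sample = ('203.0.113.7 - example.com [16/Jul/2025:23:38:42 -0500] '
          '"GET /index.html HTTP/1.1" 200 1024 "-" "Mozilla/5.0" HIT')

print(LOG_FORMAT_REGEX.match(sample).groupdict())
# {'ip': '203.0.113.7', 'host': 'example.com', 'time': '16/Jul/2025:23:38:42 -0500',
#  'request': 'GET /index.html HTTP/1.1', 'status': '200', 'bytes_sent': '1024',
#  'referer': '-', 'user_agent': 'Mozilla/5.0', 'cache_status': 'HIT'}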