From d69d0cae669ca96bcb4e1b584b1cc77c268f78a2 Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 28 Jun 2025 15:46:30 -0500 Subject: [PATCH] Merge AI caches and add cache key helper --- AGENTS.md | 7 ++ README.md | 9 +- background.js | 13 +-- modules/AiClassifier.js | 155 ++++++++++++++--------------- modules/ExpressionSearchFilter.jsm | 11 +- 5 files changed, 95 insertions(+), 100 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 87cfd6f..ad83d34 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -59,3 +59,10 @@ base64 data should be replaced with placeholders showing the byte size. The final string should have the headers, a brief attachment section, then the plain text extracted from all text parts. +### Cache Strategy + +`aiCache` persists classification results. Each key is the SHA‑256 hex of +`"${id}|${criterion}"` and maps to an object with `matched` and `reason` +properties. Any legacy `aiReasonCache` data is merged into `aiCache` the first +time the add-on loads after an update. + diff --git a/README.md b/README.md index 299ecdc..ed362eb 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ message meets a specified criterion. - **Custom system prompts** – tailor the instructions sent to the model for more precise results. - **Filter editor integration** – patches Thunderbird's filter editor to accept text criteria for AI classification. -- **Persistent result caching** – classification results are saved to disk so messages aren't re-evaluated across restarts. +- **Persistent result caching** – classification results and reasoning are saved to disk so messages aren't re-evaluated across restarts. - **Advanced parameters** – tune generation settings like temperature, top‑p and more from the options page. - **Debug logging** – optional colorized logs help troubleshoot interactions with the AI service. - **Automatic rules** – create rules that tag or move new messages based on AI classification. @@ -25,6 +25,13 @@ message meets a specified criterion. 
- **Status icons** – toolbar icons show when classification is in progress and briefly display success or error states. - **Packaging script** – `build-xpi.ps1` builds an XPI ready for installation. +### Cache Storage + +Classification results are stored under the `aiCache` key in extension storage. +Each entry maps a SHA‑256 hash of `"${id}|${criterion}"` to an object +containing `matched` and `reason` fields. Older installations with a separate +`aiReasonCache` will be migrated automatically on startup. + ## Architecture Overview Sortana is implemented entirely with standard WebExtension scripts—no custom experiment code is required: diff --git a/background.js b/background.js index 8e235d4..ab37701 100644 --- a/background.js +++ b/background.js @@ -43,10 +43,6 @@ function showTransientIcon(path, delay = 1500) { iconTimer = setTimeout(updateActionIcon, delay); } -async function sha256Hex(str) { - const buf = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(str)); - return Array.from(new Uint8Array(buf), b => b.toString(16).padStart(2, '0')).join(''); -} function byteSize(str) { return new TextEncoder().encode(str || "").length; @@ -117,7 +113,7 @@ async function applyAiRules(idsInput) { const text = buildEmailText(full); for (const rule of aiRules) { - const cacheKey = await sha256Hex(`${id}|${rule.criterion}`); + const cacheKey = await AiClassifier.buildCacheKey(id, rule.criterion); const matched = await AiClassifier.classifyText(text, rule.criterion, cacheKey); if (matched) { for (const act of (rule.actions || [])) { @@ -168,7 +164,7 @@ async function clearCacheForMessages(idsInput) { for (const msg of ids) { const id = msg?.id ?? 
msg; for (const rule of aiRules) { - const key = await sha256Hex(`${id}|${rule.criterion}`); + const key = await AiClassifier.buildCacheKey(id, rule.criterion); keys.push(key); } } @@ -192,6 +188,7 @@ async function clearCacheForMessages(idsInput) { const store = await storage.local.get(["endpoint", "templateName", "customTemplate", "customSystemPrompt", "aiParams", "debugLogging", "aiRules"]); logger.setDebug(store.debugLogging); await AiClassifier.setConfig(store); + await AiClassifier.init(); aiRules = Array.isArray(store.aiRules) ? store.aiRules.map(r => { if (r.actions) return r; const actions = []; @@ -331,7 +328,7 @@ async function clearCacheForMessages(idsInput) { } const reasons = []; for (const rule of aiRules) { - const key = await sha256Hex(`${id}|${rule.criterion}`); + const key = await AiClassifier.buildCacheKey(id, rule.criterion); const reason = AiClassifier.getReason(key); if (reason) { reasons.push({ criterion: rule.criterion, reason }); @@ -361,7 +358,7 @@ async function clearCacheForMessages(idsInput) { } const results = []; for (const rule of aiRules) { - const key = await sha256Hex(`${id}|${rule.criterion}`); + const key = await AiClassifier.buildCacheKey(id, rule.criterion); const matched = AiClassifier.getCachedResult(key); const reason = AiClassifier.getReason(key); if (matched !== null || reason) { diff --git a/modules/AiClassifier.js b/modules/AiClassifier.js index cef1842..e123288 100644 --- a/modules/AiClassifier.js +++ b/modules/AiClassifier.js @@ -49,8 +49,39 @@ let gAiParams = { let gCache = new Map(); let gCacheLoaded = false; -let gReasonCache = new Map(); -let gReasonCacheLoaded = false; + +function sha256HexSync(str) { + try { + const hasher = Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash); + hasher.init(Ci.nsICryptoHash.SHA256); + const data = new TextEncoder().encode(str); + hasher.update(data, data.length); + const binary = hasher.finish(false); + return Array.from(binary, c => ("0" + 
c.charCodeAt(0).toString(16)).slice(-2)).join(""); + } catch (e) { + aiLog(`sha256HexSync failed`, { level: 'error' }, e); + return ""; + } +} + +async function sha256Hex(str) { + if (typeof crypto?.subtle?.digest === "function") { + const buf = await crypto.subtle.digest("SHA-256", new TextEncoder().encode(str)); + return Array.from(new Uint8Array(buf), b => b.toString(16).padStart(2, "0")).join(""); + } + return sha256HexSync(str); +} + +function buildCacheKeySync(id, criterion) { + return sha256HexSync(`${id}|${criterion}`); +} + +async function buildCacheKey(id, criterion) { + if (Services) { + return buildCacheKeySync(id, criterion); + } + return sha256Hex(`${id}|${criterion}`); +} async function loadCache() { if (gCacheLoaded) { @@ -58,16 +89,29 @@ async function loadCache() { } aiLog(`[AiClassifier] Loading cache`, {debug: true}); try { - const { aiCache } = await storage.local.get("aiCache"); + const { aiCache, aiReasonCache } = await storage.local.get(["aiCache", "aiReasonCache"]); if (aiCache) { for (let [k, v] of Object.entries(aiCache)) { - aiLog(`[AiClassifier] ⮡ Loaded entry '${k}' → ${v}`, {debug: true}); - gCache.set(k, v); + if (v && typeof v === "object") { + gCache.set(k, { matched: v.matched ?? 
null, reason: v.reason || "" }); + } else { + gCache.set(k, { matched: v, reason: "" }); + } } aiLog(`[AiClassifier] Loaded ${gCache.size} cache entries`, {debug: true}); } else { aiLog(`[AiClassifier] Cache is empty`, {debug: true}); } + if (aiReasonCache) { + aiLog(`[AiClassifier] Migrating ${Object.keys(aiReasonCache).length} reason entries`, {debug: true}); + for (let [k, reason] of Object.entries(aiReasonCache)) { + let entry = gCache.get(k) || { matched: null, reason: "" }; + entry.reason = reason; + gCache.set(k, entry); + } + await storage.local.remove("aiReasonCache"); + await storage.local.set({ aiCache: Object.fromEntries(gCache) }); + } } catch (e) { aiLog(`Failed to load cache`, {level: 'error'}, e); } @@ -96,49 +140,6 @@ async function saveCache(updatedKey, updatedValue) { } } -async function loadReasonCache() { - if (gReasonCacheLoaded) { - return; - } - aiLog(`[AiClassifier] Loading reason cache`, {debug: true}); - try { - const { aiReasonCache } = await storage.local.get("aiReasonCache"); - if (aiReasonCache) { - for (let [k, v] of Object.entries(aiReasonCache)) { - aiLog(`[AiClassifier] ⮡ Loaded reason '${k}'`, {debug: true}); - gReasonCache.set(k, v); - } - aiLog(`[AiClassifier] Loaded ${gReasonCache.size} reason entries`, {debug: true}); - } else { - aiLog(`[AiClassifier] Reason cache is empty`, {debug: true}); - } - } catch (e) { - aiLog(`Failed to load reason cache`, {level: 'error'}, e); - } - gReasonCacheLoaded = true; -} - -function loadReasonCacheSync() { - if (!gReasonCacheLoaded) { - if (!Services?.tm?.spinEventLoopUntil) { - throw new Error("loadReasonCacheSync requires Services"); - } - let done = false; - loadReasonCache().finally(() => { done = true; }); - Services.tm.spinEventLoopUntil(() => done); - } -} - -async function saveReasonCache(updatedKey, updatedValue) { - if (typeof updatedKey !== "undefined") { - aiLog(`[AiClassifier] ⮡ Persisting reason '${updatedKey}'`, {debug: true}); - } - try { - await storage.local.set({ 
aiReasonCache: Object.fromEntries(gReasonCache) }); - } catch (e) { - aiLog(`Failed to save reason cache`, {level: 'error'}, e); - } -} async function loadTemplate(name) { try { @@ -220,26 +221,27 @@ function getCachedResult(cacheKey) { if (Services?.tm?.spinEventLoopUntil) { loadCacheSync(); } else { - // In non-privileged contexts we can't block, so bail out early. return null; } } if (cacheKey && gCache.has(cacheKey)) { aiLog(`[AiClassifier] Cache hit for key: ${cacheKey}`, {debug: true}); - return gCache.get(cacheKey); + const entry = gCache.get(cacheKey); + return entry?.matched ?? null; } return null; } function getReason(cacheKey) { - if (!gReasonCacheLoaded) { + if (!gCacheLoaded) { if (Services?.tm?.spinEventLoopUntil) { - loadReasonCacheSync(); + loadCacheSync(); } else { return null; } } - return cacheKey ? gReasonCache.get(cacheKey) || null : null; + const entry = gCache.get(cacheKey); + return cacheKey && entry ? entry.reason || null : null; } function buildPayload(text, criterion) { @@ -260,20 +262,20 @@ function parseMatch(result) { return { matched, reason: thinkText }; } -function cacheResult(cacheKey, matched) { - if (cacheKey) { - aiLog(`[AiClassifier] Caching entry '${cacheKey}' → ${matched}`, {debug: true}); - gCache.set(cacheKey, matched); - saveCache(cacheKey, matched); +function cacheEntry(cacheKey, matched, reason) { + if (!cacheKey) { + return; } -} - -function cacheReason(cacheKey, reason) { - if (cacheKey) { - aiLog(`[AiClassifier] Caching reason '${cacheKey}'`, {debug: true}); - gReasonCache.set(cacheKey, reason); - saveReasonCache(cacheKey, reason); + aiLog(`[AiClassifier] Caching entry '${cacheKey}'`, {debug: true}); + const entry = gCache.get(cacheKey) || { matched: null, reason: "" }; + if (typeof matched === "boolean") { + entry.matched = matched; } + if (typeof reason === "string") { + entry.reason = reason; + } + gCache.set(cacheKey, entry); + saveCache(cacheKey, entry); } async function removeCacheEntries(keys = []) { @@ -289,14 
+291,9 @@ async function removeCacheEntries(keys = []) { removed = true; aiLog(`[AiClassifier] Removed cache entry '${key}'`, {debug: true}); } - if (gReasonCache.delete(key)) { - removed = true; - aiLog(`[AiClassifier] Removed reason entry '${key}'`, {debug: true}); - } } if (removed) { await saveCache(); - await saveReasonCache(); } } @@ -304,9 +301,6 @@ function classifyTextSync(text, criterion, cacheKey = null) { if (!Services?.tm?.spinEventLoopUntil) { throw new Error("classifyTextSync requires Services"); } - if (!gReasonCacheLoaded) { - loadReasonCacheSync(); - } const cached = getCachedResult(cacheKey); if (cached !== null) { return cached; @@ -329,8 +323,7 @@ function classifyTextSync(text, criterion, cacheKey = null) { const json = await response.json(); aiLog(`[AiClassifier] Received response:`, {debug: true}, json); result = parseMatch(json); - cacheResult(cacheKey, result.matched); - cacheReason(cacheKey, result.reason); + cacheEntry(cacheKey, result.matched, result.reason); result = result.matched; } else { aiLog(`HTTP status ${response.status}`, {level: 'warn'}); @@ -351,9 +344,6 @@ async function classifyText(text, criterion, cacheKey = null) { if (!gCacheLoaded) { await loadCache(); } - if (!gReasonCacheLoaded) { - await loadReasonCache(); - } const cached = getCachedResult(cacheKey); if (cached !== null) { return cached; @@ -378,8 +368,7 @@ async function classifyText(text, criterion, cacheKey = null) { const result = await response.json(); aiLog(`[AiClassifier] Received response:`, {debug: true}, result); const parsed = parseMatch(result); - cacheResult(cacheKey, parsed.matched); - cacheReason(cacheKey, parsed.reason); + cacheEntry(cacheKey, parsed.matched, parsed.reason); return parsed.matched; } catch (e) { aiLog(`HTTP request failed`, {level: 'error'}, e); @@ -387,4 +376,8 @@ async function classifyText(text, criterion, cacheKey = null) { } } -export { classifyText, classifyTextSync, setConfig, removeCacheEntries, getReason, getCachedResult }; 
+async function init() { + await loadCache(); +} + +export { classifyText, classifyTextSync, setConfig, removeCacheEntries, getReason, getCachedResult, buildCacheKey, buildCacheKeySync, init }; diff --git a/modules/ExpressionSearchFilter.jsm b/modules/ExpressionSearchFilter.jsm index 791c181..b9998a9 100644 --- a/modules/ExpressionSearchFilter.jsm +++ b/modules/ExpressionSearchFilter.jsm @@ -5,15 +5,6 @@ var { aiLog } = ChromeUtils.import("resource://aifilter/modules/logger.jsm"); var AiClassifier = ChromeUtils.importESModule("resource://aifilter/modules/AiClassifier.js"); var { getPlainText } = ChromeUtils.import("resource://aifilter/modules/messageUtils.jsm"); -function sha256Hex(str) { - const hasher = Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash); - hasher.init(Ci.nsICryptoHash.SHA256); - const data = new TextEncoder().encode(str); - hasher.update(data, data.length); - const binary = hasher.finish(false); - return Array.from(binary, c => ("0" + c.charCodeAt(0).toString(16)).slice(-2)).join(""); -} - var EXPORTED_SYMBOLS = ["AIFilter", "ClassificationTerm"]; class CustomerTermBase { @@ -70,7 +61,7 @@ class ClassificationTerm extends CustomerTermBase { op === Ci.nsMsgSearchOp.DoesntMatch ? "doesn't match" : `unknown (${op})`; aiLog(`[ExpressionSearchFilter] Matching message ${msgHdr.messageId} using op "${opName}" and value "${value}"`, {debug: true}); - let key = [msgHdr.messageId, op, value].map(sha256Hex).join("|"); + let key = AiClassifier.buildCacheKeySync(msgHdr.messageId, value); let body = getPlainText(msgHdr); let matched = AiClassifier.classifyTextSync(body, value, key);