Merge pull request #53 from wagesj45/codex/refactor-caching-strategy-and-implementation

Unify AI caches and add cache key helper
2025-06-28 15:46:46 -05:00 · 2025-06-28 15:46:46 -05:00 · f7bcb88a14
commit f7bcb88a14
parent b470fcd600 d69d0cae66
5 changed files with 95 additions and 100 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@ -59,3 +59,10 @@ base64 data should be replaced with placeholders showing the byte size. The
 final string should have the headers, a brief attachment section, then the plain
 text extracted from all text parts.

+### Cache Strategy
+
+`aiCache` persists classification results. Each key is the hex-encoded SHA‑256 digest of
+`"<message id>|<criterion>"` and maps to an object with `matched` (boolean, or `null` when
+only a reason is stored) and `reason` (string) properties. Any legacy `aiReasonCache` data is
+merged into `aiCache`, and the old key removed, the first time the add-on loads after an update.
+
--- a/README.md
+++ b/README.md
@ -16,7 +16,7 @@ message meets a specified criterion.
 - **Custom system prompts** – tailor the instructions sent to the model for more precise results.
 - **Filter editor integration** – patches Thunderbird's filter editor to accept
  text criteria for AI classification.
- **Persistent result caching** – classification results are saved to disk so messages aren't re-evaluated across restarts.
+- **Persistent result caching** – classification results and reasoning are saved to disk so messages aren't re-evaluated across restarts.
 - **Advanced parameters** – tune generation settings like temperature, top‑p and more from the options page.
 - **Debug logging** – optional colorized logs help troubleshoot interactions with the AI service.
 - **Automatic rules** – create rules that tag or move new messages based on AI classification.
@ -25,6 +25,13 @@ message meets a specified criterion.
 - **Status icons** – toolbar icons show when classification is in progress and briefly display success or error states.
 - **Packaging script** – `build-xpi.ps1` builds an XPI ready for installation.

+### Cache Storage
+
+Classification results are stored under the `aiCache` key in extension storage.
+Each entry maps the hex-encoded SHA‑256 hash of `"<message id>|<criterion>"` to an object
+containing `matched` and `reason` fields. Older installations with a separate
+`aiReasonCache` will be migrated automatically on startup.
+
 ## Architecture Overview

 Sortana is implemented entirely with standard WebExtension scripts—no custom experiment code is required:
--- a/background.js
+++ b/background.js
@ -43,10 +43,6 @@ function showTransientIcon(path, delay = 1500) {
    iconTimer = setTimeout(updateActionIcon, delay);
 }

-async function sha256Hex(str) {
-    const buf = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(str));
-    return Array.from(new Uint8Array(buf), b => b.toString(16).padStart(2, '0')).join('');
-}

 function byteSize(str) {
    return new TextEncoder().encode(str || "").length;
@ -117,7 +113,7 @@ async function applyAiRules(idsInput) {
                const text = buildEmailText(full);

                for (const rule of aiRules) {
-                    const cacheKey = await sha256Hex(`${id}|${rule.criterion}`);
+                    const cacheKey = await AiClassifier.buildCacheKey(id, rule.criterion);
                    const matched = await AiClassifier.classifyText(text, rule.criterion, cacheKey);
                    if (matched) {
                        for (const act of (rule.actions || [])) {
@ -168,7 +164,7 @@ async function clearCacheForMessages(idsInput) {
    for (const msg of ids) {
        const id = msg?.id ?? msg;
        for (const rule of aiRules) {
-            const key = await sha256Hex(`${id}|${rule.criterion}`);
+            const key = await AiClassifier.buildCacheKey(id, rule.criterion);
            keys.push(key);
        }
    }
@ -192,6 +188,7 @@ async function clearCacheForMessages(idsInput) {
        const store = await storage.local.get(["endpoint", "templateName", "customTemplate", "customSystemPrompt", "aiParams", "debugLogging", "aiRules"]);
        logger.setDebug(store.debugLogging);
        await AiClassifier.setConfig(store);
+        await AiClassifier.init();
        aiRules = Array.isArray(store.aiRules) ? store.aiRules.map(r => {
            if (r.actions) return r;
            const actions = [];
@ -331,7 +328,7 @@ async function clearCacheForMessages(idsInput) {
            }
            const reasons = [];
            for (const rule of aiRules) {
-                const key = await sha256Hex(`${id}|${rule.criterion}`);
+                const key = await AiClassifier.buildCacheKey(id, rule.criterion);
                const reason = AiClassifier.getReason(key);
                if (reason) {
                    reasons.push({ criterion: rule.criterion, reason });
@ -361,7 +358,7 @@ async function clearCacheForMessages(idsInput) {
            }
            const results = [];
            for (const rule of aiRules) {
-                const key = await sha256Hex(`${id}|${rule.criterion}`);
+                const key = await AiClassifier.buildCacheKey(id, rule.criterion);
                const matched = AiClassifier.getCachedResult(key);
                const reason = AiClassifier.getReason(key);
                if (matched !== null || reason) {
--- a/modules/AiClassifier.js
+++ b/modules/AiClassifier.js
@ -49,8 +49,39 @@ let gAiParams = {

 let gCache = new Map();
 let gCacheLoaded = false;
-let gReasonCache = new Map();
-let gReasonCacheLoaded = false;
+
+function sha256HexSync(str) { // Synchronously hash `str` with SHA-256 via nsICryptoHash; returns lowercase hex, or "" on any failure.
+  try {
+    const hasher = Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash);
+    hasher.init(Ci.nsICryptoHash.SHA256);
+    const data = new TextEncoder().encode(str); // hash the encoded bytes of the string, not its UTF-16 units
+    hasher.update(data, data.length);
+    const binary = hasher.finish(false); // false: not base64; the result is read below one char per digest byte
+    return Array.from(binary, c => ("0" + c.charCodeAt(0).toString(16)).slice(-2)).join(""); // zero-pad every byte to two hex digits
+  } catch (e) {
+    aiLog(`sha256HexSync failed`, { level: 'error' }, e);
+    return ""; // failure is logged and reported as an empty string rather than thrown
+  }
+}
+
+async function sha256Hex(str) { // Async SHA-256 hex digest of `str`: Web Crypto when available, otherwise the synchronous hasher.
+  if (typeof crypto?.subtle?.digest === "function") { // NOTE(review): typeof does not guard the member access; an undeclared `crypto` would still throw here
+    const buf = await crypto.subtle.digest("SHA-256", new TextEncoder().encode(str));
+    return Array.from(new Uint8Array(buf), b => b.toString(16).padStart(2, "0")).join(""); // two lowercase hex digits per byte
+  }
+  return sha256HexSync(str); // fallback; may yield "" if the sync hasher fails
+}
+
+function buildCacheKeySync(id, criterion) { // Cache key for an (id, criterion) pair: synchronous SHA-256 hex of "<id>|<criterion>".
+  return sha256HexSync(`${id}|${criterion}`);
+}
+
+async function buildCacheKey(id, criterion) { // Async counterpart of buildCacheKeySync; hashes the same "<id>|<criterion>" string.
+  if (Services) { // NOTE(review): assumes a `Services` binding is declared elsewhere in this module; a bare reference throws if it is not — confirm
+    return buildCacheKeySync(id, criterion); // when Services is truthy the synchronous hasher is used
+  }
+  return sha256Hex(`${id}|${criterion}`);
+}

 async function loadCache() {
  if (gCacheLoaded) {
@ -58,16 +89,29 @@ async function loadCache() {
  }
  aiLog(`[AiClassifier] Loading cache`, {debug: true});
  try {
-    const { aiCache } = await storage.local.get("aiCache");
+    const { aiCache, aiReasonCache } = await storage.local.get(["aiCache", "aiReasonCache"]);
    if (aiCache) {
      for (let [k, v] of Object.entries(aiCache)) {
-        aiLog(`[AiClassifier] ⮡ Loaded entry '${k}' → ${v}`, {debug: true});
-        gCache.set(k, v);
+        if (v && typeof v === "object") {
+          gCache.set(k, { matched: v.matched ?? null, reason: v.reason || "" });
+        } else {
+          gCache.set(k, { matched: v, reason: "" });
+        }
      }
      aiLog(`[AiClassifier] Loaded ${gCache.size} cache entries`, {debug: true});
    } else {
      aiLog(`[AiClassifier] Cache is empty`, {debug: true});
    }
+    if (aiReasonCache) {
+      aiLog(`[AiClassifier] Migrating ${Object.keys(aiReasonCache).length} reason entries`, {debug: true});
+      for (let [k, reason] of Object.entries(aiReasonCache)) {
+        let entry = gCache.get(k) || { matched: null, reason: "" };
+        entry.reason = reason;
+        gCache.set(k, entry);
+      }
+      await storage.local.remove("aiReasonCache");
+      await storage.local.set({ aiCache: Object.fromEntries(gCache) });
+    }
  } catch (e) {
    aiLog(`Failed to load cache`, {level: 'error'}, e);
  }
@ -96,49 +140,6 @@ async function saveCache(updatedKey, updatedValue) {
  }
 }

-async function loadReasonCache() {
-  if (gReasonCacheLoaded) {
-    return;
-  }
-  aiLog(`[AiClassifier] Loading reason cache`, {debug: true});
-  try {
-    const { aiReasonCache } = await storage.local.get("aiReasonCache");
-    if (aiReasonCache) {
-      for (let [k, v] of Object.entries(aiReasonCache)) {
-        aiLog(`[AiClassifier] ⮡ Loaded reason '${k}'`, {debug: true});
-        gReasonCache.set(k, v);
-      }
-      aiLog(`[AiClassifier] Loaded ${gReasonCache.size} reason entries`, {debug: true});
-    } else {
-      aiLog(`[AiClassifier] Reason cache is empty`, {debug: true});
-    }
-  } catch (e) {
-    aiLog(`Failed to load reason cache`, {level: 'error'}, e);
-  }
-  gReasonCacheLoaded = true;
-}
-
-function loadReasonCacheSync() {
-  if (!gReasonCacheLoaded) {
-    if (!Services?.tm?.spinEventLoopUntil) {
-      throw new Error("loadReasonCacheSync requires Services");
-    }
-    let done = false;
-    loadReasonCache().finally(() => { done = true; });
-    Services.tm.spinEventLoopUntil(() => done);
-  }
-}
-
-async function saveReasonCache(updatedKey, updatedValue) {
-  if (typeof updatedKey !== "undefined") {
-    aiLog(`[AiClassifier] ⮡ Persisting reason '${updatedKey}'`, {debug: true});
-  }
-  try {
-    await storage.local.set({ aiReasonCache: Object.fromEntries(gReasonCache) });
-  } catch (e) {
-    aiLog(`Failed to save reason cache`, {level: 'error'}, e);
-  }
-}

 async function loadTemplate(name) {
  try {
@ -220,26 +221,27 @@ function getCachedResult(cacheKey) {
    if (Services?.tm?.spinEventLoopUntil) {
      loadCacheSync();
    } else {
-      // In non-privileged contexts we can't block, so bail out early.
      return null;
    }
  }
  if (cacheKey && gCache.has(cacheKey)) {
    aiLog(`[AiClassifier] Cache hit for key: ${cacheKey}`, {debug: true});
-    return gCache.get(cacheKey);
+    const entry = gCache.get(cacheKey);
+    return entry?.matched ?? null;
  }
  return null;
 }

 function getReason(cacheKey) {
-  if (!gReasonCacheLoaded) {
+  if (!gCacheLoaded) {
    if (Services?.tm?.spinEventLoopUntil) {
-      loadReasonCacheSync();
+      loadCacheSync();
    } else {
      return null;
    }
  }
-  return cacheKey ? gReasonCache.get(cacheKey) || null : null;
+  const entry = gCache.get(cacheKey);
+  return cacheKey && entry ? entry.reason || null : null;
 }

 function buildPayload(text, criterion) {
@ -260,20 +262,20 @@ function parseMatch(result) {
  return { matched, reason: thinkText };
 }

-function cacheResult(cacheKey, matched) {
-  if (cacheKey) {
-    aiLog(`[AiClassifier] Caching entry '${cacheKey}' → ${matched}`, {debug: true});
-    gCache.set(cacheKey, matched);
-    saveCache(cacheKey, matched);
+function cacheEntry(cacheKey, matched, reason) {
+  if (!cacheKey) {
+    return;
  }
-}
-
-function cacheReason(cacheKey, reason) {
-  if (cacheKey) {
-    aiLog(`[AiClassifier] Caching reason '${cacheKey}'`, {debug: true});
-    gReasonCache.set(cacheKey, reason);
-    saveReasonCache(cacheKey, reason);
+  aiLog(`[AiClassifier] Caching entry '${cacheKey}'`, {debug: true});
+  const entry = gCache.get(cacheKey) || { matched: null, reason: "" };
+  if (typeof matched === "boolean") {
+    entry.matched = matched;
  }
+  if (typeof reason === "string") {
+    entry.reason = reason;
+  }
+  gCache.set(cacheKey, entry);
+  saveCache(cacheKey, entry);
 }

 async function removeCacheEntries(keys = []) {
@ -289,14 +291,9 @@ async function removeCacheEntries(keys = []) {
      removed = true;
      aiLog(`[AiClassifier] Removed cache entry '${key}'`, {debug: true});
    }
-    if (gReasonCache.delete(key)) {
-      removed = true;
-      aiLog(`[AiClassifier] Removed reason entry '${key}'`, {debug: true});
-    }
  }
  if (removed) {
    await saveCache();
-    await saveReasonCache();
  }
 }

@ -304,9 +301,6 @@ function classifyTextSync(text, criterion, cacheKey = null) {
  if (!Services?.tm?.spinEventLoopUntil) {
    throw new Error("classifyTextSync requires Services");
  }
-  if (!gReasonCacheLoaded) {
-    loadReasonCacheSync();
-  }
  const cached = getCachedResult(cacheKey);
  if (cached !== null) {
    return cached;
@ -329,8 +323,7 @@ function classifyTextSync(text, criterion, cacheKey = null) {
        const json = await response.json();
        aiLog(`[AiClassifier] Received response:`, {debug: true}, json);
        result = parseMatch(json);
-        cacheResult(cacheKey, result.matched);
-        cacheReason(cacheKey, result.reason);
+        cacheEntry(cacheKey, result.matched, result.reason);
        result = result.matched;
      } else {
        aiLog(`HTTP status ${response.status}`, {level: 'warn'});
@ -351,9 +344,6 @@ async function classifyText(text, criterion, cacheKey = null) {
  if (!gCacheLoaded) {
    await loadCache();
  }
-  if (!gReasonCacheLoaded) {
-    await loadReasonCache();
-  }
  const cached = getCachedResult(cacheKey);
  if (cached !== null) {
    return cached;
@ -378,8 +368,7 @@ async function classifyText(text, criterion, cacheKey = null) {
    const result = await response.json();
    aiLog(`[AiClassifier] Received response:`, {debug: true}, result);
    const parsed = parseMatch(result);
-    cacheResult(cacheKey, parsed.matched);
-    cacheReason(cacheKey, parsed.reason);
+    cacheEntry(cacheKey, parsed.matched, parsed.reason);
    return parsed.matched;
  } catch (e) {
    aiLog(`HTTP request failed`, {level: 'error'}, e);
@ -387,4 +376,8 @@ async function classifyText(text, criterion, cacheKey = null) {
  }
 }

-export { classifyText, classifyTextSync, setConfig, removeCacheEntries, getReason, getCachedResult };
+async function init() { // Start-up hook: resolves once loadCache() has finished, so callers can await cache readiness.
+  await loadCache();
+}
+
+export { classifyText, classifyTextSync, setConfig, removeCacheEntries, getReason, getCachedResult, buildCacheKey, buildCacheKeySync, init };
--- a/modules/ExpressionSearchFilter.jsm
+++ b/modules/ExpressionSearchFilter.jsm
@ -5,15 +5,6 @@ var { aiLog } = ChromeUtils.import("resource://aifilter/modules/logger.jsm");
 var AiClassifier    = ChromeUtils.importESModule("resource://aifilter/modules/AiClassifier.js");
 var { getPlainText }    = ChromeUtils.import("resource://aifilter/modules/messageUtils.jsm");

-function sha256Hex(str) {
-  const hasher = Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash);
-  hasher.init(Ci.nsICryptoHash.SHA256);
-  const data = new TextEncoder().encode(str);
-  hasher.update(data, data.length);
-  const binary = hasher.finish(false);
-  return Array.from(binary, c => ("0" + c.charCodeAt(0).toString(16)).slice(-2)).join("");
-}
-
 var EXPORTED_SYMBOLS = ["AIFilter", "ClassificationTerm"];

 class CustomerTermBase {
@ -70,7 +61,7 @@ class ClassificationTerm extends CustomerTermBase {
                   op === Ci.nsMsgSearchOp.DoesntMatch ? "doesn't match" : `unknown (${op})`;
    aiLog(`[ExpressionSearchFilter] Matching message ${msgHdr.messageId} using op "${opName}" and value "${value}"`, {debug: true});

-    let key = [msgHdr.messageId, op, value].map(sha256Hex).join("|");
+    let key = AiClassifier.buildCacheKeySync(msgHdr.messageId, value);
    let body = getPlainText(msgHdr);

    let matched = AiClassifier.classifyTextSync(body, value, key);