Merge pull request #53 from wagesj45/codex/refactor-caching-strategy-and-implementation
Unify AI caches and add cache key helper
This commit is contained in:
		
				commit
				
					
						f7bcb88a14
					
				
			
		
					 5 changed files with 95 additions and 100 deletions
				
			
		|  | @ -59,3 +59,10 @@ base64 data should be replaced with placeholders showing the byte size. The | |||
| final string should have the headers, a brief attachment section, then the plain | ||||
| text extracted from all text parts. | ||||
| 
 | ||||
| ### Cache Strategy | ||||
| 
 | ||||
| `aiCache` persists classification results. Each key is the SHA‑256 hex of | ||||
| `"<message id>|<criterion>"` and maps to an object with `matched` and `reason` | ||||
| properties. Any legacy `aiReasonCache` data is merged into `aiCache` the first | ||||
| time the add-on loads after an update. | ||||
| 
 | ||||
|  |  | |||
|  | @ -16,7 +16,7 @@ message meets a specified criterion. | |||
| - **Custom system prompts** – tailor the instructions sent to the model for more precise results. | ||||
| - **Filter editor integration** – patches Thunderbird's filter editor to accept | ||||
|   text criteria for AI classification. | ||||
| - **Persistent result caching** – classification results are saved to disk so messages aren't re-evaluated across restarts. | ||||
| - **Persistent result caching** – classification results and reasoning are saved to disk so messages aren't re-evaluated across restarts. | ||||
| - **Advanced parameters** – tune generation settings like temperature, top‑p and more from the options page. | ||||
| - **Debug logging** – optional colorized logs help troubleshoot interactions with the AI service. | ||||
| - **Automatic rules** – create rules that tag or move new messages based on AI classification. | ||||
|  | @ -25,6 +25,13 @@ message meets a specified criterion. | |||
| - **Status icons** – toolbar icons show when classification is in progress and briefly display success or error states. | ||||
| - **Packaging script** – `build-xpi.ps1` builds an XPI ready for installation. | ||||
| 
 | ||||
| ### Cache Storage | ||||
| 
 | ||||
| Classification results are stored under the `aiCache` key in extension storage. | ||||
| Each entry maps a SHA‑256 hash of `"<message id>|<criterion>"` to an object | ||||
| containing `matched` and `reason` fields. Older installations with a separate | ||||
| `aiReasonCache` will be migrated automatically on startup. | ||||
| 
 | ||||
| ## Architecture Overview | ||||
| 
 | ||||
| Sortana is implemented entirely with standard WebExtension scripts—no custom experiment code is required: | ||||
|  |  | |||
|  | @ -43,10 +43,6 @@ function showTransientIcon(path, delay = 1500) { | |||
|     iconTimer = setTimeout(updateActionIcon, delay); | ||||
| } | ||||
| 
 | ||||
| async function sha256Hex(str) { | ||||
|     const buf = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(str)); | ||||
|     return Array.from(new Uint8Array(buf), b => b.toString(16).padStart(2, '0')).join(''); | ||||
| } | ||||
| 
 | ||||
| function byteSize(str) { | ||||
|     return new TextEncoder().encode(str || "").length; | ||||
|  | @ -117,7 +113,7 @@ async function applyAiRules(idsInput) { | |||
|                 const text = buildEmailText(full); | ||||
| 
 | ||||
|                 for (const rule of aiRules) { | ||||
|                     const cacheKey = await sha256Hex(`${id}|${rule.criterion}`); | ||||
|                     const cacheKey = await AiClassifier.buildCacheKey(id, rule.criterion); | ||||
|                     const matched = await AiClassifier.classifyText(text, rule.criterion, cacheKey); | ||||
|                     if (matched) { | ||||
|                         for (const act of (rule.actions || [])) { | ||||
|  | @ -168,7 +164,7 @@ async function clearCacheForMessages(idsInput) { | |||
|     for (const msg of ids) { | ||||
|         const id = msg?.id ?? msg; | ||||
|         for (const rule of aiRules) { | ||||
|             const key = await sha256Hex(`${id}|${rule.criterion}`); | ||||
|             const key = await AiClassifier.buildCacheKey(id, rule.criterion); | ||||
|             keys.push(key); | ||||
|         } | ||||
|     } | ||||
|  | @ -192,6 +188,7 @@ async function clearCacheForMessages(idsInput) { | |||
|         const store = await storage.local.get(["endpoint", "templateName", "customTemplate", "customSystemPrompt", "aiParams", "debugLogging", "aiRules"]); | ||||
|         logger.setDebug(store.debugLogging); | ||||
|         await AiClassifier.setConfig(store); | ||||
|         await AiClassifier.init(); | ||||
|         aiRules = Array.isArray(store.aiRules) ? store.aiRules.map(r => { | ||||
|             if (r.actions) return r; | ||||
|             const actions = []; | ||||
|  | @ -331,7 +328,7 @@ async function clearCacheForMessages(idsInput) { | |||
|             } | ||||
|             const reasons = []; | ||||
|             for (const rule of aiRules) { | ||||
|                 const key = await sha256Hex(`${id}|${rule.criterion}`); | ||||
|                 const key = await AiClassifier.buildCacheKey(id, rule.criterion); | ||||
|                 const reason = AiClassifier.getReason(key); | ||||
|                 if (reason) { | ||||
|                     reasons.push({ criterion: rule.criterion, reason }); | ||||
|  | @ -361,7 +358,7 @@ async function clearCacheForMessages(idsInput) { | |||
|             } | ||||
|             const results = []; | ||||
|             for (const rule of aiRules) { | ||||
|                 const key = await sha256Hex(`${id}|${rule.criterion}`); | ||||
|                 const key = await AiClassifier.buildCacheKey(id, rule.criterion); | ||||
|                 const matched = AiClassifier.getCachedResult(key); | ||||
|                 const reason = AiClassifier.getReason(key); | ||||
|                 if (matched !== null || reason) { | ||||
|  |  | |||
|  | @ -49,8 +49,39 @@ let gAiParams = { | |||
| 
 | ||||
| let gCache = new Map(); | ||||
| let gCacheLoaded = false; | ||||
| let gReasonCache = new Map(); | ||||
| let gReasonCacheLoaded = false; | ||||
| 
 | ||||
| function sha256HexSync(str) { | ||||
|   try { | ||||
|     const hasher = Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash); | ||||
|     hasher.init(Ci.nsICryptoHash.SHA256); | ||||
|     const data = new TextEncoder().encode(str); | ||||
|     hasher.update(data, data.length); | ||||
|     const binary = hasher.finish(false); | ||||
|     return Array.from(binary, c => ("0" + c.charCodeAt(0).toString(16)).slice(-2)).join(""); | ||||
|   } catch (e) { | ||||
|     aiLog(`sha256HexSync failed`, { level: 'error' }, e); | ||||
|     return ""; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| async function sha256Hex(str) { | ||||
|   if (typeof crypto?.subtle?.digest === "function") { | ||||
|     const buf = await crypto.subtle.digest("SHA-256", new TextEncoder().encode(str)); | ||||
|     return Array.from(new Uint8Array(buf), b => b.toString(16).padStart(2, "0")).join(""); | ||||
|   } | ||||
|   return sha256HexSync(str); | ||||
| } | ||||
| 
 | ||||
| function buildCacheKeySync(id, criterion) { | ||||
|   return sha256HexSync(`${id}|${criterion}`); | ||||
| } | ||||
| 
 | ||||
| async function buildCacheKey(id, criterion) { | ||||
|   if (Services) { | ||||
|     return buildCacheKeySync(id, criterion); | ||||
|   } | ||||
|   return sha256Hex(`${id}|${criterion}`); | ||||
| } | ||||
| 
 | ||||
| async function loadCache() { | ||||
|   if (gCacheLoaded) { | ||||
|  | @ -58,16 +89,29 @@ async function loadCache() { | |||
|   } | ||||
|   aiLog(`[AiClassifier] Loading cache`, {debug: true}); | ||||
|   try { | ||||
|     const { aiCache } = await storage.local.get("aiCache"); | ||||
|     const { aiCache, aiReasonCache } = await storage.local.get(["aiCache", "aiReasonCache"]); | ||||
|     if (aiCache) { | ||||
|       for (let [k, v] of Object.entries(aiCache)) { | ||||
|         aiLog(`[AiClassifier] ⮡ Loaded entry '${k}' → ${v}`, {debug: true}); | ||||
|         gCache.set(k, v); | ||||
|         if (v && typeof v === "object") { | ||||
|           gCache.set(k, { matched: v.matched ?? null, reason: v.reason || "" }); | ||||
|         } else { | ||||
|           gCache.set(k, { matched: v, reason: "" }); | ||||
|         } | ||||
|       } | ||||
|       aiLog(`[AiClassifier] Loaded ${gCache.size} cache entries`, {debug: true}); | ||||
|     } else { | ||||
|       aiLog(`[AiClassifier] Cache is empty`, {debug: true}); | ||||
|     } | ||||
|     if (aiReasonCache) { | ||||
|       aiLog(`[AiClassifier] Migrating ${Object.keys(aiReasonCache).length} reason entries`, {debug: true}); | ||||
|       for (let [k, reason] of Object.entries(aiReasonCache)) { | ||||
|         let entry = gCache.get(k) || { matched: null, reason: "" }; | ||||
|         entry.reason = reason; | ||||
|         gCache.set(k, entry); | ||||
|       } | ||||
|       await storage.local.remove("aiReasonCache"); | ||||
|       await storage.local.set({ aiCache: Object.fromEntries(gCache) }); | ||||
|     } | ||||
|   } catch (e) { | ||||
|     aiLog(`Failed to load cache`, {level: 'error'}, e); | ||||
|   } | ||||
|  | @ -96,49 +140,6 @@ async function saveCache(updatedKey, updatedValue) { | |||
|   } | ||||
| } | ||||
| 
 | ||||
| async function loadReasonCache() { | ||||
|   if (gReasonCacheLoaded) { | ||||
|     return; | ||||
|   } | ||||
|   aiLog(`[AiClassifier] Loading reason cache`, {debug: true}); | ||||
|   try { | ||||
|     const { aiReasonCache } = await storage.local.get("aiReasonCache"); | ||||
|     if (aiReasonCache) { | ||||
|       for (let [k, v] of Object.entries(aiReasonCache)) { | ||||
|         aiLog(`[AiClassifier] ⮡ Loaded reason '${k}'`, {debug: true}); | ||||
|         gReasonCache.set(k, v); | ||||
|       } | ||||
|       aiLog(`[AiClassifier] Loaded ${gReasonCache.size} reason entries`, {debug: true}); | ||||
|     } else { | ||||
|       aiLog(`[AiClassifier] Reason cache is empty`, {debug: true}); | ||||
|     } | ||||
|   } catch (e) { | ||||
|     aiLog(`Failed to load reason cache`, {level: 'error'}, e); | ||||
|   } | ||||
|   gReasonCacheLoaded = true; | ||||
| } | ||||
| 
 | ||||
| function loadReasonCacheSync() { | ||||
|   if (!gReasonCacheLoaded) { | ||||
|     if (!Services?.tm?.spinEventLoopUntil) { | ||||
|       throw new Error("loadReasonCacheSync requires Services"); | ||||
|     } | ||||
|     let done = false; | ||||
|     loadReasonCache().finally(() => { done = true; }); | ||||
|     Services.tm.spinEventLoopUntil(() => done); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| async function saveReasonCache(updatedKey, updatedValue) { | ||||
|   if (typeof updatedKey !== "undefined") { | ||||
|     aiLog(`[AiClassifier] ⮡ Persisting reason '${updatedKey}'`, {debug: true}); | ||||
|   } | ||||
|   try { | ||||
|     await storage.local.set({ aiReasonCache: Object.fromEntries(gReasonCache) }); | ||||
|   } catch (e) { | ||||
|     aiLog(`Failed to save reason cache`, {level: 'error'}, e); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| async function loadTemplate(name) { | ||||
|   try { | ||||
|  | @ -220,26 +221,27 @@ function getCachedResult(cacheKey) { | |||
|     if (Services?.tm?.spinEventLoopUntil) { | ||||
|       loadCacheSync(); | ||||
|     } else { | ||||
|       // In non-privileged contexts we can't block, so bail out early.
 | ||||
|       return null; | ||||
|     } | ||||
|   } | ||||
|   if (cacheKey && gCache.has(cacheKey)) { | ||||
|     aiLog(`[AiClassifier] Cache hit for key: ${cacheKey}`, {debug: true}); | ||||
|     return gCache.get(cacheKey); | ||||
|     const entry = gCache.get(cacheKey); | ||||
|     return entry?.matched ?? null; | ||||
|   } | ||||
|   return null; | ||||
| } | ||||
| 
 | ||||
| function getReason(cacheKey) { | ||||
|   if (!gReasonCacheLoaded) { | ||||
|   if (!gCacheLoaded) { | ||||
|     if (Services?.tm?.spinEventLoopUntil) { | ||||
|       loadReasonCacheSync(); | ||||
|       loadCacheSync(); | ||||
|     } else { | ||||
|       return null; | ||||
|     } | ||||
|   } | ||||
|   return cacheKey ? gReasonCache.get(cacheKey) || null : null; | ||||
|   const entry = gCache.get(cacheKey); | ||||
|   return cacheKey && entry ? entry.reason || null : null; | ||||
| } | ||||
| 
 | ||||
| function buildPayload(text, criterion) { | ||||
|  | @ -260,20 +262,20 @@ function parseMatch(result) { | |||
|   return { matched, reason: thinkText }; | ||||
| } | ||||
| 
 | ||||
| function cacheResult(cacheKey, matched) { | ||||
|   if (cacheKey) { | ||||
|     aiLog(`[AiClassifier] Caching entry '${cacheKey}' → ${matched}`, {debug: true}); | ||||
|     gCache.set(cacheKey, matched); | ||||
|     saveCache(cacheKey, matched); | ||||
| function cacheEntry(cacheKey, matched, reason) { | ||||
|   if (!cacheKey) { | ||||
|     return; | ||||
|   } | ||||
|   aiLog(`[AiClassifier] Caching entry '${cacheKey}'`, {debug: true}); | ||||
|   const entry = gCache.get(cacheKey) || { matched: null, reason: "" }; | ||||
|   if (typeof matched === "boolean") { | ||||
|     entry.matched = matched; | ||||
|   } | ||||
| 
 | ||||
| function cacheReason(cacheKey, reason) { | ||||
|   if (cacheKey) { | ||||
|     aiLog(`[AiClassifier] Caching reason '${cacheKey}'`, {debug: true}); | ||||
|     gReasonCache.set(cacheKey, reason); | ||||
|     saveReasonCache(cacheKey, reason); | ||||
|   if (typeof reason === "string") { | ||||
|     entry.reason = reason; | ||||
|   } | ||||
|   gCache.set(cacheKey, entry); | ||||
|   saveCache(cacheKey, entry); | ||||
| } | ||||
| 
 | ||||
| async function removeCacheEntries(keys = []) { | ||||
|  | @ -289,14 +291,9 @@ async function removeCacheEntries(keys = []) { | |||
|       removed = true; | ||||
|       aiLog(`[AiClassifier] Removed cache entry '${key}'`, {debug: true}); | ||||
|     } | ||||
|     if (gReasonCache.delete(key)) { | ||||
|       removed = true; | ||||
|       aiLog(`[AiClassifier] Removed reason entry '${key}'`, {debug: true}); | ||||
|     } | ||||
|   } | ||||
|   if (removed) { | ||||
|     await saveCache(); | ||||
|     await saveReasonCache(); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
|  | @ -304,9 +301,6 @@ function classifyTextSync(text, criterion, cacheKey = null) { | |||
|   if (!Services?.tm?.spinEventLoopUntil) { | ||||
|     throw new Error("classifyTextSync requires Services"); | ||||
|   } | ||||
|   if (!gReasonCacheLoaded) { | ||||
|     loadReasonCacheSync(); | ||||
|   } | ||||
|   const cached = getCachedResult(cacheKey); | ||||
|   if (cached !== null) { | ||||
|     return cached; | ||||
|  | @ -329,8 +323,7 @@ function classifyTextSync(text, criterion, cacheKey = null) { | |||
|         const json = await response.json(); | ||||
|         aiLog(`[AiClassifier] Received response:`, {debug: true}, json); | ||||
|         result = parseMatch(json); | ||||
|         cacheResult(cacheKey, result.matched); | ||||
|         cacheReason(cacheKey, result.reason); | ||||
|         cacheEntry(cacheKey, result.matched, result.reason); | ||||
|         result = result.matched; | ||||
|       } else { | ||||
|         aiLog(`HTTP status ${response.status}`, {level: 'warn'}); | ||||
|  | @ -351,9 +344,6 @@ async function classifyText(text, criterion, cacheKey = null) { | |||
|   if (!gCacheLoaded) { | ||||
|     await loadCache(); | ||||
|   } | ||||
|   if (!gReasonCacheLoaded) { | ||||
|     await loadReasonCache(); | ||||
|   } | ||||
|   const cached = getCachedResult(cacheKey); | ||||
|   if (cached !== null) { | ||||
|     return cached; | ||||
|  | @ -378,8 +368,7 @@ async function classifyText(text, criterion, cacheKey = null) { | |||
|     const result = await response.json(); | ||||
|     aiLog(`[AiClassifier] Received response:`, {debug: true}, result); | ||||
|     const parsed = parseMatch(result); | ||||
|     cacheResult(cacheKey, parsed.matched); | ||||
|     cacheReason(cacheKey, parsed.reason); | ||||
|     cacheEntry(cacheKey, parsed.matched, parsed.reason); | ||||
|     return parsed.matched; | ||||
|   } catch (e) { | ||||
|     aiLog(`HTTP request failed`, {level: 'error'}, e); | ||||
|  | @ -387,4 +376,8 @@ async function classifyText(text, criterion, cacheKey = null) { | |||
|   } | ||||
| } | ||||
| 
 | ||||
| export { classifyText, classifyTextSync, setConfig, removeCacheEntries, getReason, getCachedResult }; | ||||
| async function init() { | ||||
|   await loadCache(); | ||||
| } | ||||
| 
 | ||||
| export { classifyText, classifyTextSync, setConfig, removeCacheEntries, getReason, getCachedResult, buildCacheKey, buildCacheKeySync, init }; | ||||
|  |  | |||
|  | @ -5,15 +5,6 @@ var { aiLog } = ChromeUtils.import("resource://aifilter/modules/logger.jsm"); | |||
| var AiClassifier    = ChromeUtils.importESModule("resource://aifilter/modules/AiClassifier.js"); | ||||
| var { getPlainText }    = ChromeUtils.import("resource://aifilter/modules/messageUtils.jsm"); | ||||
| 
 | ||||
| function sha256Hex(str) { | ||||
|   const hasher = Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash); | ||||
|   hasher.init(Ci.nsICryptoHash.SHA256); | ||||
|   const data = new TextEncoder().encode(str); | ||||
|   hasher.update(data, data.length); | ||||
|   const binary = hasher.finish(false); | ||||
|   return Array.from(binary, c => ("0" + c.charCodeAt(0).toString(16)).slice(-2)).join(""); | ||||
| } | ||||
| 
 | ||||
| var EXPORTED_SYMBOLS = ["AIFilter", "ClassificationTerm"]; | ||||
| 
 | ||||
| class CustomerTermBase { | ||||
|  | @ -70,7 +61,7 @@ class ClassificationTerm extends CustomerTermBase { | |||
|                    op === Ci.nsMsgSearchOp.DoesntMatch ? "doesn't match" : `unknown (${op})`; | ||||
|     aiLog(`[ExpressionSearchFilter] Matching message ${msgHdr.messageId} using op "${opName}" and value "${value}"`, {debug: true}); | ||||
| 
 | ||||
|     let key = [msgHdr.messageId, op, value].map(sha256Hex).join("|"); | ||||
|     let key = AiClassifier.buildCacheKeySync(msgHdr.messageId, value); | ||||
|     let body = getPlainText(msgHdr); | ||||
| 
 | ||||
|     let matched = AiClassifier.classifyTextSync(body, value, key); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue