diff --git a/experiment/api.js b/experiment/api.js index c95d5f4..c225779 100644 --- a/experiment/api.js +++ b/experiment/api.js @@ -1,6 +1,7 @@ var { ExtensionCommon } = ChromeUtils.importESModule("resource://gre/modules/ExtensionCommon.sys.mjs"); var { Services } = globalThis || ChromeUtils.importESModule("resource://gre/modules/Services.sys.mjs"); var { MailServices } = ChromeUtils.importESModule("resource:///modules/MailServices.sys.mjs"); +var { AiClassifier } = ChromeUtils.import("resource://aifilter/modules/AiClassifier.jsm"); var aiLog = (...args) => console.log("[ai-filter][api]", ...args); var setDebug = () => {}; @@ -21,7 +22,6 @@ function registerResourceUrl(extension, namespace) { resProto.setSubstitutionWithFlags(namespace, uri, resProto.ALLOW_CONTENT_ACCESS); } -var gTerm; var AIFilterMod; var aiFilter = class extends ExtensionCommon.ExtensionAPI { @@ -61,33 +61,19 @@ var aiFilter = class extends ExtensionCommon.ExtensionAPI { aiFilter: { initConfig: async (config) => { try { - if (AIFilterMod?.AIFilter?.setConfig) { - AIFilterMod.AIFilter.setConfig(config); - if (typeof config.debugLogging === "boolean") { - setDebug(config.debugLogging); - } - aiLog("[api] configuration applied", {debug: true}, config); + AiClassifier.setConfig(config); + if (typeof config.debugLogging === "boolean") { + setDebug(config.debugLogging); } + aiLog("[api] configuration applied", {debug: true}, config); } catch (err) { aiLog("[api] failed to apply config", {level: 'error'}, err); } }, - classify: (msg) => { - aiLog("[api] classify() called with msg", {debug: true}, msg); + classify: (text, criterion) => { + aiLog("[api] classify() called", {debug: true}, text, criterion); try { - if (!gTerm) { - aiLog("[api] instantiating new ClassificationTerm", {debug: true}); - let mod = AIFilterMod || ChromeUtils.import("resource://aifilter/modules/ExpressionSearchFilter.jsm"); - gTerm = new mod.ClassificationTerm(); - } - aiLog("[api] calling gTerm.match()", {debug: true}); - let matchResult = gTerm.match( - msg.msgHdr, - msg.value, - Ci.nsMsgSearchOp.Contains - ); - aiLog("[api] gTerm.match() returned", {debug: true}, matchResult); - return matchResult; + return AiClassifier.classifyTextSync(text, criterion); } catch (err) { aiLog("[api] error in classify()", {level: 'error'}, err); diff --git a/modules/AiClassifier.jsm b/modules/AiClassifier.jsm new file mode 100644 index 0000000..eb399c4 --- /dev/null +++ b/modules/AiClassifier.jsm @@ -0,0 +1,211 @@ +"use strict"; +var { Services } = globalThis || ChromeUtils.importESModule("resource://gre/modules/Services.sys.mjs"); +var { NetUtil } = ChromeUtils.importESModule("resource://gre/modules/NetUtil.sys.mjs"); +var { FileUtils } = ChromeUtils.importESModule("resource://gre/modules/FileUtils.sys.mjs"); +var { aiLog, setDebug } = ChromeUtils.import("resource://aifilter/modules/logger.jsm"); + +var EXPORTED_SYMBOLS = ["AiClassifier"]; + +const SYSTEM_PREFIX = `You are an email-classification assistant. +Read the email below and the classification criterion provided by the user. +`; + +const DEFAULT_CUSTOM_SYSTEM_PROMPT = "Determine whether the email satisfies the user's criterion."; + +const SYSTEM_SUFFIX = ` +Return ONLY a JSON object on a single line of the form: +{"match": true} - if the email satisfies the criterion +{"match": false} - otherwise + +Do not add any other keys, text, or formatting.`; + +let gEndpoint = "http://127.0.0.1:5000/v1/classify"; +let gTemplateName = "openai"; +let gCustomTemplate = ""; +let gCustomSystemPrompt = DEFAULT_CUSTOM_SYSTEM_PROMPT; +let gTemplateText = ""; + +let gAiParams = { + max_tokens: 4096, + temperature: 0.6, + top_p: 0.95, + seed: -1, + repetition_penalty: 1.0, + top_k: 20, + min_p: 0, + presence_penalty: 0, + frequency_penalty: 0, + typical_p: 1, + tfs: 1, +}; + +let gCache = new Map(); +let gCacheLoaded = false; +let gCacheFile; + +function ensureCacheFile() { + if (!gCacheFile) { + gCacheFile = Services.dirsvc.get("ProfD", Ci.nsIFile); + gCacheFile.append("aifilter_cache.json"); + } +} + +function loadCache() { + if (gCacheLoaded) { + return; + } + ensureCacheFile(); + aiLog(`[AiClassifier] Loading cache from ${gCacheFile.path}`, {debug: true}); + try { + if (gCacheFile.exists()) { + let stream = Cc["@mozilla.org/network/file-input-stream;1"].createInstance(Ci.nsIFileInputStream); + stream.init(gCacheFile, -1, 0, 0); + let data = NetUtil.readInputStreamToString(stream, stream.available()); + stream.close(); + aiLog(`[AiClassifier] Cache file contents: ${data}`, {debug: true}); + let obj = JSON.parse(data); + for (let [k, v] of Object.entries(obj)) { + aiLog(`[AiClassifier] ⮡ Loaded entry '${k}' → ${v}`, {debug: true}); + gCache.set(k, v); + } + aiLog(`[AiClassifier] Loaded ${gCache.size} cache entries`, {debug: true}); + } else { + aiLog(`[AiClassifier] Cache file does not exist`, {debug: true}); + } + } catch (e) { + aiLog(`Failed to load cache`, {level: 'error'}, e); + } + gCacheLoaded = true; +} + +function saveCache(updatedKey, updatedValue) { + ensureCacheFile(); + aiLog(`[AiClassifier] Saving cache to ${gCacheFile.path}`, {debug: true}); + if (typeof updatedKey !== "undefined") { + aiLog(`[AiClassifier] ⮡ Persisting entry '${updatedKey}' → ${updatedValue}`, {debug: true}); + } + try { + let obj = Object.fromEntries(gCache); + let data = JSON.stringify(obj); + let stream = Cc["@mozilla.org/network/file-output-stream;1"].createInstance(Ci.nsIFileOutputStream); + stream.init(gCacheFile, + FileUtils.MODE_WRONLY | FileUtils.MODE_CREATE | FileUtils.MODE_TRUNCATE, + FileUtils.PERMS_FILE, + 0); + stream.write(data, data.length); + stream.close(); + } catch (e) { + aiLog(`Failed to save cache`, {level: 'error'}, e); + } +} + +function loadTemplate(name) { + try { + let url = `resource://aifilter/prompt_templates/${name}.txt`; + let xhr = new XMLHttpRequest(); + xhr.open("GET", url, false); + xhr.overrideMimeType("text/plain"); + xhr.send(); + if (xhr.status === 0 || xhr.status === 200) { + return xhr.responseText; + } + } catch (e) { + aiLog(`Failed to load template '${name}':`, {level: 'error'}, e); + } + return ""; +} + +function setConfig(config = {}) { + if (config.endpoint) { + gEndpoint = config.endpoint; + } + if (config.templateName) { + gTemplateName = config.templateName; + } + if (typeof config.customTemplate === "string") { + gCustomTemplate = config.customTemplate; + } + if (typeof config.customSystemPrompt === "string") { + gCustomSystemPrompt = config.customSystemPrompt; + } + if (config.aiParams && typeof config.aiParams === "object") { + for (let [k, v] of Object.entries(config.aiParams)) { + if (k in gAiParams && typeof v !== "undefined") { + gAiParams[k] = v; + } + } + } + if (typeof config.debugLogging === "boolean") { + setDebug(config.debugLogging); + } + gTemplateText = gTemplateName === "custom" ? gCustomTemplate : loadTemplate(gTemplateName); + aiLog(`[AiClassifier] Endpoint set to ${gEndpoint}`, {debug: true}); + aiLog(`[AiClassifier] Template set to ${gTemplateName}`, {debug: true}); +} + +function buildSystemPrompt() { + return SYSTEM_PREFIX + (gCustomSystemPrompt || DEFAULT_CUSTOM_SYSTEM_PROMPT) + SYSTEM_SUFFIX; +} + +function buildPrompt(body, criterion) { + aiLog(`[AiClassifier] Building prompt with criterion: "${criterion}"`, {debug: true}); + const data = { + system: buildSystemPrompt(), + email: body, + query: criterion, + }; + let template = gTemplateText || loadTemplate(gTemplateName); + return template.replace(/{{\s*(\w+)\s*}}/g, (m, key) => data[key] || ""); +} + +function classifyTextSync(text, criterion, cacheKey = null) { + loadCache(); + if (cacheKey && gCache.has(cacheKey)) { + aiLog(`[AiClassifier] Cache hit for key: ${cacheKey}`, {debug: true}); + return gCache.get(cacheKey); + } + + let payloadObj = Object.assign({ + prompt: buildPrompt(text, criterion) + }, gAiParams); + let payload = JSON.stringify(payloadObj); + + aiLog(`[AiClassifier] Sending classification request to ${gEndpoint}`, {debug: true}); + + let matched = false; + try { + let xhr = new XMLHttpRequest(); + xhr.open("POST", gEndpoint, false); + xhr.setRequestHeader("Content-Type", "application/json"); + xhr.send(payload); + + if (xhr.status < 200 || xhr.status >= 300) { + aiLog(`HTTP status ${xhr.status}`, {level: 'warn'}); + } else { + const result = JSON.parse(xhr.responseText); + aiLog(`[AiClassifier] Received response:`, {debug: true}, result); + const rawText = result.choices?.[0]?.text || ""; + const thinkText = rawText.match(/[\s\S]*?<\/think>/gi)?.join('') || ''; + aiLog('[AiClassifier] ⮡ Reasoning:', {debug: true}, thinkText); + const cleanedText = rawText.replace(/[\s\S]*?<\/think>/gi, "").trim(); + aiLog('[AiClassifier] ⮡ Cleaned Response Text:', {debug: true}, cleanedText); + const obj = JSON.parse(cleanedText); + matched = obj.matched === true || obj.match === true; + if (cacheKey) { + aiLog(`[AiClassifier] Caching entry '${cacheKey}' → ${matched}`, {debug: true}); + gCache.set(cacheKey, matched); + saveCache(cacheKey, matched); + } + } + } catch (e) { + aiLog(`HTTP request failed`, {level: 'error'}, e); + } + + return matched; +} + +async function classifyText(text, criterion, cacheKey = null) { + return classifyTextSync(text, criterion, cacheKey); +} + +var AiClassifier = { classifyText, classifyTextSync, setConfig }; diff --git a/modules/ExpressionSearchFilter.jsm b/modules/ExpressionSearchFilter.jsm index 8f981ce..2403b01 100644 --- a/modules/ExpressionSearchFilter.jsm +++ b/modules/ExpressionSearchFilter.jsm @@ -4,8 +4,8 @@ var { MailServices } = ChromeUtils.importESModule("resource:///modules/MailSe var { Services } = globalThis || ChromeUtils.importESModule("resource://gre/modules/Services.sys.mjs"); var { NetUtil } = ChromeUtils.importESModule("resource://gre/modules/NetUtil.sys.mjs"); var { MimeParser } = ChromeUtils.importESModule("resource:///modules/mimeParser.sys.mjs"); -var { FileUtils } = ChromeUtils.importESModule("resource://gre/modules/FileUtils.sys.mjs"); -var { aiLog, setDebug } = ChromeUtils.import("resource://aifilter/modules/logger.jsm"); +var { aiLog } = ChromeUtils.import("resource://aifilter/modules/logger.jsm"); +var { AiClassifier } = ChromeUtils.import("resource://aifilter/modules/AiClassifier.jsm"); function sha256Hex(str) { const hasher = Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash); @@ -18,19 +18,6 @@ function sha256Hex(str) { var EXPORTED_SYMBOLS = ["AIFilter", "ClassificationTerm"]; -const SYSTEM_PREFIX = `You are an email-classification assistant. -Read the email below and the classification criterion provided by the user. -`; - -const DEFAULT_CUSTOM_SYSTEM_PROMPT = "Determine whether the email satisfies the user's criterion."; - -const SYSTEM_SUFFIX = ` -Return ONLY a JSON object on a single line of the form: -{"match": true} - if the email satisfies the criterion -{"match": false} - otherwise - -Do not add any other keys, text, or formatting.`; - class CustomerTermBase { constructor(nameId, operators) { // Lookup our extension instance using the ID from manifest.json @@ -39,56 +26,10 @@ class CustomerTermBase { this.id = "aifilter#" + nameId; this.name = this.extension.localeData.localizeMessage(nameId); this.operators = operators; - this.cache = new Map(); - this._cacheFile = Services.dirsvc.get("ProfD", Ci.nsIFile); - this._cacheFile.append("aifilter_cache.json"); - this._loadCache(); aiLog(`[ExpressionSearchFilter] Initialized term base "${this.id}"`, {debug: true}); } - _loadCache() { - aiLog(`[ExpressionSearchFilter] Loading cache from ${this._cacheFile.path}` , {debug: true}); - try { - if (this._cacheFile.exists()) { - let stream = Cc["@mozilla.org/network/file-input-stream;1"].createInstance(Ci.nsIFileInputStream); - stream.init(this._cacheFile, -1, 0, 0); - let data = NetUtil.readInputStreamToString(stream, stream.available()); - stream.close(); - aiLog(`[ExpressionSearchFilter] Cache file contents: ${data}`, {debug: true}); - let obj = JSON.parse(data); - for (let [k, v] of Object.entries(obj)) { - aiLog(`[ExpressionSearchFilter] ⮡ Loaded entry '${k}' → ${v}`, {debug: true}); - this.cache.set(k, v); - } - aiLog(`[ExpressionSearchFilter] Loaded ${this.cache.size} cache entries`, {debug: true}); - } else { - aiLog(`[ExpressionSearchFilter] Cache file does not exist`, {debug: true}); - } - } catch (e) { - aiLog(`Failed to load cache`, {level: 'error'}, e); - } - } - - _saveCache(updatedKey, updatedValue) { - aiLog(`[ExpressionSearchFilter] Saving cache to ${this._cacheFile.path}`, {debug: true}); - if (typeof updatedKey !== "undefined") { - aiLog(`[ExpressionSearchFilter] ⮡ Persisting entry '${updatedKey}' → ${updatedValue}`, {debug: true}); - } - try { - let obj = Object.fromEntries(this.cache); - let data = JSON.stringify(obj); - let stream = Cc["@mozilla.org/network/file-output-stream;1"].createInstance(Ci.nsIFileOutputStream); - stream.init(this._cacheFile, - FileUtils.MODE_WRONLY | FileUtils.MODE_CREATE | FileUtils.MODE_TRUNCATE, - FileUtils.PERMS_FILE, - 0); - stream.write(data, data.length); - stream.close(); - } catch (e) { - aiLog(`Failed to save cache`, {level: 'error'}, e); - } - } getEnabled() { aiLog(`[ExpressionSearchFilter] getEnabled() called on "${this.id}"`, {debug: true}); @@ -199,84 +140,6 @@ function getPlainText(msgHdr) { } } -let gEndpoint = "http://127.0.0.1:5000/v1/classify"; -let gTemplateName = "openai"; -let gCustomTemplate = ""; -let gCustomSystemPrompt = DEFAULT_CUSTOM_SYSTEM_PROMPT; -let gTemplateText = ""; - -let gAiParams = { - max_tokens: 4096, - temperature: 0.6, - top_p: 0.95, - seed: -1, - repetition_penalty: 1.0, - top_k: 20, - min_p: 0, - presence_penalty: 0, - frequency_penalty: 0, - typical_p: 1, - tfs: 1, -}; - -function loadTemplate(name) { - try { - let url = `resource://aifilter/prompt_templates/${name}.txt`; - let xhr = new XMLHttpRequest(); - xhr.open("GET", url, false); - xhr.overrideMimeType("text/plain"); - xhr.send(); - if (xhr.status === 0 || xhr.status === 200) { - return xhr.responseText; - } - } catch (e) { - aiLog(`Failed to load template '${name}':`, {level: 'error'}, e); - } - return ""; -} - -function setConfig(config = {}) { - if (config.endpoint) { - gEndpoint = config.endpoint; - } - if (config.templateName) { - gTemplateName = config.templateName; - } - if (typeof config.customTemplate === "string") { - gCustomTemplate = config.customTemplate; - } - if (typeof config.customSystemPrompt === "string") { - gCustomSystemPrompt = config.customSystemPrompt; - } - if (config.aiParams && typeof config.aiParams === "object") { - for (let [k, v] of Object.entries(config.aiParams)) { - if (k in gAiParams && typeof v !== "undefined") { - gAiParams[k] = v; - } - } - } - if (typeof config.debugLogging === "boolean") { - setDebug(config.debugLogging); - } - gTemplateText = gTemplateName === "custom" ? gCustomTemplate : loadTemplate(gTemplateName); - aiLog(`[ExpressionSearchFilter] Endpoint set to ${gEndpoint}`, {debug: true}); - aiLog(`[ExpressionSearchFilter] Template set to ${gTemplateName}`, {debug: true}); -} - -function buildSystemPrompt() { - return SYSTEM_PREFIX + (gCustomSystemPrompt || DEFAULT_CUSTOM_SYSTEM_PROMPT) + SYSTEM_SUFFIX; -} - -function buildPrompt(body, criterion) { - aiLog(`[ExpressionSearchFilter] Building prompt with criterion: "${criterion}"`, {debug: true}); - const data = { - system: buildSystemPrompt(), - email: body, - query: criterion, - }; - let template = gTemplateText || loadTemplate(gTemplateName); - return template.replace(/{{\s*(\w+)\s*}}/g, (m, key) => data[key] || ""); -} class ClassificationTerm extends CustomerTermBase { constructor() { @@ -292,47 +155,9 @@ class ClassificationTerm extends CustomerTermBase { aiLog(`[ExpressionSearchFilter] Matching message ${msgHdr.messageId} using op "${opName}" and value "${value}"`, {debug: true}); let key = [msgHdr.messageId, op, value].map(sha256Hex).join("|"); - if (this.cache.has(key)) { - aiLog(`[ExpressionSearchFilter] Cache hit for key: ${key}`, {debug: true}); - return this.cache.get(key); - } - let body = getPlainText(msgHdr); - let payloadObj = Object.assign({ - prompt: buildPrompt(body, value) - }, gAiParams); - let payload = JSON.stringify(payloadObj); - - aiLog(`[ExpressionSearchFilter] Sending classification request to ${gEndpoint}`, {debug: true}); - - let matched = false; - try { - let xhr = new XMLHttpRequest(); - xhr.open("POST", gEndpoint, false); // synchronous request - xhr.setRequestHeader("Content-Type", "application/json"); - xhr.send(payload); - - if (xhr.status < 200 || xhr.status >= 300) { - aiLog(`HTTP status ${xhr.status}`, {level: 'warn'}); - } else { - const result = JSON.parse(xhr.responseText); - aiLog(`[ExpressionSearchFilter] Received response:`, {debug: true}, result); - const rawText = result.choices?.[0]?.text || ""; - const thinkText = rawText.match(/[\s\S]*?<\/think>/gi)?.join('') || ''; - aiLog('[ExpressionSearchFilter] ⮡ Reasoning:', {debug: true}, thinkText); - const cleanedText = rawText.replace(/[\s\S]*?<\/think>/gi, "").trim(); - aiLog('[ExpressionSearchFilter] ⮡ Cleaned Response Text:', {debug: true}, cleanedText); - const obj = JSON.parse(cleanedText); - matched = obj.matched === true || obj.match === true; - - aiLog(`[ExpressionSearchFilter] Caching entry '${key}' → ${matched}`, {debug: true}); - this.cache.set(key, matched); - this._saveCache(key, matched); - } - } catch (e) { - aiLog(`HTTP request failed`, {level: 'error'}, e); - } + let matched = AiClassifier.classifyTextSync(body, value, key); if (op === Ci.nsMsgSearchOp.DoesntMatch) { matched = !matched; @@ -355,4 +180,4 @@ class ClassificationTerm extends CustomerTermBase { } })(); -var AIFilter = { setConfig }; +var AIFilter = { setConfig: AiClassifier.setConfig };