diff --git a/AGENTS.md b/AGENTS.md index 9b94461..401f962 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -28,7 +28,7 @@ This file provides guidelines for codex agents contributing to the Sortana proje ## Testing -There are currently no automated tests for this project. If you add tests in the future, specify the commands to run them here. For now, verification must happen manually in Thunderbird. +There are currently no automated tests for this project. If you add tests in the future, specify the commands to run them here. For now, verification must happen manually in Thunderbird. Do **not** run the `ps1` build script or the SVG processing script. ## Documentation diff --git a/README.md b/README.md index 7637f85..57a0285 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ message meets a specified criterion. - **Advanced parameters** – tune generation settings like temperature, top‑p and more from the options page. - **Markdown conversion** – optionally convert HTML bodies to Markdown before sending them to the AI service. - **Debug logging** – optional colorized logs help troubleshoot interactions with the AI service. +- **Debug tab** – view the last request payload and a diff between the unaltered message text and the final prompt. - **Light/Dark themes** – automatically match Thunderbird's appearance with optional manual override. - **Automatic rules** – create rules that tag, move, copy, forward, reply, delete, archive, mark read/unread or flag/unflag messages based on AI classification. Rules can optionally apply only to unread messages and can ignore messages outside a chosen age range. - **Rule ordering** – drag rules to prioritize them and optionally stop processing after a match. @@ -78,8 +79,9 @@ Sortana is implemented entirely with standard WebExtension scripts—no custom e reorder them, check *Only apply to unread messages* to skip read mail, set optional minimum or maximum message age limits, select the accounts or folders a rule should apply to. 
Use the - slashed-circle/check button to disable or re-enable a rule, and - check *Stop after match* to halt further processing. Forward and reply actions + slashed-circle/check button to disable or re-enable a rule. The small + circle buttons for optional conditions show a filled dot when active. + Check *Stop after match* to halt further processing. Forward and reply actions open a compose window using the account that received the message. 3. Save your settings. New mail will be evaluated automatically using the configured rules. @@ -125,6 +127,7 @@ Sortana requests the following Thunderbird permissions: - `accountsRead` – list accounts and folders for move or copy actions. - `menus` – add context menu commands. - `tabs` – open new tabs and query the active tab. +- `notifications` – display error notifications. - `compose` – create reply and forward compose windows for matching rules. ## Thunderbird Add-on Store Disclosures @@ -138,6 +141,8 @@ uses the following third party libraries: - MIT License - [turndown v7.2.0](https://github.com/mixmark-io/turndown/tree/v7.2.0) - MIT License +- [diff](https://github.com/google/diff-match-patch/blob/62f2e689f498f9c92dbc588c58750addec9b1654/javascript/diff_match_patch_uncompressed.js) + - Apache-2.0 license ## License diff --git a/_locales/en-US/messages.json b/_locales/en-US/messages.json index 983bb86..65181c8 100644 --- a/_locales/en-US/messages.json +++ b/_locales/en-US/messages.json @@ -18,7 +18,8 @@ "options.htmlToMarkdown": { "message": "Convert HTML body to Markdown" }, "options.stripUrlParams": { "message": "Remove URL tracking parameters" }, "options.altTextImages": { "message": "Replace images with alt text" }, - "options.collapseWhitespace": { "message": "Collapse long whitespace" } + "options.collapseWhitespace": { "message": "Collapse long whitespace" }, + "options.tokenReduction": { "message": "Aggressive token reduction" } ,"action.read": { "message": "read" } ,"action.flag": { "message": "flag" } 
,"action.copy": { "message": "copy" } diff --git a/ai-filter.sln b/ai-filter.sln index 57705eb..ea71f1b 100644 --- a/ai-filter.sln +++ b/ai-filter.sln @@ -108,6 +108,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "img", "img", "{F266602F-175 EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "js", "js", "{21D2A42C-3F85-465C-9141-C106AFD92B68}" ProjectSection(SolutionItems) = preProject + resources\js\diff_match_patch_uncompressed.js = resources\js\diff_match_patch_uncompressed.js resources\js\turndown.js = resources\js\turndown.js EndProjectSection EndProject diff --git a/background.js b/background.js index 06683e2..fc585ff 100644 --- a/background.js +++ b/background.js @@ -26,11 +26,14 @@ let htmlToMarkdown = false; let stripUrlParams = false; let altTextImages = false; let collapseWhitespace = false; +let tokenReduction = false; +let maxTokens = 4096; let TurndownService = null; let userTheme = 'auto'; let currentTheme = 'light'; let detectSystemTheme; let errorPending = false; +let showDebugTab = false; const ERROR_NOTIFICATION_ID = 'sortana-error'; function normalizeRules(rules) { @@ -125,12 +128,16 @@ function byteSize(str) { } function replaceInlineBase64(text) { - return text.replace(/[A-Za-z0-9+/]{100,}={0,2}/g, - m => `[base64: ${byteSize(m)} bytes]`); + return text.replace(/(?:data:[^;]+;base64,)?[A-Za-z0-9+/=\r\n]{100,}/g, + m => tokenReduction ? 
'__BASE64__' : `[base64: ${byteSize(m)} bytes]`); } function sanitizeString(text) { let t = String(text); + if (tokenReduction) { + t = t.replace(/<!--.*?-->/gs, '') + .replace(/url\([^\)]*\)/gi, 'url(__IMG__)'); + } if (stripUrlParams) { t = t.replace(/https?:\/\/[^\s)]+/g, m => { const idx = m.indexOf('?'); @@ -138,7 +145,7 @@ function sanitizeString(text) { }); } if (collapseWhitespace) { - t = t.replace(/[ \t\u00A0]{2,}/g, ' ').replace(/\n{3,}/g, '\n\n'); + t = t.replace(/[\u200B-\u200D\u2060\s]{2,}/g, ' ').replace(/\n{3,}/g, '\n\n'); } return t; } @@ -157,12 +164,26 @@ function collectText(part, bodyParts, attachments) { attachments.push(`${name} (${ct}, ${part.size || byteSize(body)} bytes)`); } else if (ct.startsWith("text/html")) { const doc = new DOMParser().parseFromString(body, 'text/html'); - if (altTextImages) { - doc.querySelectorAll('img').forEach(img => { - const alt = img.getAttribute('alt') || ''; - img.replaceWith(doc.createTextNode(alt)); + if (tokenReduction) { + doc.querySelectorAll('script,style').forEach(el => el.remove()); + const walker = doc.createTreeWalker(doc, NodeFilter.SHOW_COMMENT); + let node; + while ((node = walker.nextNode())) { + node.parentNode.removeChild(node); + } + doc.querySelectorAll('*').forEach(el => { + for (const attr of Array.from(el.attributes)) { + if (!['href','src','alt'].includes(attr.name)) { + el.removeAttribute(attr.name); + } + } }); } + doc.querySelectorAll('img').forEach(img => { + const alt = img.getAttribute('alt') || ''; + const text = altTextImages ? 
alt : '__IMG__'; + img.replaceWith(doc.createTextNode(text)); + }); if (stripUrlParams) { doc.querySelectorAll('[href]').forEach(a => { const href = a.getAttribute('href'); @@ -189,17 +210,46 @@ function collectText(part, bodyParts, attachments) { } } -function buildEmailText(full) { +function collectRawText(part, bodyParts, attachments) { + if (part.parts && part.parts.length) { + for (const p of part.parts) collectRawText(p, bodyParts, attachments); + return; + } + const ct = (part.contentType || "text/plain").toLowerCase(); + const cd = (part.headers?.["content-disposition"]?.[0] || "").toLowerCase(); + const body = String(part.body || ""); + if (cd.includes("attachment") || !ct.startsWith("text/")) { + const nameMatch = /filename\s*=\s*"?([^";]+)/i.exec(cd) || /name\s*=\s*"?([^";]+)/i.exec(part.headers?.["content-type"]?.[0] || ""); + const name = nameMatch ? nameMatch[1] : ""; + attachments.push(`${name} (${ct}, ${part.size || byteSize(body)} bytes)`); + } else if (ct.startsWith("text/html")) { + const doc = new DOMParser().parseFromString(body, 'text/html'); + bodyParts.push(doc.body.textContent || ""); + } else { + bodyParts.push(body); + } +} + +function buildEmailText(full, applyTransforms = true) { const bodyParts = []; const attachments = []; - collectText(full, bodyParts, attachments); + const collect = applyTransforms ? collectText : collectRawText; + collect(full, bodyParts, attachments); const headers = Object.entries(full.headers || {}) .map(([k, v]) => `${k}: ${v.join(' ')}`) .join('\n'); const attachInfo = `Attachments: ${attachments.length}` + (attachments.length ? 
"\n" + attachments.map(a => ` - ${a}`).join('\n') : ""); - const combined = `${headers}\n${attachInfo}\n\n${bodyParts.join('\n')}`.trim(); - return sanitizeString(combined); + let combined = `${headers}\n${attachInfo}\n\n${bodyParts.join('\n')}`.trim(); + if (applyTransforms && tokenReduction) { + const seen = new Set(); + combined = combined.split('\n').filter(l => { + if (seen.has(l)) return false; + seen.add(l); + return true; + }).join('\n'); + } + return applyTransforms ? sanitizeString(combined) : combined; } function updateTimingStats(elapsed) { @@ -233,7 +283,17 @@ async function processMessage(id) { updateActionIcon(); try { const full = await messenger.messages.getFull(id); - const text = buildEmailText(full); + const originalText = buildEmailText(full, false); + let text = buildEmailText(full); + if (tokenReduction && maxTokens > 0) { + const limit = Math.floor(maxTokens * 0.9); + if (text.length > limit) { + text = text.slice(0, limit); + } + } + if (showDebugTab) { + await storage.local.set({ lastFullText: originalText, lastPromptText: text }); + } let hdr; let currentTags = []; let alreadyRead = false; @@ -391,7 +451,7 @@ async function clearCacheForMessages(idsInput) { } try { - const store = await storage.local.get(["endpoint", "templateName", "customTemplate", "customSystemPrompt", "aiParams", "debugLogging", "htmlToMarkdown", "stripUrlParams", "altTextImages", "collapseWhitespace", "aiRules", "theme", "errorPending"]); + const store = await storage.local.get(["endpoint", "templateName", "customTemplate", "customSystemPrompt", "aiParams", "debugLogging", "htmlToMarkdown", "stripUrlParams", "altTextImages", "collapseWhitespace", "tokenReduction", "aiRules", "theme", "errorPending", "showDebugTab"]); logger.setDebug(store.debugLogging); await AiClassifier.setConfig(store); userTheme = store.theme || 'auto'; @@ -401,7 +461,12 @@ async function clearCacheForMessages(idsInput) { stripUrlParams = store.stripUrlParams === true; altTextImages = 
store.altTextImages === true; collapseWhitespace = store.collapseWhitespace === true; + tokenReduction = store.tokenReduction === true; + if (store.aiParams && typeof store.aiParams.max_tokens !== 'undefined') { + maxTokens = parseInt(store.aiParams.max_tokens) || maxTokens; + } errorPending = store.errorPending === true; + showDebugTab = store.showDebugTab === true; const savedStats = await storage.local.get('classifyStats'); if (savedStats.classifyStats && typeof savedStats.classifyStats === 'object') { Object.assign(timingStats, savedStats.classifyStats); @@ -417,6 +482,25 @@ async function clearCacheForMessages(idsInput) { aiRules = normalizeRules(newRules); logger.aiLog("aiRules updated from storage change", { debug: true }, aiRules); } + if (changes.endpoint || changes.templateName || changes.customTemplate || changes.customSystemPrompt || changes.aiParams || changes.debugLogging) { + const config = {}; + if (changes.endpoint) config.endpoint = changes.endpoint.newValue; + if (changes.templateName) config.templateName = changes.templateName.newValue; + if (changes.customTemplate) config.customTemplate = changes.customTemplate.newValue; + if (changes.customSystemPrompt) config.customSystemPrompt = changes.customSystemPrompt.newValue; + if (changes.aiParams) { + config.aiParams = changes.aiParams.newValue; + if (changes.aiParams.newValue && typeof changes.aiParams.newValue.max_tokens !== 'undefined') { + maxTokens = parseInt(changes.aiParams.newValue.max_tokens) || maxTokens; + } + } + if (changes.debugLogging) { + config.debugLogging = changes.debugLogging.newValue === true; + logger.setDebug(config.debugLogging); + } + await AiClassifier.setConfig(config); + logger.aiLog("AiClassifier config updated from storage change", { debug: true }, config); + } if (changes.htmlToMarkdown) { htmlToMarkdown = changes.htmlToMarkdown.newValue === true; logger.aiLog("htmlToMarkdown updated from storage change", { debug: true }, htmlToMarkdown); @@ -433,6 +517,13 @@ async 
function clearCacheForMessages(idsInput) { collapseWhitespace = changes.collapseWhitespace.newValue === true; logger.aiLog("collapseWhitespace updated from storage change", { debug: true }, collapseWhitespace); } + if (changes.tokenReduction) { + tokenReduction = changes.tokenReduction.newValue === true; + logger.aiLog("tokenReduction updated from storage change", { debug: true }, tokenReduction); + } + if (changes.showDebugTab) { + showDebugTab = changes.showDebugTab.newValue === true; + } if (changes.errorPending) { errorPending = changes.errorPending.newValue === true; updateActionIcon(); diff --git a/manifest.json b/manifest.json index fe019ab..e7cb9d8 100644 --- a/manifest.json +++ b/manifest.json @@ -1,13 +1,13 @@ { "manifest_version": 2, "name": "Sortana", - "version": "2.1.1", + "version": "2.2.0", "default_locale": "en-US", "applications": { "gecko": { "id": "ai-filter@jordanwages", "strict_min_version": "128.0", - "strict_max_version": "139.*" + "strict_max_version": "140.*" } }, "icons": { @@ -32,17 +32,18 @@ "page": "options/options.html", "open_in_tab": true }, - "permissions": [ - "storage", - "messagesRead", - "messagesMove", - "messagesUpdate", - "messagesTagsList", - "accountsRead", - "menus", - "scripting", - "tabs", - "theme", - "compose" - ] + "permissions": [ + "storage", + "messagesRead", + "messagesMove", + "messagesUpdate", + "messagesTagsList", + "accountsRead", + "menus", + "notifications", + "scripting", + "tabs", + "theme", + "compose" + ] } diff --git a/modules/AiClassifier.js b/modules/AiClassifier.js index b35cb2c..8313654 100644 --- a/modules/AiClassifier.js +++ b/modules/AiClassifier.js @@ -308,6 +308,11 @@ async function classifyText(text, criterion, cacheKey = null) { } const payload = buildPayload(text, criterion); + try { + await storage.local.set({ lastPayload: JSON.parse(payload) }); + } catch (e) { + aiLog('failed to save last payload', { level: 'warn' }, e); + } aiLog(`[AiClassifier] Sending classification request to 
${gEndpoint}`, {debug: true}); aiLog(`[AiClassifier] Classification request payload:`, { debug: true }, payload); diff --git a/options/options.html b/options/options.html index 57f407c..ddc5ee0 100644 --- a/options/options.html +++ b/options/options.html @@ -31,6 +31,10 @@ .tag { --bulma-tag-h: 318; } + #diff-display { + white-space: pre-wrap; + font-family: monospace; + }
@@ -47,6 +51,7 @@