diff --git a/README.md b/README.md index 3853127..57a0285 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ message meets a specified criterion. - **Advanced parameters** – tune generation settings like temperature, top‑p and more from the options page. - **Markdown conversion** – optionally convert HTML bodies to Markdown before sending them to the AI service. - **Debug logging** – optional colorized logs help troubleshoot interactions with the AI service. +- **Debug tab** – view the last request payload and a diff between the unaltered message text and the final prompt. - **Light/Dark themes** – automatically match Thunderbird's appearance with optional manual override. - **Automatic rules** – create rules that tag, move, copy, forward, reply, delete, archive, mark read/unread or flag/unflag messages based on AI classification. Rules can optionally apply only to unread messages and can ignore messages outside a chosen age range. - **Rule ordering** – drag rules to prioritize them and optionally stop processing after a match. 
@@ -140,6 +141,8 @@ uses the following third party libraries: - MIT License - [turndown v7.2.0](https://github.com/mixmark-io/turndown/tree/v7.2.0) - MIT License +- [diff](https://github.com/google/diff-match-patch/blob/62f2e689f498f9c92dbc588c58750addec9b1654/javascript/diff_match_patch_uncompressed.js) + - Apache-2.0 license ## License diff --git a/_locales/en-US/messages.json b/_locales/en-US/messages.json index 983bb86..65181c8 100644 --- a/_locales/en-US/messages.json +++ b/_locales/en-US/messages.json @@ -18,7 +18,8 @@ "options.htmlToMarkdown": { "message": "Convert HTML body to Markdown" }, "options.stripUrlParams": { "message": "Remove URL tracking parameters" }, "options.altTextImages": { "message": "Replace images with alt text" }, - "options.collapseWhitespace": { "message": "Collapse long whitespace" } + "options.collapseWhitespace": { "message": "Collapse long whitespace" }, + "options.tokenReduction": { "message": "Aggressive token reduction" } ,"action.read": { "message": "read" } ,"action.flag": { "message": "flag" } ,"action.copy": { "message": "copy" } diff --git a/ai-filter.sln b/ai-filter.sln index 57705eb..ea71f1b 100644 --- a/ai-filter.sln +++ b/ai-filter.sln @@ -108,6 +108,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "img", "img", "{F266602F-175 EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "js", "js", "{21D2A42C-3F85-465C-9141-C106AFD92B68}" ProjectSection(SolutionItems) = preProject + resources\js\diff_match_patch_uncompressed.js = resources\js\diff_match_patch_uncompressed.js resources\js\turndown.js = resources\js\turndown.js EndProjectSection EndProject diff --git a/background.js b/background.js index c7579d0..fc585ff 100644 --- a/background.js +++ b/background.js @@ -26,11 +26,14 @@ let htmlToMarkdown = false; let stripUrlParams = false; let altTextImages = false; let collapseWhitespace = false; +let tokenReduction = false; +let maxTokens = 4096; let TurndownService = null; let userTheme = 'auto'; let 
currentTheme = 'light'; let detectSystemTheme; let errorPending = false; +let showDebugTab = false; const ERROR_NOTIFICATION_ID = 'sortana-error'; function normalizeRules(rules) { @@ -125,12 +128,16 @@ function byteSize(str) { } function replaceInlineBase64(text) { - return text.replace(/[A-Za-z0-9+/]{100,}={0,2}/g, - m => `[base64: ${byteSize(m)} bytes]`); + return text.replace(/(?:data:[^;]+;base64,)?[A-Za-z0-9+/=\r\n]{100,}/g, + m => tokenReduction ? '__BASE64__' : `[base64: ${byteSize(m)} bytes]`); } function sanitizeString(text) { let t = String(text); + if (tokenReduction) { + t = t.replace(/<!--.*?-->/gs, '') + .replace(/url\([^\)]*\)/gi, 'url(__IMG__)'); + } if (stripUrlParams) { t = t.replace(/https?:\/\/[^\s)]+/g, m => { const idx = m.indexOf('?'); @@ -138,7 +145,7 @@ function sanitizeString(text) { }); } if (collapseWhitespace) { - t = t.replace(/[ \t\u00A0]{2,}/g, ' ').replace(/\n{3,}/g, '\n\n'); + t = t.replace(/[\u200B-\u200D\u2060\s]{2,}/g, ' ').replace(/\n{3,}/g, '\n\n'); } return t; } @@ -157,12 +164,26 @@ function collectText(part, bodyParts, attachments) { attachments.push(`${name} (${ct}, ${part.size || byteSize(body)} bytes)`); } else if (ct.startsWith("text/html")) { const doc = new DOMParser().parseFromString(body, 'text/html'); - if (altTextImages) { - doc.querySelectorAll('img').forEach(img => { - const alt = img.getAttribute('alt') || ''; - img.replaceWith(doc.createTextNode(alt)); + if (tokenReduction) { + doc.querySelectorAll('script,style').forEach(el => el.remove()); + const walker = doc.createTreeWalker(doc, NodeFilter.SHOW_COMMENT); + let node; + while ((node = walker.nextNode())) { + node.parentNode.removeChild(node); + } + doc.querySelectorAll('*').forEach(el => { + for (const attr of Array.from(el.attributes)) { + if (!['href','src','alt'].includes(attr.name)) { + el.removeAttribute(attr.name); + } + } }); } + doc.querySelectorAll('img').forEach(img => { + const alt = img.getAttribute('alt') || ''; + const text = altTextImages ? 
alt : '__IMG__'; + img.replaceWith(doc.createTextNode(text)); + }); if (stripUrlParams) { doc.querySelectorAll('[href]').forEach(a => { const href = a.getAttribute('href'); @@ -189,17 +210,46 @@ function collectText(part, bodyParts, attachments) { } } -function buildEmailText(full) { +function collectRawText(part, bodyParts, attachments) { + if (part.parts && part.parts.length) { + for (const p of part.parts) collectRawText(p, bodyParts, attachments); + return; + } + const ct = (part.contentType || "text/plain").toLowerCase(); + const cd = (part.headers?.["content-disposition"]?.[0] || "").toLowerCase(); + const body = String(part.body || ""); + if (cd.includes("attachment") || !ct.startsWith("text/")) { + const nameMatch = /filename\s*=\s*"?([^";]+)/i.exec(cd) || /name\s*=\s*"?([^";]+)/i.exec(part.headers?.["content-type"]?.[0] || ""); + const name = nameMatch ? nameMatch[1] : ""; + attachments.push(`${name} (${ct}, ${part.size || byteSize(body)} bytes)`); + } else if (ct.startsWith("text/html")) { + const doc = new DOMParser().parseFromString(body, 'text/html'); + bodyParts.push(doc.body.textContent || ""); + } else { + bodyParts.push(body); + } +} + +function buildEmailText(full, applyTransforms = true) { const bodyParts = []; const attachments = []; - collectText(full, bodyParts, attachments); + const collect = applyTransforms ? collectText : collectRawText; + collect(full, bodyParts, attachments); const headers = Object.entries(full.headers || {}) .map(([k, v]) => `${k}: ${v.join(' ')}`) .join('\n'); const attachInfo = `Attachments: ${attachments.length}` + (attachments.length ? 
"\n" + attachments.map(a => ` - ${a}`).join('\n') : ""); - const combined = `${headers}\n${attachInfo}\n\n${bodyParts.join('\n')}`.trim(); - return sanitizeString(combined); + let combined = `${headers}\n${attachInfo}\n\n${bodyParts.join('\n')}`.trim(); + if (applyTransforms && tokenReduction) { + const seen = new Set(); + combined = combined.split('\n').filter(l => { + if (seen.has(l)) return false; + seen.add(l); + return true; + }).join('\n'); + } + return applyTransforms ? sanitizeString(combined) : combined; } function updateTimingStats(elapsed) { @@ -233,7 +283,17 @@ async function processMessage(id) { updateActionIcon(); try { const full = await messenger.messages.getFull(id); - const text = buildEmailText(full); + const originalText = buildEmailText(full, false); + let text = buildEmailText(full); + if (tokenReduction && maxTokens > 0) { + const limit = Math.floor(maxTokens * 0.9); + if (text.length > limit) { + text = text.slice(0, limit); + } + } + if (showDebugTab) { + await storage.local.set({ lastFullText: originalText, lastPromptText: text }); + } let hdr; let currentTags = []; let alreadyRead = false; @@ -391,7 +451,7 @@ async function clearCacheForMessages(idsInput) { } try { - const store = await storage.local.get(["endpoint", "templateName", "customTemplate", "customSystemPrompt", "aiParams", "debugLogging", "htmlToMarkdown", "stripUrlParams", "altTextImages", "collapseWhitespace", "aiRules", "theme", "errorPending"]); + const store = await storage.local.get(["endpoint", "templateName", "customTemplate", "customSystemPrompt", "aiParams", "debugLogging", "htmlToMarkdown", "stripUrlParams", "altTextImages", "collapseWhitespace", "tokenReduction", "aiRules", "theme", "errorPending", "showDebugTab"]); logger.setDebug(store.debugLogging); await AiClassifier.setConfig(store); userTheme = store.theme || 'auto'; @@ -401,7 +461,12 @@ async function clearCacheForMessages(idsInput) { stripUrlParams = store.stripUrlParams === true; altTextImages = 
store.altTextImages === true; collapseWhitespace = store.collapseWhitespace === true; + tokenReduction = store.tokenReduction === true; + if (store.aiParams && typeof store.aiParams.max_tokens !== 'undefined') { + maxTokens = parseInt(store.aiParams.max_tokens) || maxTokens; + } errorPending = store.errorPending === true; + showDebugTab = store.showDebugTab === true; const savedStats = await storage.local.get('classifyStats'); if (savedStats.classifyStats && typeof savedStats.classifyStats === 'object') { Object.assign(timingStats, savedStats.classifyStats); @@ -423,7 +488,12 @@ async function clearCacheForMessages(idsInput) { if (changes.templateName) config.templateName = changes.templateName.newValue; if (changes.customTemplate) config.customTemplate = changes.customTemplate.newValue; if (changes.customSystemPrompt) config.customSystemPrompt = changes.customSystemPrompt.newValue; - if (changes.aiParams) config.aiParams = changes.aiParams.newValue; + if (changes.aiParams) { + config.aiParams = changes.aiParams.newValue; + if (changes.aiParams.newValue && typeof changes.aiParams.newValue.max_tokens !== 'undefined') { + maxTokens = parseInt(changes.aiParams.newValue.max_tokens) || maxTokens; + } + } if (changes.debugLogging) { config.debugLogging = changes.debugLogging.newValue === true; logger.setDebug(config.debugLogging); @@ -447,6 +517,13 @@ async function clearCacheForMessages(idsInput) { collapseWhitespace = changes.collapseWhitespace.newValue === true; logger.aiLog("collapseWhitespace updated from storage change", { debug: true }, collapseWhitespace); } + if (changes.tokenReduction) { + tokenReduction = changes.tokenReduction.newValue === true; + logger.aiLog("tokenReduction updated from storage change", { debug: true }, tokenReduction); + } + if (changes.showDebugTab) { + showDebugTab = changes.showDebugTab.newValue === true; + } if (changes.errorPending) { errorPending = changes.errorPending.newValue === true; updateActionIcon(); diff --git a/manifest.json 
b/manifest.json index c3b45a7..e7cb9d8 100644 --- a/manifest.json +++ b/manifest.json @@ -1,13 +1,13 @@ { "manifest_version": 2, "name": "Sortana", - "version": "2.1.1", + "version": "2.2.0", "default_locale": "en-US", "applications": { "gecko": { "id": "ai-filter@jordanwages", "strict_min_version": "128.0", - "strict_max_version": "139.*" + "strict_max_version": "140.*" } }, "icons": { diff --git a/modules/AiClassifier.js b/modules/AiClassifier.js index b35cb2c..8313654 100644 --- a/modules/AiClassifier.js +++ b/modules/AiClassifier.js @@ -308,6 +308,11 @@ async function classifyText(text, criterion, cacheKey = null) { } const payload = buildPayload(text, criterion); + try { + await storage.local.set({ lastPayload: JSON.parse(payload) }); + } catch (e) { + aiLog('failed to save last payload', { level: 'warn' }, e); + } aiLog(`[AiClassifier] Sending classification request to ${gEndpoint}`, {debug: true}); aiLog(`[AiClassifier] Classification request payload:`, { debug: true }, payload); diff --git a/options/options.html b/options/options.html index 57f407c..ddc5ee0 100644 --- a/options/options.html +++ b/options/options.html @@ -31,6 +31,10 @@ .tag { --bulma-tag-h: 318; } + #diff-display { + white-space: pre-wrap; + font-family: monospace; + }
@@ -47,6 +51,7 @@