From 183ca8f355783f43a21555338cae6144190c871e Mon Sep 17 00:00:00 2001 From: Jordan Wages Date: Sat, 19 Jul 2025 17:31:03 -0500 Subject: [PATCH] Add token reduction options and context length cap --- _locales/en-US/messages.json | 4 ++- background.js | 64 ++++++++++++++++++++++++++++++------ options/options.html | 11 +++++++ options/options.js | 12 ++++++- 4 files changed, 79 insertions(+), 12 deletions(-) diff --git a/_locales/en-US/messages.json b/_locales/en-US/messages.json index 983bb86..fb439b9 100644 --- a/_locales/en-US/messages.json +++ b/_locales/en-US/messages.json @@ -18,7 +18,9 @@ "options.htmlToMarkdown": { "message": "Convert HTML body to Markdown" }, "options.stripUrlParams": { "message": "Remove URL tracking parameters" }, "options.altTextImages": { "message": "Replace images with alt text" }, - "options.collapseWhitespace": { "message": "Collapse long whitespace" } + "options.collapseWhitespace": { "message": "Collapse long whitespace" }, + "options.tokenReduction": { "message": "Aggressive token reduction" }, + "options.contextLength": { "message": "Context length" } ,"action.read": { "message": "read" } ,"action.flag": { "message": "flag" } ,"action.copy": { "message": "copy" } diff --git a/background.js b/background.js index c7579d0..c44bfed 100644 --- a/background.js +++ b/background.js @@ -26,6 +26,8 @@ let htmlToMarkdown = false; let stripUrlParams = false; let altTextImages = false; let collapseWhitespace = false; +let tokenReduction = false; +let contextLength = 16384; let TurndownService = null; let userTheme = 'auto'; let currentTheme = 'light'; @@ -125,12 +127,16 @@ function byteSize(str) { } function replaceInlineBase64(text) { - return text.replace(/[A-Za-z0-9+/]{100,}={0,2}/g, - m => `[base64: ${byteSize(m)} bytes]`); + return text.replace(/(?:data:[^;]+;base64,)?[A-Za-z0-9+/=\r\n]{100,}/g, + m => tokenReduction ? '__BASE64__' : `[base64: ${byteSize(m)} bytes]`); } function sanitizeString(text) { let t = String(text); + if (tokenReduction) { + t = t.replace(//gs, '') + .replace(/url\([^\)]*\)/gi, 'url(__IMG__)'); + } if (stripUrlParams) { t = t.replace(/https?:\/\/[^\s)]+/g, m => { const idx = m.indexOf('?'); @@ -138,7 +144,7 @@ function sanitizeString(text) { }); } if (collapseWhitespace) { - t = t.replace(/[ \t\u00A0]{2,}/g, ' ').replace(/\n{3,}/g, '\n\n'); + t = t.replace(/[\u200B-\u200D\u2060\s]{2,}/g, ' ').replace(/\n{3,}/g, '\n\n'); } return t; } @@ -157,12 +163,26 @@ function collectText(part, bodyParts, attachments) { attachments.push(`${name} (${ct}, ${part.size || byteSize(body)} bytes)`); } else if (ct.startsWith("text/html")) { const doc = new DOMParser().parseFromString(body, 'text/html'); - if (altTextImages) { - doc.querySelectorAll('img').forEach(img => { - const alt = img.getAttribute('alt') || ''; - img.replaceWith(doc.createTextNode(alt)); + if (tokenReduction) { + doc.querySelectorAll('script,style').forEach(el => el.remove()); + const walker = doc.createTreeWalker(doc, NodeFilter.SHOW_COMMENT); + let node; + while ((node = walker.nextNode())) { + node.parentNode.removeChild(node); + } + doc.querySelectorAll('*').forEach(el => { + for (const attr of Array.from(el.attributes)) { + if (!['href','src','alt'].includes(attr.name)) { + el.removeAttribute(attr.name); + } + } }); } + doc.querySelectorAll('img').forEach(img => { + const alt = img.getAttribute('alt') || ''; + const text = altTextImages ? alt : '__IMG__'; + img.replaceWith(doc.createTextNode(text)); + }); if (stripUrlParams) { doc.querySelectorAll('[href]').forEach(a => { const href = a.getAttribute('href'); @@ -198,7 +218,15 @@ function buildEmailText(full) { .join('\n'); const attachInfo = `Attachments: ${attachments.length}` + (attachments.length ? "\n" + attachments.map(a => ` - ${a}`).join('\n') : ""); - const combined = `${headers}\n${attachInfo}\n\n${bodyParts.join('\n')}`.trim(); + let combined = `${headers}\n${attachInfo}\n\n${bodyParts.join('\n')}`.trim(); + if (tokenReduction) { + const seen = new Set(); + combined = combined.split('\n').filter(l => { + if (seen.has(l)) return false; + seen.add(l); + return true; + }).join('\n'); + } return sanitizeString(combined); } @@ -233,7 +261,13 @@ async function processMessage(id) { updateActionIcon(); try { const full = await messenger.messages.getFull(id); - const text = buildEmailText(full); + let text = buildEmailText(full); + if (tokenReduction && contextLength > 0) { + const limit = Math.floor(contextLength * 0.9); + if (text.length > limit) { + text = text.slice(0, limit); + } + } let hdr; let currentTags = []; let alreadyRead = false; @@ -391,7 +425,7 @@ async function clearCacheForMessages(idsInput) { } try { - const store = await storage.local.get(["endpoint", "templateName", "customTemplate", "customSystemPrompt", "aiParams", "debugLogging", "htmlToMarkdown", "stripUrlParams", "altTextImages", "collapseWhitespace", "aiRules", "theme", "errorPending"]); + const store = await storage.local.get(["endpoint", "templateName", "customTemplate", "customSystemPrompt", "aiParams", "debugLogging", "htmlToMarkdown", "stripUrlParams", "altTextImages", "collapseWhitespace", "tokenReduction", "contextLength", "aiRules", "theme", "errorPending"]); logger.setDebug(store.debugLogging); await AiClassifier.setConfig(store); userTheme = store.theme || 'auto'; @@ -401,6 +435,8 @@ async function clearCacheForMessages(idsInput) { stripUrlParams = store.stripUrlParams === true; altTextImages = store.altTextImages === true; collapseWhitespace = store.collapseWhitespace === true; + tokenReduction = store.tokenReduction === true; + contextLength = parseInt(store.contextLength) || contextLength; errorPending = store.errorPending === true; const savedStats = await storage.local.get('classifyStats'); if (savedStats.classifyStats && typeof savedStats.classifyStats === 'object') { @@ -447,6 +483,14 @@ async function clearCacheForMessages(idsInput) { collapseWhitespace = changes.collapseWhitespace.newValue === true; logger.aiLog("collapseWhitespace updated from storage change", { debug: true }, collapseWhitespace); } + if (changes.tokenReduction) { + tokenReduction = changes.tokenReduction.newValue === true; + logger.aiLog("tokenReduction updated from storage change", { debug: true }, tokenReduction); + } + if (changes.contextLength) { + contextLength = parseInt(changes.contextLength.newValue) || contextLength; + logger.aiLog("contextLength updated from storage change", { debug: true }, contextLength); + } if (changes.errorPending) { errorPending = changes.errorPending.newValue === true; updateActionIcon(); diff --git a/options/options.html b/options/options.html index 57f407c..108d7f9 100644 --- a/options/options.html +++ b/options/options.html @@ -144,6 +144,17 @@ Collapse long whitespace +
+ +
+
+ +
+ +
+
diff --git a/options/options.js b/options/options.js index 096f1a8..f5af1b6 100644 --- a/options/options.js +++ b/options/options.js @@ -16,6 +16,8 @@ document.addEventListener('DOMContentLoaded', async () => { 'stripUrlParams', 'altTextImages', 'collapseWhitespace', + 'tokenReduction', + 'contextLength', 'aiRules', 'aiCache', 'theme' @@ -115,6 +117,12 @@ document.addEventListener('DOMContentLoaded', async () => { const collapseWhitespaceToggle = document.getElementById('collapse-whitespace'); collapseWhitespaceToggle.checked = defaults.collapseWhitespace === true; + const tokenReductionToggle = document.getElementById('token-reduction'); + tokenReductionToggle.checked = defaults.tokenReduction === true; + + const contextLengthInput = document.getElementById('context-length'); + contextLengthInput.value = defaults.contextLength || 16384; + const aiParams = Object.assign({}, DEFAULT_AI_PARAMS, defaults.aiParams || {}); for (const [key, val] of Object.entries(aiParams)) { const el = document.getElementById(key); @@ -791,8 +799,10 @@ document.addEventListener('DOMContentLoaded', async () => { const stripUrlParams = stripUrlToggle.checked; const altTextImages = altTextToggle.checked; const collapseWhitespace = collapseWhitespaceToggle.checked; + const tokenReduction = tokenReductionToggle.checked; + const contextLength = parseInt(contextLengthInput.value) || 0; const theme = themeSelect.value; - await storage.local.set({ endpoint, templateName, customTemplate: customTemplateText, customSystemPrompt, aiParams: aiParamsSave, debugLogging, htmlToMarkdown, stripUrlParams, altTextImages, collapseWhitespace, aiRules: rules, theme }); + await storage.local.set({ endpoint, templateName, customTemplate: customTemplateText, customSystemPrompt, aiParams: aiParamsSave, debugLogging, htmlToMarkdown, stripUrlParams, altTextImages, collapseWhitespace, tokenReduction, contextLength, aiRules: rules, theme }); await applyTheme(theme); try { await AiClassifier.setConfig({ endpoint, templateName, customTemplate: customTemplateText, customSystemPrompt, aiParams: aiParamsSave, debugLogging });