Merge pull request #105 from wagesj45/codex/implement-token-reduction-strategies

Implement advanced token reduction options
This commit is contained in:
Jordan Wages 2025-07-19 17:33:34 -05:00 committed by GitHub
commit e629cc6518
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 79 additions and 12 deletions

View file

@ -18,7 +18,9 @@
"options.htmlToMarkdown": { "message": "Convert HTML body to Markdown" },
"options.stripUrlParams": { "message": "Remove URL tracking parameters" },
"options.altTextImages": { "message": "Replace images with alt text" },
"options.collapseWhitespace": { "message": "Collapse long whitespace" }
"options.collapseWhitespace": { "message": "Collapse long whitespace" },
"options.tokenReduction": { "message": "Aggressive token reduction" },
"options.contextLength": { "message": "Context length" }
,"action.read": { "message": "read" }
,"action.flag": { "message": "flag" }
,"action.copy": { "message": "copy" }

View file

@ -26,6 +26,8 @@ let htmlToMarkdown = false;
let stripUrlParams = false;
let altTextImages = false;
let collapseWhitespace = false;
let tokenReduction = false;
let contextLength = 16384;
let TurndownService = null;
let userTheme = 'auto';
let currentTheme = 'light';
@ -125,12 +127,16 @@ function byteSize(str) {
}
/**
 * Replace long inline base64 payloads in `text` with a short placeholder.
 *
 * A payload is an optional `data:<type>;base64,` prefix followed by at least
 * 100 base64 alphabet characters, optionally wrapped across lines, and up to
 * two trailing `=` padding characters.
 *
 * When the module-level `tokenReduction` flag is set, the placeholder is the
 * fixed string `__BASE64__`; otherwise it is `[base64: N bytes]`, where N is
 * `byteSize()` of the matched text (including any data-URI prefix and
 * interior line breaks).
 *
 * @param {string} text - Text to scan.
 * @returns {string} The text with every detected payload replaced.
 */
function replaceInlineBase64(text) {
  // Line breaks are accepted only BETWEEN base64 characters and `=` only as
  // trailing padding, so separator lines made of `=` or long runs of blank
  // lines are not mistaken for payloads, and the line breaks that follow a
  // payload are left in place.
  const payload = /(?:data:[^;]+;base64,)?[A-Za-z0-9+/](?:[\r\n]*[A-Za-z0-9+/]){99,}={0,2}/g;
  return text.replace(payload,
    m => tokenReduction ? '__BASE64__' : `[base64: ${byteSize(m)} bytes]`);
}
function sanitizeString(text) {
let t = String(text);
if (tokenReduction) {
t = t.replace(/<!--.*?-->/gs, '')
.replace(/url\([^\)]*\)/gi, 'url(__IMG__)');
}
if (stripUrlParams) {
t = t.replace(/https?:\/\/[^\s)]+/g, m => {
const idx = m.indexOf('?');
@ -138,7 +144,7 @@ function sanitizeString(text) {
});
}
if (collapseWhitespace) {
t = t.replace(/[ \t\u00A0]{2,}/g, ' ').replace(/\n{3,}/g, '\n\n');
t = t.replace(/[\u200B-\u200D\u2060\s]{2,}/g, ' ').replace(/\n{3,}/g, '\n\n');
}
return t;
}
@ -157,12 +163,26 @@ function collectText(part, bodyParts, attachments) {
attachments.push(`${name} (${ct}, ${part.size || byteSize(body)} bytes)`);
} else if (ct.startsWith("text/html")) {
const doc = new DOMParser().parseFromString(body, 'text/html');
if (altTextImages) {
doc.querySelectorAll('img').forEach(img => {
const alt = img.getAttribute('alt') || '';
img.replaceWith(doc.createTextNode(alt));
if (tokenReduction) {
doc.querySelectorAll('script,style').forEach(el => el.remove());
const walker = doc.createTreeWalker(doc, NodeFilter.SHOW_COMMENT);
let node;
while ((node = walker.nextNode())) {
node.parentNode.removeChild(node);
}
doc.querySelectorAll('*').forEach(el => {
for (const attr of Array.from(el.attributes)) {
if (!['href','src','alt'].includes(attr.name)) {
el.removeAttribute(attr.name);
}
}
});
}
doc.querySelectorAll('img').forEach(img => {
const alt = img.getAttribute('alt') || '';
const text = altTextImages ? alt : '__IMG__';
img.replaceWith(doc.createTextNode(text));
});
if (stripUrlParams) {
doc.querySelectorAll('[href]').forEach(a => {
const href = a.getAttribute('href');
@ -198,7 +218,15 @@ function buildEmailText(full) {
.join('\n');
const attachInfo = `Attachments: ${attachments.length}` +
(attachments.length ? "\n" + attachments.map(a => ` - ${a}`).join('\n') : "");
const combined = `${headers}\n${attachInfo}\n\n${bodyParts.join('\n')}`.trim();
let combined = `${headers}\n${attachInfo}\n\n${bodyParts.join('\n')}`.trim();
if (tokenReduction) {
const seen = new Set();
combined = combined.split('\n').filter(l => {
if (seen.has(l)) return false;
seen.add(l);
return true;
}).join('\n');
}
return sanitizeString(combined);
}
@ -233,7 +261,13 @@ async function processMessage(id) {
updateActionIcon();
try {
const full = await messenger.messages.getFull(id);
const text = buildEmailText(full);
let text = buildEmailText(full);
if (tokenReduction && contextLength > 0) {
const limit = Math.floor(contextLength * 0.9);
if (text.length > limit) {
text = text.slice(0, limit);
}
}
let hdr;
let currentTags = [];
let alreadyRead = false;
@ -391,7 +425,7 @@ async function clearCacheForMessages(idsInput) {
}
try {
const store = await storage.local.get(["endpoint", "templateName", "customTemplate", "customSystemPrompt", "aiParams", "debugLogging", "htmlToMarkdown", "stripUrlParams", "altTextImages", "collapseWhitespace", "aiRules", "theme", "errorPending"]);
const store = await storage.local.get(["endpoint", "templateName", "customTemplate", "customSystemPrompt", "aiParams", "debugLogging", "htmlToMarkdown", "stripUrlParams", "altTextImages", "collapseWhitespace", "tokenReduction", "contextLength", "aiRules", "theme", "errorPending"]);
logger.setDebug(store.debugLogging);
await AiClassifier.setConfig(store);
userTheme = store.theme || 'auto';
@ -401,6 +435,8 @@ async function clearCacheForMessages(idsInput) {
stripUrlParams = store.stripUrlParams === true;
altTextImages = store.altTextImages === true;
collapseWhitespace = store.collapseWhitespace === true;
tokenReduction = store.tokenReduction === true;
contextLength = parseInt(store.contextLength) || contextLength;
errorPending = store.errorPending === true;
const savedStats = await storage.local.get('classifyStats');
if (savedStats.classifyStats && typeof savedStats.classifyStats === 'object') {
@ -447,6 +483,14 @@ async function clearCacheForMessages(idsInput) {
collapseWhitespace = changes.collapseWhitespace.newValue === true;
logger.aiLog("collapseWhitespace updated from storage change", { debug: true }, collapseWhitespace);
}
if (changes.tokenReduction) {
tokenReduction = changes.tokenReduction.newValue === true;
logger.aiLog("tokenReduction updated from storage change", { debug: true }, tokenReduction);
}
if (changes.contextLength) {
contextLength = parseInt(changes.contextLength.newValue) || contextLength;
logger.aiLog("contextLength updated from storage change", { debug: true }, contextLength);
}
if (changes.errorPending) {
errorPending = changes.errorPending.newValue === true;
updateActionIcon();

View file

@ -144,6 +144,17 @@
<input type="checkbox" id="collapse-whitespace"> Collapse long whitespace
</label>
</div>
<div class="field">
<label class="checkbox">
<input type="checkbox" id="token-reduction"> Aggressive token reduction
</label>
</div>
<div class="field">
<label class="label" for="context-length">Context length</label>
<div class="control">
<input class="input" type="number" id="context-length">
</div>
</div>
<div class="field">
<label class="label" for="max_tokens">Max tokens</label>
<div class="control">

View file

@ -16,6 +16,8 @@ document.addEventListener('DOMContentLoaded', async () => {
'stripUrlParams',
'altTextImages',
'collapseWhitespace',
'tokenReduction',
'contextLength',
'aiRules',
'aiCache',
'theme'
@ -115,6 +117,12 @@ document.addEventListener('DOMContentLoaded', async () => {
const collapseWhitespaceToggle = document.getElementById('collapse-whitespace');
collapseWhitespaceToggle.checked = defaults.collapseWhitespace === true;
const tokenReductionToggle = document.getElementById('token-reduction');
tokenReductionToggle.checked = defaults.tokenReduction === true;
const contextLengthInput = document.getElementById('context-length');
contextLengthInput.value = defaults.contextLength || 16384;
const aiParams = Object.assign({}, DEFAULT_AI_PARAMS, defaults.aiParams || {});
for (const [key, val] of Object.entries(aiParams)) {
const el = document.getElementById(key);
@ -791,8 +799,10 @@ document.addEventListener('DOMContentLoaded', async () => {
const stripUrlParams = stripUrlToggle.checked;
const altTextImages = altTextToggle.checked;
const collapseWhitespace = collapseWhitespaceToggle.checked;
const tokenReduction = tokenReductionToggle.checked;
const contextLength = parseInt(contextLengthInput.value) || 0;
const theme = themeSelect.value;
await storage.local.set({ endpoint, templateName, customTemplate: customTemplateText, customSystemPrompt, aiParams: aiParamsSave, debugLogging, htmlToMarkdown, stripUrlParams, altTextImages, collapseWhitespace, aiRules: rules, theme });
await storage.local.set({ endpoint, templateName, customTemplate: customTemplateText, customSystemPrompt, aiParams: aiParamsSave, debugLogging, htmlToMarkdown, stripUrlParams, altTextImages, collapseWhitespace, tokenReduction, contextLength, aiRules: rules, theme });
await applyTheme(theme);
try {
await AiClassifier.setConfig({ endpoint, templateName, customTemplate: customTemplateText, customSystemPrompt, aiParams: aiParamsSave, debugLogging });