Merge pull request #105 from wagesj45/codex/implement-token-reduction-strategies
Implement advanced token reduction options
This commit is contained in:
commit
e629cc6518
4 changed files with 79 additions and 12 deletions
|
@ -18,7 +18,9 @@
|
|||
"options.htmlToMarkdown": { "message": "Convert HTML body to Markdown" },
|
||||
"options.stripUrlParams": { "message": "Remove URL tracking parameters" },
|
||||
"options.altTextImages": { "message": "Replace images with alt text" },
|
||||
"options.collapseWhitespace": { "message": "Collapse long whitespace" }
|
||||
"options.collapseWhitespace": { "message": "Collapse long whitespace" },
|
||||
"options.tokenReduction": { "message": "Aggressive token reduction" },
|
||||
"options.contextLength": { "message": "Context length" }
|
||||
,"action.read": { "message": "read" }
|
||||
,"action.flag": { "message": "flag" }
|
||||
,"action.copy": { "message": "copy" }
|
||||
|
|
|
@ -26,6 +26,8 @@ let htmlToMarkdown = false;
|
|||
let stripUrlParams = false;
|
||||
let altTextImages = false;
|
||||
let collapseWhitespace = false;
|
||||
let tokenReduction = false;
|
||||
let contextLength = 16384;
|
||||
let TurndownService = null;
|
||||
let userTheme = 'auto';
|
||||
let currentTheme = 'light';
|
||||
|
@ -125,12 +127,16 @@ function byteSize(str) {
|
|||
}
|
||||
|
||||
/**
 * Replace long inline base64 runs in a string with a short placeholder.
 *
 * A run is 100 or more characters drawn from the base64 alphabet, `=`
 * padding and CR/LF line breaks, optionally preceded by a
 * `data:<type>;base64,` prefix, which is swallowed together with the payload.
 *
 * Reads two names defined elsewhere in the file: the `tokenReduction` flag
 * and the `byteSize` helper.
 *
 * @param {string} text - Text that may contain embedded base64 data.
 * @returns {string} The text with each run replaced by `__BASE64__` when
 *   `tokenReduction` is on, otherwise by `[base64: N bytes]`.
 */
function replaceInlineBase64(text) {
  // Only the newer replacement is kept: leaving the older `return` ahead of
  // it made the tokenReduction-aware branch unreachable.
  return text.replace(/(?:data:[^;]+;base64,)?[A-Za-z0-9+/=\r\n]{100,}/g,
    // N is byteSize of the whole matched text, so it includes any data:
    // prefix and line breaks; it is not the size of the decoded payload.
    m => tokenReduction ? '__BASE64__' : `[base64: ${byteSize(m)} bytes]`);
}
|
||||
|
||||
function sanitizeString(text) {
|
||||
let t = String(text);
|
||||
if (tokenReduction) {
|
||||
t = t.replace(/<!--.*?-->/gs, '')
|
||||
.replace(/url\([^\)]*\)/gi, 'url(__IMG__)');
|
||||
}
|
||||
if (stripUrlParams) {
|
||||
t = t.replace(/https?:\/\/[^\s)]+/g, m => {
|
||||
const idx = m.indexOf('?');
|
||||
|
@ -138,7 +144,7 @@ function sanitizeString(text) {
|
|||
});
|
||||
}
|
||||
if (collapseWhitespace) {
|
||||
t = t.replace(/[ \t\u00A0]{2,}/g, ' ').replace(/\n{3,}/g, '\n\n');
|
||||
t = t.replace(/[\u200B-\u200D\u2060\s]{2,}/g, ' ').replace(/\n{3,}/g, '\n\n');
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
@ -157,12 +163,26 @@ function collectText(part, bodyParts, attachments) {
|
|||
attachments.push(`${name} (${ct}, ${part.size || byteSize(body)} bytes)`);
|
||||
} else if (ct.startsWith("text/html")) {
|
||||
const doc = new DOMParser().parseFromString(body, 'text/html');
|
||||
if (altTextImages) {
|
||||
doc.querySelectorAll('img').forEach(img => {
|
||||
const alt = img.getAttribute('alt') || '';
|
||||
img.replaceWith(doc.createTextNode(alt));
|
||||
if (tokenReduction) {
|
||||
doc.querySelectorAll('script,style').forEach(el => el.remove());
|
||||
const walker = doc.createTreeWalker(doc, NodeFilter.SHOW_COMMENT);
|
||||
let node;
|
||||
while ((node = walker.nextNode())) {
|
||||
node.parentNode.removeChild(node);
|
||||
}
|
||||
doc.querySelectorAll('*').forEach(el => {
|
||||
for (const attr of Array.from(el.attributes)) {
|
||||
if (!['href','src','alt'].includes(attr.name)) {
|
||||
el.removeAttribute(attr.name);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
doc.querySelectorAll('img').forEach(img => {
|
||||
const alt = img.getAttribute('alt') || '';
|
||||
const text = altTextImages ? alt : '__IMG__';
|
||||
img.replaceWith(doc.createTextNode(text));
|
||||
});
|
||||
if (stripUrlParams) {
|
||||
doc.querySelectorAll('[href]').forEach(a => {
|
||||
const href = a.getAttribute('href');
|
||||
|
@ -198,7 +218,15 @@ function buildEmailText(full) {
|
|||
.join('\n');
|
||||
const attachInfo = `Attachments: ${attachments.length}` +
|
||||
(attachments.length ? "\n" + attachments.map(a => ` - ${a}`).join('\n') : "");
|
||||
const combined = `${headers}\n${attachInfo}\n\n${bodyParts.join('\n')}`.trim();
|
||||
let combined = `${headers}\n${attachInfo}\n\n${bodyParts.join('\n')}`.trim();
|
||||
if (tokenReduction) {
|
||||
const seen = new Set();
|
||||
combined = combined.split('\n').filter(l => {
|
||||
if (seen.has(l)) return false;
|
||||
seen.add(l);
|
||||
return true;
|
||||
}).join('\n');
|
||||
}
|
||||
return sanitizeString(combined);
|
||||
}
|
||||
|
||||
|
@ -233,7 +261,13 @@ async function processMessage(id) {
|
|||
updateActionIcon();
|
||||
try {
|
||||
const full = await messenger.messages.getFull(id);
|
||||
const text = buildEmailText(full);
|
||||
let text = buildEmailText(full);
|
||||
if (tokenReduction && contextLength > 0) {
|
||||
const limit = Math.floor(contextLength * 0.9);
|
||||
if (text.length > limit) {
|
||||
text = text.slice(0, limit);
|
||||
}
|
||||
}
|
||||
let hdr;
|
||||
let currentTags = [];
|
||||
let alreadyRead = false;
|
||||
|
@ -391,7 +425,7 @@ async function clearCacheForMessages(idsInput) {
|
|||
}
|
||||
|
||||
try {
|
||||
const store = await storage.local.get(["endpoint", "templateName", "customTemplate", "customSystemPrompt", "aiParams", "debugLogging", "htmlToMarkdown", "stripUrlParams", "altTextImages", "collapseWhitespace", "aiRules", "theme", "errorPending"]);
|
||||
const store = await storage.local.get(["endpoint", "templateName", "customTemplate", "customSystemPrompt", "aiParams", "debugLogging", "htmlToMarkdown", "stripUrlParams", "altTextImages", "collapseWhitespace", "tokenReduction", "contextLength", "aiRules", "theme", "errorPending"]);
|
||||
logger.setDebug(store.debugLogging);
|
||||
await AiClassifier.setConfig(store);
|
||||
userTheme = store.theme || 'auto';
|
||||
|
@ -401,6 +435,8 @@ async function clearCacheForMessages(idsInput) {
|
|||
stripUrlParams = store.stripUrlParams === true;
|
||||
altTextImages = store.altTextImages === true;
|
||||
collapseWhitespace = store.collapseWhitespace === true;
|
||||
tokenReduction = store.tokenReduction === true;
|
||||
contextLength = parseInt(store.contextLength) || contextLength;
|
||||
errorPending = store.errorPending === true;
|
||||
const savedStats = await storage.local.get('classifyStats');
|
||||
if (savedStats.classifyStats && typeof savedStats.classifyStats === 'object') {
|
||||
|
@ -447,6 +483,14 @@ async function clearCacheForMessages(idsInput) {
|
|||
collapseWhitespace = changes.collapseWhitespace.newValue === true;
|
||||
logger.aiLog("collapseWhitespace updated from storage change", { debug: true }, collapseWhitespace);
|
||||
}
|
||||
if (changes.tokenReduction) {
|
||||
tokenReduction = changes.tokenReduction.newValue === true;
|
||||
logger.aiLog("tokenReduction updated from storage change", { debug: true }, tokenReduction);
|
||||
}
|
||||
if (changes.contextLength) {
|
||||
contextLength = parseInt(changes.contextLength.newValue) || contextLength;
|
||||
logger.aiLog("contextLength updated from storage change", { debug: true }, contextLength);
|
||||
}
|
||||
if (changes.errorPending) {
|
||||
errorPending = changes.errorPending.newValue === true;
|
||||
updateActionIcon();
|
||||
|
|
|
@ -144,6 +144,17 @@
|
|||
<input type="checkbox" id="collapse-whitespace"> Collapse long whitespace
|
||||
</label>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label class="checkbox">
|
||||
<input type="checkbox" id="token-reduction"> Aggressive token reduction
|
||||
</label>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label class="label" for="context-length">Context length</label>
|
||||
<div class="control">
|
||||
<input class="input" type="number" id="context-length">
|
||||
</div>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label class="label" for="max_tokens">Max tokens</label>
|
||||
<div class="control">
|
||||
|
|
|
@ -16,6 +16,8 @@ document.addEventListener('DOMContentLoaded', async () => {
|
|||
'stripUrlParams',
|
||||
'altTextImages',
|
||||
'collapseWhitespace',
|
||||
'tokenReduction',
|
||||
'contextLength',
|
||||
'aiRules',
|
||||
'aiCache',
|
||||
'theme'
|
||||
|
@ -115,6 +117,12 @@ document.addEventListener('DOMContentLoaded', async () => {
|
|||
const collapseWhitespaceToggle = document.getElementById('collapse-whitespace');
|
||||
collapseWhitespaceToggle.checked = defaults.collapseWhitespace === true;
|
||||
|
||||
const tokenReductionToggle = document.getElementById('token-reduction');
|
||||
tokenReductionToggle.checked = defaults.tokenReduction === true;
|
||||
|
||||
const contextLengthInput = document.getElementById('context-length');
|
||||
contextLengthInput.value = defaults.contextLength || 16384;
|
||||
|
||||
const aiParams = Object.assign({}, DEFAULT_AI_PARAMS, defaults.aiParams || {});
|
||||
for (const [key, val] of Object.entries(aiParams)) {
|
||||
const el = document.getElementById(key);
|
||||
|
@ -791,8 +799,10 @@ document.addEventListener('DOMContentLoaded', async () => {
|
|||
const stripUrlParams = stripUrlToggle.checked;
|
||||
const altTextImages = altTextToggle.checked;
|
||||
const collapseWhitespace = collapseWhitespaceToggle.checked;
|
||||
const tokenReduction = tokenReductionToggle.checked;
|
||||
const contextLength = parseInt(contextLengthInput.value) || 0;
|
||||
const theme = themeSelect.value;
|
||||
await storage.local.set({ endpoint, templateName, customTemplate: customTemplateText, customSystemPrompt, aiParams: aiParamsSave, debugLogging, htmlToMarkdown, stripUrlParams, altTextImages, collapseWhitespace, aiRules: rules, theme });
|
||||
await storage.local.set({ endpoint, templateName, customTemplate: customTemplateText, customSystemPrompt, aiParams: aiParamsSave, debugLogging, htmlToMarkdown, stripUrlParams, altTextImages, collapseWhitespace, tokenReduction, contextLength, aiRules: rules, theme });
|
||||
await applyTheme(theme);
|
||||
try {
|
||||
await AiClassifier.setConfig({ endpoint, templateName, customTemplate: customTemplateText, customSystemPrompt, aiParams: aiParamsSave, debugLogging });
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue