Merge pull request #42 from wagesj45/codex/navigate-ebay-email-alerts-preferences

Improve message parsing for AI classifier
This commit is contained in:
Jordan Wages 2025-06-26 17:02:27 -05:00 committed by GitHub
commit 51a9a23312
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 57 additions and 4 deletions

View file

@ -43,3 +43,18 @@ Additional documentation exists outside this repository.
- [Bulma.css](https://github.com/jgthms/bulma) - [Bulma.css](https://github.com/jgthms/bulma)
- Issue tracker: [Thunderbird tracker on Bugzilla](https://bugzilla.mozilla.org/describecomponents.cgi?product=Thunderbird) - Issue tracker: [Thunderbird tracker on Bugzilla](https://bugzilla.mozilla.org/describecomponents.cgi?product=Thunderbird)
### Message Structure Notes
Messages retrieved with `messenger.messages.getFull` are returned as
nested objects. The root contains `headers` and a `parts` array. Each part may
itself contain `parts` for multipart messages or a `body` string. Attachments are
indicated via the `content-disposition` header.
When constructing the text sent to the AI service, parse the full message
recursively. Include the message headers (such as `from`, `to`, and `subject`)
and record attachment summaries rather than raw binary data. Inline or attached
base64 data should be replaced with placeholders showing the byte size. The
final string should contain the headers, a brief attachment section, and then
the plain text extracted from all text parts.

View file

@ -39,6 +39,45 @@ async function sha256Hex(str) {
return Array.from(new Uint8Array(buf), b => b.toString(16).padStart(2, '0')).join(''); return Array.from(new Uint8Array(buf), b => b.toString(16).padStart(2, '0')).join('');
} }
/**
 * Measure how many bytes a string occupies once encoded as UTF-8.
 *
 * @param {string} [str] - Text to measure; any falsy value is treated as "".
 * @returns {number} Length of the UTF-8 encoding in bytes.
 */
function byteSize(str) {
  const text = str ? str : "";
  const encoded = new TextEncoder().encode(text);
  return encoded.byteLength;
}
/**
 * Swap long base64-looking runs in a string for a compact placeholder.
 *
 * A run is 100 or more characters from the base64 alphabet, optionally
 * followed by up to two "=" padding characters. The size shown in the
 * placeholder is the byte size of the matched (still encoded) text.
 *
 * @param {string} text - Text that may contain inline base64 data.
 * @returns {string} The text with each run replaced by `[base64: N bytes]`.
 */
function replaceInlineBase64(text) {
  const base64Run = /[A-Za-z0-9+/]{100,}={0,2}/g;
  const toPlaceholder = (match) => `[base64: ${byteSize(match)} bytes]`;
  return text.replace(base64Run, toPlaceholder);
}
/**
 * Recursively walk a message part tree, gathering readable text and
 * one-line attachment summaries.
 *
 * Parts that have child `parts` are only descended into. A leaf part is
 * treated as an attachment when its content-disposition mentions
 * "attachment" or its content type is not `text/*`; otherwise its body is
 * added to `bodyParts` (HTML is reduced to its text content first) with
 * long base64 runs replaced by placeholders.
 *
 * @param {object} part - Message part (or the root message object). Missing
 *   parts are ignored.
 * @param {string[]} bodyParts - Output array; extracted text is appended.
 * @param {string[]} attachments - Output array; summaries of the form
 *   `name (content-type, N bytes)` are appended.
 * @returns {void}
 */
function collectText(part, bodyParts, attachments) {
  // A hole in a `parts` array (or a missing message) has nothing to collect.
  if (!part) return;

  if (part.parts && part.parts.length) {
    for (const child of part.parts) collectText(child, bodyParts, attachments);
    return;
  }

  const ct = (part.contentType || "text/plain").toLowerCase();
  // Keep the header as sent: the filename is extracted from it and must keep
  // its original case. The lowercased copy is only for the keyword check.
  const rawDisposition = String(part.headers?.["content-disposition"]?.[0] || "");
  const cd = rawDisposition.toLowerCase();
  const body = String(part.body || "");

  if (cd.includes("attachment") || !ct.startsWith("text/")) {
    // Prefer the disposition's filename, fall back to the content-type name.
    const nameMatch =
      /filename\s*=\s*"?([^";]+)/i.exec(rawDisposition) ||
      /name\s*=\s*"?([^";]+)/i.exec(part.headers?.["content-type"]?.[0] || "");
    const name = nameMatch ? nameMatch[1].trim() : "";
    // Use the reported size when available; otherwise measure the body.
    attachments.push(`${name} (${ct}, ${part.size || byteSize(body)} bytes)`);
  } else if (ct.startsWith("text/html")) {
    const doc = new DOMParser().parseFromString(body, "text/html");
    bodyParts.push(replaceInlineBase64(doc.body.textContent || ""));
  } else {
    bodyParts.push(replaceInlineBase64(body));
  }
}
/**
 * Flatten a full message object into the single text blob handed to the
 * classifier: header lines, an attachment section, a blank line, then the
 * collected body text. Surrounding whitespace is trimmed.
 *
 * @param {object} full - Message object with optional `headers` (a map of
 *   header name to array of values) and a part tree readable by collectText.
 * @returns {string} The assembled text.
 */
function buildEmailText(full) {
  const bodyParts = [];
  const attachments = [];
  collectText(full, bodyParts, attachments);

  // One "name: value value ..." line per header.
  const headerLines = [];
  for (const [key, values] of Object.entries(full.headers || {})) {
    headerLines.push(`${key}: ${values.join(' ')}`);
  }
  const headers = headerLines.join('\n');

  // Count line, followed by one bullet per attachment when there are any.
  let attachInfo = `Attachments: ${attachments.length}`;
  if (attachments.length) {
    const bullets = attachments.map(a => ` - ${a}`);
    attachInfo += "\n" + bullets.join('\n');
  }

  const bodyText = bodyParts.join('\n');
  return `${headers}\n${attachInfo}\n\n${bodyText}`.trim();
}
async function applyAiRules(idsInput) { async function applyAiRules(idsInput) {
const ids = Array.isArray(idsInput) ? idsInput : [idsInput]; const ids = Array.isArray(idsInput) ? idsInput : [idsInput];
if (!ids.length) return queue; if (!ids.length) return queue;
@ -66,10 +105,9 @@ async function applyAiRules(idsInput) {
updateActionIcon(); updateActionIcon();
try { try {
const full = await messenger.messages.getFull(id); const full = await messenger.messages.getFull(id);
const text = full?.parts?.[0]?.body || ""; const text = buildEmailText(full);
for (const rule of aiRules) { const cacheKey = await sha256Hex(`${id}|${rule.criterion}`);
const cacheKey = await sha256Hex(`${id}|${rule.criterion}`); const matched = await AiClassifier.classifyText(text, rule.criterion, cacheKey);
const matched = await AiClassifier.classifyText(text, rule.criterion, cacheKey);
if (matched) { if (matched) {
for (const act of (rule.actions || [])) { for (const act of (rule.actions || [])) {
if (act.type === 'tag' && act.tagKey) { if (act.type === 'tag' && act.tagKey) {