Merge pull request #42 from wagesj45/codex/navigate-ebay-email-alerts-preferences

Improve message parsing for AI classifier
2025-06-26 17:02:27 -05:00 · 2025-06-26 17:02:27 -05:00 · 51a9a23312
commit 51a9a23312
parent 93a36ea2d8 1070610174
2 changed files with 57 additions and 4 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@ -43,3 +43,18 @@ Additional documentation exists outside this repository.
  - [Bulma.css](https://github.com/jgthms/bulma)
 - Issue tracker: [Thunderbird tracker on Bugzilla](https://bugzilla.mozilla.org/describecomponents.cgi?product=Thunderbird)
 ### Message Structure Notes
 Messages retrieved with `messenger.messages.getFull` are returned as
 nested objects. The root contains `headers` and a `parts` array. Each part may
 itself contain `parts` for multipart messages or a `body` string. Attachments are
 indicated via the `content-disposition` header.
 When constructing the text sent to the AI service, parse the full message
 recursively. Include key headers such as `from`, `to`, `subject`, and others, and
 record attachment summaries rather than raw binary data. Inline or attached
 base64 data should be replaced with placeholders showing the byte size. The
 final string should have the headers, a brief attachment section, then the plain
 text extracted from all text parts.
--- a/background.js
+++ b/background.js
@ -39,6 +39,45 @@ async function sha256Hex(str) {
    return Array.from(new Uint8Array(buf), b => b.toString(16).padStart(2, '0')).join('');
 }
 /**
  * Measure how many bytes a string occupies once encoded as UTF-8.
  *
  * @param {string} [str] - Text to measure; any falsy value is treated as "".
  * @returns {number} Length in bytes of the UTF-8 encoding.
  */
 function byteSize(str) {
    const source = str ? str : "";
    const utf8 = new TextEncoder().encode(source);
    return utf8.length;
 }
 /**
  * Swap long base64-looking runs in a string for a short size placeholder.
  *
  * A run is 100 or more characters from the base64 alphabet, optionally
  * followed by up to two "=" padding characters. Each run becomes
  * "[base64: N bytes]", where N is the byte size of the matched text itself.
  *
  * @param {string} text - Text that may contain inline base64 data.
  * @returns {string} The text with every matching run replaced.
  */
 function replaceInlineBase64(text) {
    const base64Run = /[A-Za-z0-9+/]{100,}={0,2}/g;
    const toPlaceholder = (run) => `[base64: ${byteSize(run)} bytes]`;
    return text.replace(base64Run, toPlaceholder);
 }
 /**
  * Walk a message part tree and sort its leaves into text and attachments.
  *
  * Parts that have a non-empty `parts` array are treated as containers and
  * only their children are visited. Every other part is a leaf:
  *  - a leaf whose Content-Disposition mentions "attachment", or whose content
  *    type is not text/*, is summarised as "name (type, N bytes)";
  *  - a text/html leaf is parsed with DOMParser and reduced to its text content;
  *  - any other text/* leaf contributes its body as-is.
  * Text leaves are passed through replaceInlineBase64 before being stored.
  *
  * @param {object} part - Message part with optional `parts`, `contentType`,
  *     `headers`, `body` and `size` properties.
  * @param {string[]} bodyParts - Output array; extracted text is appended.
  * @param {string[]} attachments - Output array; attachment summaries are appended.
  * @returns {void}
  */
 function collectText(part, bodyParts, attachments) {
    if (part.parts && part.parts.length) {
        for (const p of part.parts) collectText(p, bodyParts, attachments);
        return;
    }
    const ct = (part.contentType || "text/plain").toLowerCase();
    // Keep the header exactly as received: the lowercased copy is only for the
    // case-insensitive "attachment" check, while the filename must keep its
    // original casing.
    const rawCd = String(part.headers?.["content-disposition"]?.[0] || "");
    const cd = rawCd.toLowerCase();
    const body = String(part.body || "");
    if (cd.includes("attachment") || !ct.startsWith("text/")) {
        const rawCt = String(part.headers?.["content-type"]?.[0] || "");
        // Prefer the disposition's filename, then the content type's name parameter.
        const nameMatch = /filename\s*=\s*"?([^";]+)/i.exec(rawCd) || /name\s*=\s*"?([^";]+)/i.exec(rawCt);
        // Unquoted values can capture whitespace that precedes the next ";".
        const name = nameMatch ? nameMatch[1].trim() : "";
        // Fall back to measuring the body when the part reports no (or zero) size.
        attachments.push(`${name} (${ct}, ${part.size || byteSize(body)} bytes)`);
    } else if (ct.startsWith("text/html")) {
        const doc = new DOMParser().parseFromString(body, 'text/html');
        bodyParts.push(replaceInlineBase64(doc.body.textContent || ""));
    } else {
        bodyParts.push(replaceInlineBase64(body));
    }
 }
 /**
  * Flatten a full message object into a single text block.
  *
  * The result has three sections: one "name: value" line per header (multiple
  * values joined with a space), an "Attachments: N" line followed by one
  * " - summary" line per attachment, then a blank line and the collected text
  * parts joined by newlines. Leading and trailing whitespace is trimmed.
  *
  * @param {object} full - Message object with optional `headers` (name → array
  *     of values) plus whatever part fields collectText reads.
  * @returns {string} The assembled text.
  */
 function buildEmailText(full) {
    const textChunks = [];
    const attachmentNotes = [];
    collectText(full, textChunks, attachmentNotes);

    const headerLines = [];
    for (const [key, values] of Object.entries(full.headers || {})) {
        headerLines.push(`${key}: ${values.join(' ')}`);
    }

    const summaryLines = [`Attachments: ${attachmentNotes.length}`];
    for (const note of attachmentNotes) {
        summaryLines.push(` - ${note}`);
    }

    // The empty entry yields the blank line between the summary and the body.
    const sections = [
        headerLines.join('\n'),
        summaryLines.join('\n'),
        '',
        textChunks.join('\n'),
    ];
    return sections.join('\n').trim();
 }
 async function applyAiRules(idsInput) {
    const ids = Array.isArray(idsInput) ? idsInput : [idsInput];
    if (!ids.length) return queue;
@ -66,10 +105,9 @@ async function applyAiRules(idsInput) {
            updateActionIcon();
            try {
                const full = await messenger.messages.getFull(id);
-                const text = full?.parts?.[0]?.body || "";
+                const text = buildEmailText(full);
-                for (const rule of aiRules) {
+                const cacheKey = await sha256Hex(`${id}|${rule.criterion}`);
-                    const cacheKey = await sha256Hex(`${id}|${rule.criterion}`);
+                const matched = await AiClassifier.classifyText(text, rule.criterion, cacheKey);
                    const matched = await AiClassifier.classifyText(text, rule.criterion, cacheKey);
                    if (matched) {
                        for (const act of (rule.actions || [])) {
                            if (act.type === 'tag' && act.tagKey) {