Skip to main content

mxr_export/
llm.rs

1use crate::ExportThread;
2use mxr_reader::{clean, ReaderConfig};
3use std::collections::HashSet;
4
5/// Export thread optimized for AI consumption.
6/// Uses the reader pipeline to strip noise, producing a token-efficient representation.
7pub fn export_llm_context(thread: &ExportThread, reader_config: &ReaderConfig) -> String {
8    let mut out = String::new();
9
10    let participants: Vec<&str> = thread
11        .messages
12        .iter()
13        .map(|m| m.from_email.as_str())
14        .collect::<HashSet<_>>()
15        .into_iter()
16        .collect();
17
18    out.push_str(&format!("Thread: {}\n", thread.subject));
19    out.push_str(&format!("Participants: {}\n", participants.join(", ")));
20    out.push_str(&format!("Messages: {}\n", thread.messages.len()));
21
22    for msg in &thread.messages {
23        out.push_str("\n---\n");
24
25        let date = msg.date.format("%b %d %H:%M");
26        out.push_str(&format!("[{}, {}]\n", msg.from_email, date));
27
28        // Run reader pipeline for maximum noise reduction
29        let reader_output = clean(
30            msg.body_text.as_deref(),
31            msg.body_html.as_deref(),
32            reader_config,
33        );
34        out.push_str(&reader_output.content);
35        out.push('\n');
36
37        // Attachment metadata (no binary content)
38        if !msg.attachments.is_empty() {
39            let att_summary: Vec<String> = msg
40                .attachments
41                .iter()
42                .map(|a| format!("{} ({}KB)", a.filename, a.size_bytes / 1024))
43                .collect();
44            out.push_str(&format!("\nAttachments: {}\n", att_summary.join(", ")));
45        }
46    }
47
48    out
49}
50
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::{empty_body_thread, sample_thread, single_message_thread};
    use crate::{ExportAttachment, ExportMessage, ExportThread};
    use chrono::TimeZone;

    /// Reader configuration shared by every test in this module.
    fn default_config() -> ReaderConfig {
        ReaderConfig::default()
    }

    /// Runs `export_llm_context` on `thread` with the default reader configuration.
    fn render(thread: &ExportThread) -> String {
        export_llm_context(thread, &default_config())
    }

    #[test]
    fn llm_starts_with_thread_metadata() {
        let rendered = render(&sample_thread());
        assert!(rendered.starts_with("Thread: Deployment rollback plan\n"));
        for needle in ["Participants: ", "Messages: 2\n"] {
            assert!(rendered.contains(needle), "missing {:?}", needle);
        }
    }

    #[test]
    fn llm_uses_compact_date_format() {
        // Expect the short "Mar 17 09:30" form rather than a full RFC 2822 date.
        let rendered = render(&sample_thread());
        assert!(rendered.contains("Mar 17 09:30"));
    }

    #[test]
    fn llm_uses_email_not_name() {
        // Message headers identify the sender by address, not display name.
        let rendered = render(&sample_thread());
        for header_start in ["[alice@example.com,", "[bob@example.com,"] {
            assert!(rendered.contains(header_start), "missing {:?}", header_start);
        }
    }

    #[test]
    fn llm_does_not_contain_full_headers() {
        // Only the compact metadata is emitted; no "Subject:" or "Date:" lines.
        let rendered = render(&sample_thread());
        for header in ["Subject:", "Date:"] {
            assert!(!rendered.contains(header), "unexpected {:?}", header);
        }
    }

    #[test]
    fn llm_includes_cleaned_body_content() {
        let rendered = render(&sample_thread());
        for phrase in ["rollback strategy", "blue-green deployment"] {
            assert!(rendered.contains(phrase), "missing {:?}", phrase);
        }
    }

    #[test]
    fn llm_strips_signatures_from_body() {
        let body = "Important content here.\n\n-- \nAlice\nSenior Engineer\nalice@company.com";
        let mut thread = sample_thread();
        thread.messages[0].body_text = Some(body.into());

        let rendered = render(&thread);
        assert!(rendered.contains("Important content here"));
        assert!(!rendered.contains("Senior Engineer"));
    }

    #[test]
    fn llm_includes_attachment_summary() {
        let rendered = render(&sample_thread());
        assert!(rendered.contains("Attachments: runbook.pdf (240KB)"));
    }

    #[test]
    fn llm_handles_empty_body() {
        // Must not panic, and the delimiter and header are still emitted.
        let rendered = render(&empty_body_thread());
        assert!(rendered.contains("---"));
        assert!(rendered.contains("[ghost@void.com,"));
    }

    #[test]
    fn llm_prefers_text_over_html() {
        // The fixture carries both a text and an HTML body.
        let rendered = render(&single_message_thread());
        assert!(rendered.contains("Is this working?"));
    }

    #[test]
    fn llm_falls_back_to_html_when_no_text() {
        // Drop the text body so only "<p>Is this working?</p>" remains.
        let mut thread = single_message_thread();
        thread.messages[0].body_text = None;

        let rendered = render(&thread);
        assert!(rendered.contains("Is this working?"));
    }

    #[test]
    fn llm_omits_markdown_formatting_overhead() {
        let thread = sample_thread();
        let markdown = crate::export_markdown(&thread);
        let llm = render(&thread);

        // The LLM format has no "##" headings, bold attachment label, or footer.
        for marker in ["## ", "**Attachments:**", "Exported from mxr"] {
            assert!(!llm.contains(marker), "unexpected {:?}", marker);
        }

        // The actual message content appears in both formats.
        assert!(llm.contains("rollback strategy"));
        assert!(markdown.contains("rollback strategy"));
    }

    #[test]
    fn llm_separates_messages_with_delimiters() {
        // One delimiter per message in the two-message sample thread.
        let rendered = render(&sample_thread());
        assert_eq!(rendered.matches("\n---\n").count(), 2);
    }

    #[test]
    fn llm_with_many_attachments_lists_all() {
        let attachments = vec![
            ExportAttachment {
                filename: "report.pdf".into(),
                size_bytes: 102_400,
                local_path: None,
            },
            ExportAttachment {
                filename: "data.csv".into(),
                size_bytes: 51_200,
                local_path: None,
            },
        ];
        let message = ExportMessage {
            id: "m".into(),
            from_name: None,
            from_email: "a@b.com".into(),
            to: vec![],
            date: chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            subject: "Files".into(),
            body_text: Some("See attached.".into()),
            body_html: None,
            headers_raw: None,
            attachments,
        };
        let thread = ExportThread {
            thread_id: "t".into(),
            subject: "Files".into(),
            messages: vec![message],
        };

        let rendered = render(&thread);
        for entry in ["report.pdf (100KB)", "data.csv (50KB)"] {
            assert!(rendered.contains(entry), "missing {:?}", entry);
        }
    }
}