Skip to main content

mxr_provider_gmail/
parse.rs

1use crate::types::{GmailHeader, GmailMessage, GmailPayload};
2use base64::engine::general_purpose::URL_SAFE_NO_PAD;
3use base64::Engine;
4use chrono::{TimeZone, Utc};
5use mxr_compose::parse::{
6    body_unsubscribe_from_html, calendar_metadata_from_text, decode_format_flowed,
7    parse_address_list as parse_rfc_address_list, parse_headers_from_pairs,
8};
9use mxr_core::{
10    AccountId, Address, AttachmentId, AttachmentMeta, Envelope, MessageBody, MessageFlags,
11    MessageId, TextPlainFormat, ThreadId, UnsubscribeMethod,
12};
13use thiserror::Error;
14
15#[derive(Debug, Error)]
16pub enum ParseError {
17    #[error("Missing required header: {0}")]
18    MissingHeader(String),
19
20    #[error("Invalid date: {0}")]
21    InvalidDate(String),
22
23    #[error("Decode error: {0}")]
24    Decode(String),
25
26    #[error("Invalid headers: {0}")]
27    Headers(String),
28}
29
30pub fn gmail_message_to_envelope(
31    msg: &GmailMessage,
32    account_id: &AccountId,
33) -> Result<Envelope, ParseError> {
34    let headers = msg
35        .payload
36        .as_ref()
37        .and_then(|p| p.headers.as_ref())
38        .map(|h| h.as_slice())
39        .unwrap_or(&[]);
40
41    let internal_date = parse_internal_date(msg.internal_date.as_deref())?;
42    let header_pairs: Vec<(String, String)> = headers
43        .iter()
44        .map(|header| (header.name.clone(), header.value.clone()))
45        .collect();
46    let parsed_headers = parse_headers_from_pairs(&header_pairs, internal_date)
47        .map_err(|err| ParseError::Headers(err.to_string()))?;
48    let body_data = extract_body_data(msg);
49
50    let label_ids = msg.label_ids.as_deref().unwrap_or(&[]);
51    let flags = labels_to_flags(label_ids);
52    let has_attachments = check_has_attachments(msg.payload.as_ref());
53    let unsubscribe = match parsed_headers.unsubscribe {
54        UnsubscribeMethod::None => body_data
55            .text_html
56            .as_deref()
57            .and_then(body_unsubscribe_from_html)
58            .unwrap_or(UnsubscribeMethod::None),
59        unsubscribe => unsubscribe,
60    };
61
62    Ok(Envelope {
63        id: MessageId::from_provider_id("gmail", &msg.id),
64        account_id: account_id.clone(),
65        provider_id: msg.id.clone(),
66        thread_id: ThreadId::from_provider_id("gmail", &msg.thread_id),
67        message_id_header: parsed_headers.message_id_header,
68        in_reply_to: parsed_headers.in_reply_to,
69        references: parsed_headers.references,
70        from: parsed_headers.from.unwrap_or_else(|| Address {
71            name: None,
72            email: "unknown@unknown".to_string(),
73        }),
74        to: parsed_headers.to,
75        cc: parsed_headers.cc,
76        bcc: parsed_headers.bcc,
77        subject: parsed_headers.subject,
78        // Gmail's internalDate is the canonical received timestamp and matches
79        // Gmail mailbox ordering better than arbitrary sender-controlled Date headers.
80        date: internal_date.unwrap_or(parsed_headers.date),
81        flags,
82        snippet: msg.snippet.clone().unwrap_or_default(),
83        has_attachments,
84        size_bytes: msg.size_estimate.unwrap_or(0),
85        unsubscribe,
86        label_provider_ids: msg.label_ids.clone().unwrap_or_default(),
87    })
88}
89
90fn parse_internal_date(
91    internal_date: Option<&str>,
92) -> Result<Option<chrono::DateTime<Utc>>, ParseError> {
93    let Some(internal_date) = internal_date else {
94        return Ok(None);
95    };
96
97    let millis: i64 = internal_date
98        .parse()
99        .map_err(|_| ParseError::InvalidDate(internal_date.to_string()))?;
100    Ok(Some(
101        Utc.timestamp_millis_opt(millis)
102            .single()
103            .unwrap_or_else(Utc::now),
104    ))
105}
106
107pub fn labels_to_flags(label_ids: &[String]) -> MessageFlags {
108    let mut flags = MessageFlags::empty();
109
110    // Gmail: absence of UNREAD means the message is read
111    let has_unread = label_ids.iter().any(|l| l == "UNREAD");
112    if !has_unread {
113        flags |= MessageFlags::READ;
114    }
115
116    for label in label_ids {
117        match label.as_str() {
118            "STARRED" => flags |= MessageFlags::STARRED,
119            "DRAFT" => flags |= MessageFlags::DRAFT,
120            "SENT" => flags |= MessageFlags::SENT,
121            "TRASH" => flags |= MessageFlags::TRASH,
122            "SPAM" => flags |= MessageFlags::SPAM,
123            _ => {}
124        }
125    }
126
127    flags
128}
129
130pub fn parse_list_unsubscribe(headers: &[GmailHeader]) -> UnsubscribeMethod {
131    let header_pairs: Vec<(String, String)> = headers
132        .iter()
133        .map(|header| (header.name.clone(), header.value.clone()))
134        .collect();
135    parse_headers_from_pairs(&header_pairs, Some(Utc::now()))
136        .map(|parsed| parsed.unsubscribe)
137        .unwrap_or(UnsubscribeMethod::None)
138}
139
140pub fn parse_address(raw: &str) -> Address {
141    parse_rfc_address_list(raw)
142        .into_iter()
143        .next()
144        .unwrap_or(Address {
145            name: None,
146            email: raw.trim().to_string(),
147        })
148}
149
150pub fn parse_address_list(raw: &str) -> Vec<Address> {
151    parse_rfc_address_list(raw)
152}
153
154pub fn base64_decode_url(data: &str) -> Result<String, anyhow::Error> {
155    let bytes = URL_SAFE_NO_PAD.decode(data)?;
156    Ok(String::from_utf8(bytes)?)
157}
158
159fn check_has_attachments(payload: Option<&GmailPayload>) -> bool {
160    let payload = match payload {
161        Some(p) => p,
162        None => return false,
163    };
164
165    // If this part has a non-empty filename, it's an attachment
166    if let Some(ref filename) = payload.filename {
167        if !filename.is_empty() {
168            return true;
169        }
170    }
171
172    // If this part has an attachment_id in its body, it's an attachment
173    if let Some(ref body) = payload.body {
174        if body.attachment_id.is_some() {
175            return true;
176        }
177    }
178
179    // Recurse into child parts
180    if let Some(ref parts) = payload.parts {
181        for part in parts {
182            if check_has_attachments(Some(part)) {
183                return true;
184            }
185        }
186    }
187
188    false
189}
190
191#[derive(Debug, Default)]
192struct ExtractedBodyData {
193    text_plain: Option<String>,
194    text_html: Option<String>,
195    attachments: Vec<AttachmentMeta>,
196    calendar: Option<mxr_core::types::CalendarMetadata>,
197}
198
199/// Extract text_plain and text_html from a GmailMessage payload.
200pub fn extract_body(msg: &GmailMessage) -> (Option<String>, Option<String>, Vec<AttachmentMeta>) {
201    let body_data = extract_body_data(msg);
202    (
203        body_data.text_plain,
204        body_data.text_html,
205        body_data.attachments,
206    )
207}
208
209fn extract_body_data(msg: &GmailMessage) -> ExtractedBodyData {
210    let mut data = ExtractedBodyData::default();
211    if let Some(ref payload) = msg.payload {
212        walk_parts(payload, &msg.id, &mut data);
213    }
214    data
215}
216
217fn walk_parts(payload: &GmailPayload, provider_msg_id: &str, body_data: &mut ExtractedBodyData) {
218    let mime = payload
219        .mime_type
220        .as_deref()
221        .unwrap_or("application/octet-stream");
222
223    // Check for attachment (has filename or attachment_id)
224    let is_attachment = payload
225        .filename
226        .as_ref()
227        .map(|f| !f.is_empty())
228        .unwrap_or(false)
229        || payload
230            .body
231            .as_ref()
232            .and_then(|b| b.attachment_id.as_ref())
233            .is_some();
234
235    if is_attachment && !mime.starts_with("multipart/") {
236        let filename = payload
237            .filename
238            .clone()
239            .unwrap_or_else(|| "unnamed".to_string());
240        let size = payload.body.as_ref().and_then(|b| b.size).unwrap_or(0);
241        let provider_id = payload
242            .body
243            .as_ref()
244            .and_then(|b| b.attachment_id.clone())
245            .unwrap_or_default();
246
247        body_data.attachments.push(AttachmentMeta {
248            id: AttachmentId::from_provider_id(
249                "gmail",
250                &format!("{provider_msg_id}:{provider_id}"),
251            ),
252            message_id: MessageId::from_provider_id("gmail", provider_msg_id),
253            filename,
254            mime_type: mime.to_string(),
255            size_bytes: size,
256            local_path: None,
257            provider_id,
258        });
259        return;
260    }
261
262    // Leaf text node
263    match mime {
264        "text/plain" if body_data.text_plain.is_none() => {
265            if let Some(data) = payload.body.as_ref().and_then(|b| b.data.as_ref()) {
266                if let Ok(decoded) = base64_decode_url(data) {
267                    body_data.text_plain = Some(decoded);
268                }
269            }
270        }
271        "text/html" if body_data.text_html.is_none() => {
272            if let Some(data) = payload.body.as_ref().and_then(|b| b.data.as_ref()) {
273                if let Ok(decoded) = base64_decode_url(data) {
274                    body_data.text_html = Some(decoded);
275                }
276            }
277        }
278        "text/calendar" if body_data.calendar.is_none() => {
279            if let Some(data) = payload.body.as_ref().and_then(|b| b.data.as_ref()) {
280                if let Ok(decoded) = base64_decode_url(data) {
281                    body_data.calendar = calendar_metadata_from_text(&decoded);
282                }
283            }
284        }
285        _ => {}
286    }
287
288    // Recurse into child parts
289    if let Some(ref parts) = payload.parts {
290        for part in parts {
291            walk_parts(part, provider_msg_id, body_data);
292        }
293    }
294}
295
296pub fn extract_message_body(msg: &GmailMessage) -> MessageBody {
297    let header_pairs: Vec<(String, String)> = msg
298        .payload
299        .as_ref()
300        .and_then(|payload| payload.headers.as_ref())
301        .map(|headers| {
302            headers
303                .iter()
304                .map(|header| (header.name.clone(), header.value.clone()))
305                .collect()
306        })
307        .unwrap_or_default();
308    let parsed_headers = parse_headers_from_pairs(&header_pairs, Some(Utc::now())).ok();
309    let body_data = extract_body_data(msg);
310    let mut metadata = parsed_headers
311        .map(|parsed| parsed.metadata)
312        .unwrap_or_default();
313    metadata.calendar = body_data.calendar.clone();
314    let text_plain = match (&body_data.text_plain, &metadata.text_plain_format) {
315        (Some(text_plain), Some(TextPlainFormat::Flowed { delsp })) => {
316            Some(decode_format_flowed(text_plain, *delsp))
317        }
318        (Some(text_plain), _) => Some(text_plain.clone()),
319        (None, _) => None,
320    };
321    MessageBody {
322        message_id: MessageId::from_provider_id("gmail", &msg.id),
323        text_plain,
324        text_html: body_data.text_html,
325        attachments: body_data.attachments,
326        fetched_at: Utc::now(),
327        metadata,
328    }
329}
330
331#[cfg(test)]
332mod tests {
333    use super::*;
334    use crate::types::GmailBody;
335    use base64::engine::general_purpose::URL_SAFE_NO_PAD;
336    use chrono::Datelike;
337    use mail_parser::MessageParser;
338    use mxr_compose::parse::extract_raw_header_block;
339    use mxr_test_support::{fixture_stem, standards_fixture_bytes, standards_fixture_names};
340    use serde_json::json;
341
342    fn make_headers(pairs: &[(&str, &str)]) -> Vec<GmailHeader> {
343        pairs
344            .iter()
345            .map(|(n, v)| GmailHeader {
346                name: n.to_string(),
347                value: v.to_string(),
348            })
349            .collect()
350    }
351
352    fn make_test_message() -> GmailMessage {
353        GmailMessage {
354            id: "msg-001".to_string(),
355            thread_id: "thread-001".to_string(),
356            label_ids: Some(vec!["INBOX".to_string(), "UNREAD".to_string()]),
357            snippet: Some("Hello world preview".to_string()),
358            history_id: Some("12345".to_string()),
359            internal_date: Some("1700000000000".to_string()),
360            size_estimate: Some(2048),
361            payload: Some(GmailPayload {
362                mime_type: Some("text/plain".to_string()),
363                headers: Some(make_headers(&[
364                    ("From", "Alice <alice@example.com>"),
365                    ("To", "Bob <bob@example.com>"),
366                    ("Subject", "Test email"),
367                    ("Message-ID", "<test123@example.com>"),
368                    ("In-Reply-To", "<prev@example.com>"),
369                    ("References", "<first@example.com> <prev@example.com>"),
370                ])),
371                body: Some(GmailBody {
372                    attachment_id: None,
373                    size: Some(100),
374                    data: None,
375                }),
376                parts: None,
377                filename: None,
378            }),
379        }
380    }
381
382    fn gmail_message_from_fixture(name: &str) -> GmailMessage {
383        let raw = standards_fixture_bytes(name);
384        let parsed = MessageParser::default().parse(&raw).unwrap();
385        let mut headers = Vec::new();
386        let mut current_name = String::new();
387        let mut current_value = String::new();
388        for line in extract_raw_header_block(&raw).unwrap().lines() {
389            if line.starts_with(' ') || line.starts_with('\t') {
390                current_value.push(' ');
391                current_value.push_str(line.trim());
392                continue;
393            }
394
395            if !current_name.is_empty() {
396                headers.push(GmailHeader {
397                    name: current_name.clone(),
398                    value: current_value.trim().to_string(),
399                });
400            }
401
402            if let Some((name, value)) = line.split_once(':') {
403                current_name = name.to_string();
404                current_value = value.trim().to_string();
405            } else {
406                current_name.clear();
407                current_value.clear();
408            }
409        }
410        if !current_name.is_empty() {
411            headers.push(GmailHeader {
412                name: current_name,
413                value: current_value.trim().to_string(),
414            });
415        }
416        let body = parsed
417            .body_text(0)
418            .or_else(|| parsed.body_html(0))
419            .unwrap_or_default();
420
421        GmailMessage {
422            id: format!("fixture-{}", fixture_stem(name)),
423            thread_id: format!("fixture-thread-{}", fixture_stem(name)),
424            label_ids: Some(vec!["INBOX".to_string(), "UNREAD".to_string()]),
425            snippet: Some(body.lines().next().unwrap_or_default().to_string()),
426            history_id: Some("500".to_string()),
427            internal_date: Some("1710495000000".to_string()),
428            size_estimate: Some(raw.len() as u64),
429            payload: Some(GmailPayload {
430                mime_type: Some("text/plain".to_string()),
431                headers: Some(headers),
432                body: Some(GmailBody {
433                    attachment_id: None,
434                    size: Some(body.len() as u64),
435                    data: Some(URL_SAFE_NO_PAD.encode(body.as_bytes())),
436                }),
437                parts: None,
438                filename: None,
439            }),
440        }
441    }
442
443    #[test]
444    fn parse_gmail_message_to_envelope() {
445        let msg = make_test_message();
446        let account_id = AccountId::from_provider_id("gmail", "test-account");
447        let env = gmail_message_to_envelope(&msg, &account_id).unwrap();
448
449        assert_eq!(env.provider_id, "msg-001");
450        assert_eq!(env.from.email, "alice@example.com");
451        assert_eq!(env.from.name, Some("Alice".to_string()));
452        assert_eq!(env.to.len(), 1);
453        assert_eq!(env.to[0].email, "bob@example.com");
454        assert_eq!(env.subject, "Test email");
455        assert_eq!(
456            env.message_id_header,
457            Some("<test123@example.com>".to_string())
458        );
459        assert_eq!(env.in_reply_to, Some("<prev@example.com>".to_string()));
460        assert_eq!(env.references.len(), 2);
461        assert_eq!(env.snippet, "Hello world preview");
462        assert_eq!(env.size_bytes, 2048);
463        assert_eq!(
464            env.date,
465            Utc.timestamp_millis_opt(1_700_000_000_000)
466                .single()
467                .unwrap()
468        );
469        // UNREAD present → not read
470        assert!(!env.flags.contains(MessageFlags::READ));
471        // Deterministic IDs
472        assert_eq!(env.id, MessageId::from_provider_id("gmail", "msg-001"));
473        assert_eq!(
474            env.thread_id,
475            ThreadId::from_provider_id("gmail", "thread-001")
476        );
477    }
478
479    #[test]
480    fn parse_list_unsubscribe_one_click() {
481        let headers = make_headers(&[
482            (
483                "List-Unsubscribe",
484                "<https://unsub.example.com/oneclick>, <mailto:unsub@example.com>",
485            ),
486            ("List-Unsubscribe-Post", "List-Unsubscribe=One-Click"),
487        ]);
488        let result = parse_list_unsubscribe(&headers);
489        assert!(matches!(
490            result,
491            UnsubscribeMethod::OneClick { ref url } if url == "https://unsub.example.com/oneclick"
492        ));
493    }
494
495    #[test]
496    fn parse_list_unsubscribe_mailto() {
497        let headers = make_headers(&[("List-Unsubscribe", "<mailto:unsub@example.com>")]);
498        let result = parse_list_unsubscribe(&headers);
499        assert!(matches!(
500            result,
501            UnsubscribeMethod::Mailto { ref address, .. } if address == "unsub@example.com"
502        ));
503    }
504
505    #[test]
506    fn parse_list_unsubscribe_http() {
507        let headers = make_headers(&[("List-Unsubscribe", "<https://unsub.example.com/link>")]);
508        let result = parse_list_unsubscribe(&headers);
509        assert!(matches!(
510            result,
511            UnsubscribeMethod::HttpLink { ref url } if url == "https://unsub.example.com/link"
512        ));
513    }
514
515    #[test]
516    fn parse_address_name_angle() {
517        let addr = parse_address("Alice <alice@example.com>");
518        assert_eq!(addr.name, Some("Alice".to_string()));
519        assert_eq!(addr.email, "alice@example.com");
520    }
521
522    #[test]
523    fn parse_address_bare() {
524        let addr = parse_address("alice@example.com");
525        assert_eq!(addr.name, None);
526        assert_eq!(addr.email, "alice@example.com");
527    }
528
529    #[test]
530    fn labels_to_flags_all_combinations() {
531        // No UNREAD → READ
532        let flags = labels_to_flags(&["INBOX".to_string()]);
533        assert!(flags.contains(MessageFlags::READ));
534
535        // UNREAD present → not READ
536        let flags = labels_to_flags(&["UNREAD".to_string()]);
537        assert!(!flags.contains(MessageFlags::READ));
538
539        // All special labels
540        let flags = labels_to_flags(&[
541            "STARRED".to_string(),
542            "DRAFT".to_string(),
543            "SENT".to_string(),
544            "TRASH".to_string(),
545            "SPAM".to_string(),
546        ]);
547        assert!(flags.contains(MessageFlags::READ)); // no UNREAD
548        assert!(flags.contains(MessageFlags::STARRED));
549        assert!(flags.contains(MessageFlags::DRAFT));
550        assert!(flags.contains(MessageFlags::SENT));
551        assert!(flags.contains(MessageFlags::TRASH));
552        assert!(flags.contains(MessageFlags::SPAM));
553    }
554
555    #[test]
556    fn base64url_decode() {
557        // "Hello, World!" in URL-safe base64 no padding
558        let encoded = "SGVsbG8sIFdvcmxkIQ";
559        let decoded = base64_decode_url(encoded).unwrap();
560        assert_eq!(decoded, "Hello, World!");
561    }
562
563    #[test]
564    fn parse_list_unsubscribe_multi_uri_prefers_one_click() {
565        // Multiple URIs: mailto + https with one-click header
566        let headers = make_headers(&[
567            (
568                "List-Unsubscribe",
569                "<mailto:unsub@example.com>, <https://unsub.example.com/oneclick>",
570            ),
571            ("List-Unsubscribe-Post", "List-Unsubscribe=One-Click"),
572        ]);
573        let result = parse_list_unsubscribe(&headers);
574        // With one-click header, prefers the HTTPS URL for OneClick
575        assert!(matches!(
576            result,
577            UnsubscribeMethod::OneClick { ref url } if url == "https://unsub.example.com/oneclick"
578        ));
579    }
580
581    #[test]
582    fn parse_list_unsubscribe_missing() {
583        let headers = make_headers(&[("Subject", "No unsubscribe here")]);
584        let result = parse_list_unsubscribe(&headers);
585        assert!(matches!(result, UnsubscribeMethod::None));
586    }
587
588    #[test]
589    fn parse_address_quoted_name() {
590        let addr = parse_address("\"Last, First\" <first.last@example.com>");
591        assert_eq!(addr.name, Some("Last, First".to_string()));
592        assert_eq!(addr.email, "first.last@example.com");
593    }
594
595    #[test]
596    fn parse_address_empty_string() {
597        let addr = parse_address("");
598        assert!(addr.name.is_none());
599        assert!(addr.email.is_empty());
600    }
601
602    #[test]
603    fn parse_address_list_with_quoted_commas() {
604        let addrs = parse_address_list("\"Last, First\" <a@example.com>, Bob <b@example.com>");
605        assert_eq!(addrs.len(), 2);
606        assert_eq!(addrs[0].name, Some("Last, First".to_string()));
607        assert_eq!(addrs[0].email, "a@example.com");
608        assert_eq!(addrs[1].email, "b@example.com");
609    }
610
611    #[test]
612    fn parse_deeply_nested_mime() {
613        // multipart/mixed containing multipart/alternative
614        let msg = GmailMessage {
615            id: "msg-nested".to_string(),
616            thread_id: "thread-nested".to_string(),
617            label_ids: None,
618            snippet: None,
619            history_id: None,
620            internal_date: None,
621            size_estimate: None,
622            payload: Some(GmailPayload {
623                mime_type: Some("multipart/mixed".to_string()),
624                headers: None,
625                body: None,
626                parts: Some(vec![
627                    GmailPayload {
628                        mime_type: Some("multipart/alternative".to_string()),
629                        headers: None,
630                        body: None,
631                        parts: Some(vec![
632                            GmailPayload {
633                                mime_type: Some("text/plain".to_string()),
634                                headers: None,
635                                body: Some(GmailBody {
636                                    attachment_id: None,
637                                    size: Some(5),
638                                    data: Some("SGVsbG8".to_string()), // "Hello"
639                                }),
640                                parts: None,
641                                filename: None,
642                            },
643                            GmailPayload {
644                                mime_type: Some("text/html".to_string()),
645                                headers: None,
646                                body: Some(GmailBody {
647                                    attachment_id: None,
648                                    size: Some(12),
649                                    data: Some("PGI-SGVsbG88L2I-".to_string()),
650                                }),
651                                parts: None,
652                                filename: None,
653                            },
654                        ]),
655                        filename: None,
656                    },
657                    GmailPayload {
658                        mime_type: Some("application/pdf".to_string()),
659                        headers: None,
660                        body: Some(GmailBody {
661                            attachment_id: Some("att-001".to_string()),
662                            size: Some(50000),
663                            data: None,
664                        }),
665                        parts: None,
666                        filename: Some("report.pdf".to_string()),
667                    },
668                ]),
669                filename: None,
670            }),
671        };
672
673        let (text_plain, text_html, attachments) = extract_body(&msg);
674        assert_eq!(text_plain, Some("Hello".to_string()));
675        assert!(text_html.is_some());
676        assert_eq!(attachments.len(), 1);
677        assert_eq!(attachments[0].filename, "report.pdf");
678        assert_eq!(attachments[0].mime_type, "application/pdf");
679        assert_eq!(attachments[0].size_bytes, 50000);
680    }
681
682    #[test]
683    fn parse_message_with_attachments_metadata() {
684        let msg = GmailMessage {
685            id: "msg-att".to_string(),
686            thread_id: "thread-att".to_string(),
687            label_ids: Some(vec!["INBOX".to_string()]),
688            snippet: Some("See attached".to_string()),
689            history_id: None,
690            internal_date: Some("1700000000000".to_string()),
691            size_estimate: Some(100000),
692            payload: Some(GmailPayload {
693                mime_type: Some("multipart/mixed".to_string()),
694                headers: Some(make_headers(&[
695                    ("From", "alice@example.com"),
696                    ("To", "bob@example.com"),
697                    ("Subject", "Files attached"),
698                ])),
699                body: None,
700                parts: Some(vec![
701                    GmailPayload {
702                        mime_type: Some("text/plain".to_string()),
703                        headers: None,
704                        body: Some(GmailBody {
705                            attachment_id: None,
706                            size: Some(5),
707                            data: Some("SGVsbG8".to_string()),
708                        }),
709                        parts: None,
710                        filename: None,
711                    },
712                    GmailPayload {
713                        mime_type: Some("image/png".to_string()),
714                        headers: None,
715                        body: Some(GmailBody {
716                            attachment_id: Some("att-img".to_string()),
717                            size: Some(25000),
718                            data: None,
719                        }),
720                        parts: None,
721                        filename: Some("screenshot.png".to_string()),
722                    },
723                ]),
724                filename: None,
725            }),
726        };
727
728        let account_id = AccountId::from_provider_id("gmail", "test-account");
729        let env = gmail_message_to_envelope(&msg, &account_id).unwrap();
730        assert!(env.has_attachments);
731        assert_eq!(env.subject, "Files attached");
732
733        let (_, _, attachments) = extract_body(&msg);
734        assert_eq!(attachments.len(), 1);
735        assert_eq!(attachments[0].filename, "screenshot.png");
736        assert_eq!(attachments[0].mime_type, "image/png");
737    }
738
739    #[test]
740    fn gmail_envelope_prefers_internal_date_over_header_date() {
741        let mut msg = make_test_message();
742        msg.internal_date = Some("1710495000000".to_string());
743        msg.payload.as_mut().unwrap().headers = Some(make_headers(&[
744            ("From", "Alice <alice@example.com>"),
745            ("To", "Bob <bob@example.com>"),
746            ("Subject", "Timestamp sanity"),
747            ("Date", "Sun, 15 Jun 2025 09:08:00 +0000"),
748        ]));
749
750        let account_id = AccountId::from_provider_id("gmail", "test-account");
751        let env = gmail_message_to_envelope(&msg, &account_id).unwrap();
752
753        assert_eq!(
754            env.date,
755            Utc.timestamp_millis_opt(1_710_495_000_000)
756                .single()
757                .unwrap()
758        );
759    }
760
761    #[test]
762    fn gmail_envelope_falls_back_to_header_date_when_internal_date_missing() {
763        let mut msg = make_test_message();
764        msg.internal_date = None;
765        msg.payload.as_mut().unwrap().headers = Some(make_headers(&[
766            ("From", "Alice <alice@example.com>"),
767            ("To", "Bob <bob@example.com>"),
768            ("Subject", "Header date fallback"),
769            ("Date", "Sun, 15 Jun 2025 09:08:00 +0000"),
770        ]));
771
772        let account_id = AccountId::from_provider_id("gmail", "test-account");
773        let env = gmail_message_to_envelope(&msg, &account_id).unwrap();
774
775        assert_eq!(env.date.year(), 2025);
776        assert_eq!(env.date.month(), 6);
777        assert_eq!(env.date.day(), 15);
778    }
779
780    #[test]
781    fn body_extraction_multipart() {
782        let msg = GmailMessage {
783            id: "msg-mp".to_string(),
784            thread_id: "thread-mp".to_string(),
785            label_ids: None,
786            snippet: None,
787            history_id: None,
788            internal_date: None,
789            size_estimate: None,
790            payload: Some(GmailPayload {
791                mime_type: Some("multipart/alternative".to_string()),
792                headers: None,
793                body: None,
794                parts: Some(vec![
795                    GmailPayload {
796                        mime_type: Some("text/plain".to_string()),
797                        headers: None,
798                        body: Some(GmailBody {
799                            attachment_id: None,
800                            size: Some(5),
801                            // "Hello" in URL-safe base64 no padding
802                            data: Some("SGVsbG8".to_string()),
803                        }),
804                        parts: None,
805                        filename: None,
806                    },
807                    GmailPayload {
808                        mime_type: Some("text/html".to_string()),
809                        headers: None,
810                        body: Some(GmailBody {
811                            attachment_id: None,
812                            size: Some(12),
813                            // "<b>Hello</b>" in URL-safe base64 no padding
814                            data: Some("PGI-SGVsbG88L2I-".to_string()),
815                        }),
816                        parts: None,
817                        filename: None,
818                    },
819                ]),
820                filename: None,
821            }),
822        };
823
824        let (text_plain, text_html, _) = extract_body(&msg);
825        assert_eq!(text_plain, Some("Hello".to_string()));
826        assert!(text_html.is_some());
827    }
828
829    #[test]
830    fn standards_fixture_like_gmail_message_snapshot() {
831        let msg: GmailMessage = serde_json::from_value(json!({
832            "id": "fixture-1",
833            "threadId": "fixture-thread",
834            "labelIds": ["INBOX", "UNREAD"],
835            "snippet": "Fixture snippet",
836            "historyId": "500",
837            "internalDate": "1710495000000",
838            "sizeEstimate": 4096,
839            "payload": {
840                "mimeType": "multipart/mixed",
841                "headers": [
842                    {"name": "From", "value": "Alice Smith <alice@example.com>"},
843                    {"name": "To", "value": "Bob Example <bob@example.com>"},
844                    {"name": "Subject", "value": "Planning meeting"},
845                    {"name": "Date", "value": "Tue, 19 Mar 2024 14:15:00 +0000"},
846                    {"name": "Message-ID", "value": "<calendar@example.com>"},
847                    {"name": "Authentication-Results", "value": "mx.example.net; dkim=pass"},
848                    {"name": "Content-Language", "value": "en"},
849                    {"name": "List-Unsubscribe", "value": "<https://example.com/unsubscribe>"}
850                ],
851                "parts": [
852                    {
853                        "mimeType": "text/plain",
854                        "body": {"size": 33, "data": "UGxlYXNlIGpvaW4gdGhlIHBsYW5uaW5nIG1lZXRpbmcu"}
855                    },
856                    {
857                        "mimeType": "text/html",
858                        "body": {"size": 76, "data": "PHA-PlBsZWFzZSBqb2luIHRoZSA8YSBocmVmPSJodHRwczovL2V4YW1wbGUuY29tL3Vuc3Vic2NyaWJlIj5tYWlsIHByZWZlcmVuY2VzPC9hPi48L3A-"}
859                    },
860                    {
861                        "mimeType": "application/pdf",
862                        "filename": "report.pdf",
863                        "body": {"attachmentId": "att-1", "size": 5}
864                    },
865                    {
866                        "mimeType": "text/calendar",
867                        "body": {"size": 82, "data": "QkVHSU46VkNBTEVOREFSDQpNRVRIT0Q6UkVRVUVTVA0KQkVHSU46VkVWRU5UDQpTVU1NQVJZOlBsYW5uaW5nIG1lZXRpbmcNCkVORDpWRVZFTlQNCkVORDpWQ0FMRU5EQVI"}
868                    }
869                ]
870            }
871        }))
872        .unwrap();
873
874        let account_id = AccountId::from_provider_id("gmail", "test-account");
875        let envelope = gmail_message_to_envelope(&msg, &account_id).unwrap();
876        let body = extract_message_body(&msg);
877        insta::assert_yaml_snapshot!(
878            "gmail_fixture_message",
879            json!({
880                "subject": envelope.subject,
881                "unsubscribe": format!("{:?}", envelope.unsubscribe),
882                "flags": envelope.flags.bits(),
883                "attachment_filenames": body.attachments.iter().map(|attachment| attachment.filename.clone()).collect::<Vec<_>>(),
884                "calendar": body.metadata.calendar,
885                "auth_results": body.metadata.auth_results,
886                "content_language": body.metadata.content_language,
887                "plain_text": body.text_plain,
888            })
889        );
890    }
891
892    #[test]
893    fn standards_fixture_gmail_header_matrix_snapshots() {
894        let account_id = AccountId::from_provider_id("gmail", "matrix-account");
895
896        for fixture in standards_fixture_names() {
897            let msg = gmail_message_from_fixture(fixture);
898            let envelope = gmail_message_to_envelope(&msg, &account_id).unwrap();
899            let body = extract_message_body(&msg);
900
901            insta::assert_yaml_snapshot!(
902                format!("gmail_fixture__{}", fixture_stem(fixture)),
903                json!({
904                    "subject": envelope.subject,
905                    "from": envelope.from,
906                    "to": envelope.to,
907                    "cc": envelope.cc,
908                    "message_id": envelope.message_id_header,
909                    "in_reply_to": envelope.in_reply_to,
910                    "references": envelope.references,
911                    "unsubscribe": format!("{:?}", envelope.unsubscribe),
912                    "list_id": body.metadata.list_id,
913                    "auth_results": body.metadata.auth_results,
914                    "content_language": body.metadata.content_language,
915                    "text_plain_format": format!("{:?}", body.metadata.text_plain_format),
916                    "plain_excerpt": body.text_plain.as_deref().map(|text| text.lines().take(2).collect::<Vec<_>>().join("\n")),
917                })
918            );
919        }
920    }
921}