Skip to main content

eml_codec/message/
mod.rs

1/// Representation of all headers in a toplevel message
2pub mod field;
3
4#[cfg(feature = "arbitrary")]
5use arbitrary::Arbitrary;
6use bounded_static::ToStatic;
7
8use crate::header;
9use crate::i18n::ContainsUtf8;
10use crate::imf;
11use crate::message::field::{MessageEntry, MessageField, NaiveMessageFields};
12use crate::mime;
13use crate::part;
14use crate::print::{print_seq, Formatter, Print};
15use crate::raw_input::RawInput;
16#[cfg(feature = "arbitrary")]
17use crate::{
18    arbitrary_utils::{arbitrary_shuffle, arbitrary_vec_where},
19    fuzz_eq::FuzzEq,
20    imf::Imf,
21    part::MimeBody,
22};
23
/// A complete **toplevel message**.
///
/// This represents a complete "email" that can be sent and received over the
/// wire, for example.
#[derive(Clone, Debug, PartialEq, ToStatic)]
#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
pub struct Message<'a> {
    // NOTE(review): the first invariant used to name `all_fields`; no such field
    // exists here, so it presumably refers to `entries` — confirm.
    // Invariant: `entries` must contain an entry for every piece of information
    // contained in `imf` and `mime_body`'s mime headers that is mandatory or is
    // not the default value.
    // Invariant: IMF trace fields must occur before any other IMF or MIME fields.
    // Invariant: the indices of Trace, Comments and Keywords entries occur in-order
    // (0, 1, ...). In other words, it is the respective Vec in `imf` that contains
    // the referenced data that defines the order.
    /// IMF header data of the message.
    pub imf: imf::Imf<'a>,
    /// Body of the message, together with its MIME headers (reachable through
    /// `mime_body.mime()`).
    pub mime_body: part::MimeBody<'a>,
    /// Header entries, in the order in which `field_list` returns them and in
    /// which they are printed. `Imf` and `MIME` entries reference data stored in
    /// `imf` and `mime_body`; `Unstructured` entries carry their own content.
    pub entries: Vec<MessageEntry<'a>>,
    /// The whole input the message was parsed from (`RawInput::none()` for
    /// messages produced by the `Arbitrary` implementation).
    pub raw: RawInput<'a>,
    /// The leading part of the input that the header parser consumed, i.e.
    /// everything that comes before the body input.
    pub raw_headers: RawInput<'a>,
}
42
43impl<'a> Message<'a> {
44    pub fn contains_utf8_headers(&self) -> bool {
45        self.entries
46            .iter()
47            .find(|f| match f {
48                field::MessageEntry::Unstructured(u) => u.contains_utf8(),
49                _ => false,
50            })
51            .is_some()
52            || self.imf.contains_utf8()
53            || self.mime_body.mime().contains_utf8()
54    }
55
56    // TODO: return an iterator instead of a Vec?
57    pub fn field_list(&self) -> Vec<MessageField<'a>> {
58        let mime = self.mime_body.mime();
59        let mut v = vec![];
60        for e in &self.entries {
61            // SAFETY: `self.entries` must only contain entries that actually
62            // appear in self.imf/self.mime_body.mime()
63            let field = match e {
64                MessageEntry::MIME { e, raw_body } => MessageField::MIME {
65                    f: mime.get_field(*e).unwrap(),
66                    raw_body: raw_body.clone(),
67                },
68                MessageEntry::Imf { e, raw_body } => MessageField::Imf {
69                    f: self.imf.get_field(*e).unwrap(),
70                    raw_body: raw_body.clone(),
71                },
72                MessageEntry::Unstructured(u) => MessageField::Unstructured(u.clone()),
73            };
74            v.push(field);
75        }
76        v
77    }
78}
79
80impl<'a> Print for Message<'a> {
81    fn print(&self, fmt: &mut impl Formatter) {
82        fmt.begin_line_folding();
83        print_seq(fmt, &self.field_list(), |_| ());
84        if self.imf.mime_version.is_none() {
85            // The RFC requires that an implementation that obeys the MIME RFC
86            // always outputs a MIME-Version header. We do this at printing time
87            // to avoid having to insert a synthetic header in the AST that does
88            // not exist in the input.
89            imf::field::Field::MIMEVersion(imf::mime::Version::default()).print(fmt);
90        }
91        fmt.end_line_folding();
92        fmt.write_crlf();
93        self.mime_body.print_body(fmt);
94    }
95}
96
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Message<'a> {
    /// Generates a message whose `entries` are consistent with the generated
    /// `imf` and `mime_body`: a shuffled trace section first, then a shuffled
    /// section with all remaining fields, both interleaved with unstructured
    /// headers.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        /// Draws unstructured headers whose name is neither an IMF header name
        /// nor a MIME header name.
        fn unknown_headers<'a>(
            u: &mut arbitrary::Unstructured<'a>,
        ) -> arbitrary::Result<Vec<header::Unstructured<'a>>> {
            arbitrary_vec_where(u, |h: &header::Unstructured| {
                !(imf::field::is_imf_header(&h.name) || mime::field::is_mime_header(&h.name))
            })
        }

        // Wraps an IMF field reference into an entry without raw input.
        let imf_entry = |e| MessageEntry::Imf {
            e,
            raw_body: RawInput::none(),
        };

        let mut imf: Imf = u.arbitrary()?;
        // hack: the printer emits a MIME-Version header when the AST has none.
        // Printing and re-parsing such an AST would therefore yield a different
        // AST, breaking the roundtrip property checked by the fuzzer. As a
        // workaround, ASTs without a MIME-Version are never generated.
        imf.mime_version
            .get_or_insert_with(imf::mime::Version::default);
        let (trace_entries, imf_entries) = imf.field_entries();
        let mime_body: MimeBody = u.arbitrary()?;

        // Trace section: trace fields mixed with unstructured headers.
        let mut entries: Vec<_> = trace_entries.into_iter().map(imf_entry).collect();
        entries.extend(
            unknown_headers(u)?
                .into_iter()
                .map(MessageEntry::Unstructured),
        );
        arbitrary_shuffle(u, &mut entries)?;
        // After shuffling, give the Trace entries increasing indices again.
        let mut next_trace = 0;
        for entry in &mut entries {
            if let MessageEntry::Imf {
                e: imf::field::Entry::Trace(idx),
                ..
            } = entry
            {
                *idx = next_trace;
                next_trace += 1;
            }
        }

        // Remaining section: MIME fields, then the other IMF fields, then more
        // unstructured headers, all shuffled together.
        let mut rest: Vec<MessageEntry> = mime_body
            .mime()
            .field_entries()
            .into_iter()
            .map(|e| MessageEntry::MIME {
                e,
                raw_body: RawInput::none(),
            })
            .chain(imf_entries.into_iter().map(imf_entry))
            .collect();
        rest.extend(
            unknown_headers(u)?
                .into_iter()
                .map(MessageEntry::Unstructured),
        );
        arbitrary_shuffle(u, &mut rest)?;
        // After shuffling, give the `Comments` and `Keywords` entries
        // increasing indices again (each kind has its own counter).
        let mut next_comments = 0;
        let mut next_keywords = 0;
        for entry in &mut rest {
            match entry {
                MessageEntry::Imf {
                    e: imf::field::Entry::Comments(idx),
                    ..
                } => {
                    *idx = next_comments;
                    next_comments += 1;
                }
                MessageEntry::Imf {
                    e: imf::field::Entry::Keywords(idx),
                    ..
                } => {
                    *idx = next_keywords;
                    next_keywords += 1;
                }
                _ => {}
            }
        }

        // The trace section comes first, followed by everything else.
        entries.append(&mut rest);

        Ok(Message {
            imf,
            mime_body,
            entries,
            raw: RawInput::none(),
            raw_headers: RawInput::none(),
        })
    }
}
204
205/// Parse a toplevel message.
206pub fn message<'a>(input: &'a [u8]) -> Message<'a> {
207    // parse headers
208    let (input_body, headers) = header::header_kv(input);
209    let fields: NaiveMessageFields = headers.into_iter().collect();
210    let mime = fields.mime.to_interpreted(mime::DefaultType::Generic);
211    // parse body
212    let mime_body = part::part_body(mime)(input_body);
213    Message {
214        imf: fields.imf,
215        mime_body,
216        entries: fields.entries,
217        raw: input.into(),
218        raw_headers: input[0..input.len() - input_body.len()].into(),
219    }
220}
221
222pub fn imf<'a>(input: &'a [u8]) -> (&'a [u8], imf::Imf<'a>) {
223    // parse headers
224    let (input_body, headers) = header::header_kv(input);
225    let fields: NaiveMessageFields = headers.into_iter().collect();
226    (input_body, fields.imf)
227}
228
229#[cfg(test)]
230mod tests {
231    use super::*;
232    use crate::imf::address::*;
233    use crate::imf::datetime::DateTime;
234    use crate::imf::mailbox::*;
235    use crate::imf::{From, Imf};
236    use crate::mime::{CommonMIME, MIME};
237    use crate::part::composite::Multipart;
238    use crate::part::discrete::Text;
239    use crate::part::field::EntityEntry;
240    use crate::part::{AnyPart, MimeBody};
241    use crate::print::tests::print_to_vec;
242    use crate::text::charset::EmailCharset;
243    use crate::text::encoding::{
244        Base64Word, EncodedWord, EncodedWordToken, QuotedChunk, QuotedWord,
245    };
246    use crate::text::misc_token::*;
247    use crate::text::words::Atom;
248    use chrono::{FixedOffset, TimeZone};
249    use pretty_assertions::assert_eq;
250
251    fn test_message_roundtrip<'a>(txt: &[u8], parsed: Message<'a>) {
252        assert_eq!(message(txt), parsed.clone());
253        let printed = print_to_vec(parsed);
254        assert_eq!(
255            String::from_utf8_lossy(&printed),
256            String::from_utf8_lossy(txt)
257        )
258    }
259
260    fn test_message_parse_print<'a>(txt: &[u8], parsed: Message<'a>, printed: &[u8]) {
261        assert_eq!(message(txt), parsed.clone());
262        let reprinted = print_to_vec(parsed);
263        assert_eq!(
264            String::from_utf8_lossy(&reprinted),
265            String::from_utf8_lossy(printed)
266        )
267    }
268
269    fn test_message_reprint(txt: &[u8], printed: &[u8]) {
270        let parsed = message(txt);
271        let reprinted = print_to_vec(parsed);
272        assert_eq!(
273            String::from_utf8_lossy(&reprinted),
274            String::from_utf8_lossy(printed)
275        )
276    }
277
    /// Roundtrip of a small single-part message: parsing `fullmail` must give
    /// the AST built below, and printing that AST must give `fullmail` back.
    #[test]
    fn test_simple() {
        let fullmail = b"Date: Tue, 7 Mar 2023 08:00:00 +0200\r
From: someone@example.com\r
To: someone_else@example.com\r
Subject: An  RFC 822  formatted message\r
MIME-Version: 1.0\r
\r
This is the plain text body of the message. Note the blank line
between the header information and the body of the message.";

        test_message_roundtrip(fullmail, {
            // Expected IMF data: From, Date, To, Subject and MIME-Version.
            let from = MailboxRef {
                name: None,
                addrspec: AddrSpec {
                    local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
                        "someone"[..].into(),
                    )))]),
                    domain: Domain::Atoms(vec![Atom("example"[..].into()), Atom("com"[..].into())]),
                },
            };
            let mut imf = Imf::new();
            imf.from = From::Single { from, sender: None };
            imf.date = imf::DateTimeOpt::Some(DateTime(
                FixedOffset::east_opt(2 * 3600)
                    .unwrap()
                    .with_ymd_and_hms(2023, 3, 7, 8, 0, 0)
                    .unwrap(),
            ));
            imf.to = vec![AddressRef::Single(MailboxRef {
                name: None,
                addrspec: AddrSpec {
                    local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
                        "someone_else"[..].into(),
                    )))]),
                    domain: Domain::Atoms(vec![Atom("example"[..].into()), Atom("com"[..].into())]),
                },
            })];
            // The whitespace runs of the subject are kept as separate `Fws`
            // tokens, including the double spaces.
            imf.subject = Some(Unstructured(vec![
                UnstrToken::from_plain(" ", UnstrTxtKind::Fws),
                UnstrToken::from_plain("An", UnstrTxtKind::Txt),
                UnstrToken::from_plain("  ", UnstrTxtKind::Fws),
                UnstrToken::from_plain("RFC", UnstrTxtKind::Txt),
                UnstrToken::from_plain(" ", UnstrTxtKind::Fws),
                UnstrToken::from_plain("822", UnstrTxtKind::Txt),
                UnstrToken::from_plain("  ", UnstrTxtKind::Fws),
                UnstrToken::from_plain("formatted", UnstrTxtKind::Txt),
                UnstrToken::from_plain(" ", UnstrTxtKind::Fws),
                UnstrToken::from_plain("message", UnstrTxtKind::Txt),
            ]));
            imf.mime_version = Some(imf::mime::Version::default());

            // Expected body: a text part whose MIME data is all defaults.
            let mime_body = part::MimeBody::Txt(
                    part::discrete::Text {
                        mime: MIME {
                            ctype: mime::r#type::Text::default(),
                            fields: CommonMIME::default(),
                        },
                        body: b"This is the plain text body of the message. Note the blank line\nbetween the header information and the body of the message."[..].into(),
                        raw_body: RawInput::between(fullmail, b"This is the", b"and the body of the message."),
                    }
                );

            // One entry per header line of `fullmail`, in the same order.
            let entries = vec![
                MessageEntry::Imf {
                    e: imf::field::Entry::Date,
                    raw_body: RawInput::between_excl(fullmail, b"Date:", b"\r\nFrom:"),
                },
                MessageEntry::Imf {
                    e: imf::field::Entry::From,
                    raw_body: RawInput::between_excl(fullmail, b"From:", b"\r\nTo:"),
                },
                MessageEntry::Imf {
                    e: imf::field::Entry::To,
                    raw_body: RawInput::between_excl(fullmail, b"To:", b"\r\nSubject:"),
                },
                MessageEntry::Imf {
                    e: imf::field::Entry::Subject,
                    raw_body: RawInput::between_excl(fullmail, b"Subject:", b"\r\nMIME-Version:"),
                },
                MessageEntry::Imf {
                    e: imf::field::Entry::MIMEVersion,
                    raw_body: b" 1.0".into(),
                },
            ];

            Message {
                imf,
                mime_body,
                entries,
                raw: fullmail.into(),
                raw_headers: RawInput::between(fullmail, b"Date", b"MIME-Version: 1.0\r\n\r\n"),
            }
        });
    }
373
    /// Parses a multipart message whose header section also contains a
    /// malformed line and redundant headers, checks the resulting AST, and
    /// checks the text obtained by printing that AST.
    #[test]
    fn test_message() {
        let fullmail: &[u8] = r#"Date: Sat, 8 Jul 2023 07:14:29 +0200
From: Grrrnd Zero <grrrndzero@example.org>
To: John Doe <jdoe@machine.example>
CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
    =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
X-Unknown: something something
Bad entry
  on multiple lines
Message-Id: <NTAxNzA2AC47634Y366BAMTY4ODc5MzQyODY0ODY5@www.grrrndzero.org>
MIME-Version: 1.0
Subject: Bad_redundant_subject
Content-Type: multipart/alternative;
 boundary="b1_e376dc71bafc953c0b0fdeb9983a9956"
Content-Transfer-Encoding: 7bit
Content-Transfer-Encoding: bad_redundant

This is a multi-part message in MIME format.

--b1_e376dc71bafc953c0b0fdeb9983a9956
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: quoted-printable

GZ
OoOoO
oOoOoOoOo
oOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO

--b1_e376dc71bafc953c0b0fdeb9983a9956
X-Custom: foobar
Content-Type: text/html; charset=us-ascii

<div style="text-align: center;"><strong>GZ</strong><br />
OoOoO<br />
oOoOoOoOo<br />
oOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
</div>

--b1_e376dc71bafc953c0b0fdeb9983a9956--
"#
        .as_bytes();

        let preamble = b"This is a multi-part message in MIME format.
";

        // Expected AST.
        let ast =
            Message {
                    imf: {
                        let from = imf::mailbox::MailboxRef {
                                name: Some(Phrase(vec![
                                    PhraseToken::Word(Word::Atom(Atom("Grrrnd"[..].into()))),
                                    PhraseToken::Word(Word::Atom(Atom("Zero"[..].into()))),
                                ])),
                                addrspec: imf::mailbox::AddrSpec {
                                    local_part: imf::mailbox::LocalPart(vec![
                                        imf::mailbox::LocalPartToken::Word(Word::Atom(Atom("grrrndzero"[..].into())))
                                    ]),
                                    domain: imf::mailbox::Domain::Atoms(vec![
                                        Atom("example"[..].into()),
                                        Atom("org"[..].into()),
                                    ]),
                                }
                            };
                        let date = imf::datetime::DateTime(FixedOffset::east_opt(2 * 3600)
                            .unwrap()
                            .with_ymd_and_hms(2023, 07, 8, 7, 14, 29)
                            .unwrap());

                        let mut imf = imf::Imf::new();
                        imf.from = imf::From::Single { from, sender: None };
                        imf.date = imf::DateTimeOpt::Some(date);
                        imf.to = vec![imf::address::AddressRef::Single(imf::mailbox::MailboxRef {
                                name: Some(Phrase(vec![
                                    PhraseToken::Word(Word::Atom(Atom("John"[..].into()))),
                                    PhraseToken::Word(Word::Atom(Atom("Doe"[..].into()))),
                                ])),
                                addrspec: imf::mailbox::AddrSpec {
                                    local_part: imf::mailbox::LocalPart(vec![
                                        imf::mailbox::LocalPartToken::Word(Word::Atom(Atom("jdoe"[..].into())))
                                    ]),
                                    domain: imf::mailbox::Domain::Atoms(vec![
                                        Atom("machine"[..].into()),
                                        Atom("example"[..].into()),
                                    ]),
                                }
                         })];

                        imf.cc = vec![imf::address::AddressRef::Single(imf::mailbox::MailboxRef {
                            name: Some(Phrase(vec![
                                PhraseToken::Encoded(EncodedWord(vec![
                                    EncodedWordToken::Quoted(QuotedWord {
                                        enc: EmailCharset::from(b"iso-8859-1"),
                                        chunks: vec![
                                            QuotedChunk::Safe(b"Andr"[..].into()),
                                            QuotedChunk::Encoded(vec![0xE9]),
                                        ],
                                    })
                                ])),
                                PhraseToken::Word(Word::Atom(Atom("Pirard"[..].into()))),
                            ])),
                            addrspec: imf::mailbox::AddrSpec {
                                local_part: imf::mailbox::LocalPart(vec![
                                    imf::mailbox::LocalPartToken::Word(Word::Atom(Atom("PIRARD"[..].into())))
                                ]),
                                domain: imf::mailbox::Domain::Atoms(vec![
                                    Atom("vm1"[..].into()),
                                    Atom("ulg"[..].into()),
                                    Atom("ac"[..].into()),
                                    Atom("be"[..].into()),
                                ]),
                            }
                        })];

                        // The expected subject is the one from the first
                        // `Subject` header of the input.
                        imf.subject = Some(Unstructured(vec![
                            UnstrToken::from_plain(" ", UnstrTxtKind::Fws),
                            UnstrToken::Encoded(EncodedWord(vec![
                                EncodedWordToken::Base64(Base64Word{
                                    enc: EmailCharset::from(b"iso-8859-1"),
                                    content: b"SWYgeW91IGNhbiByZWFkIHRoaXMgeW8"[..].into(),
                                }),
                                EncodedWordToken::Base64(Base64Word{
                                    enc: EmailCharset::from(b"iso-8859-2"),
                                    content: b"dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg"[..].into(),
                                })
                            ])),
                        ]));

                        imf.msg_id = Some(imf::identification::MessageID::ObsLeftRight {
                            left: LocalPart(vec![
                                LocalPartToken::Word(Word::Atom(Atom("NTAxNzA2AC47634Y366BAMTY4ODc5MzQyODY0ODY5".into()))), // spellchecker:disable-line
                            ]),
                            right: Domain::Atoms(vec![
                                Atom("www".into()),
                                Atom("grrrndzero".into()),
                                Atom("org".into()),
                            ]),
                        });

                        imf.mime_version = Some(imf::mime::Version::default());

                        imf
                    },
                    // No entry is expected for the `Bad entry` lines, for the
                    // second `Subject` header, or for the second
                    // `Content-Transfer-Encoding` header of the input.
                    entries: vec![
                        MessageEntry::Imf {
                            e: imf::field::Entry::Date,
                            raw_body: RawInput::between_excl(fullmail, b"Date:", b"\nFrom:"),
                        },
                        MessageEntry::Imf {
                            e: imf::field::Entry::From,
                            raw_body: RawInput::between_excl(fullmail, b"From:", b"\nTo:"),
                        },
                        MessageEntry::Imf {
                            e: imf::field::Entry::To,
                            raw_body: RawInput::between_excl(fullmail, b"To:", b"\nCC:"),
                        },
                        MessageEntry::Imf {
                            e: imf::field::Entry::Cc,
                            raw_body: RawInput::between_excl(fullmail, b"CC:", b"\nSubject: =?"),
                        },
                        MessageEntry::Imf {
                            e: imf::field::Entry::Subject,
                            raw_body: RawInput::between_excl(fullmail, b".be>\nSubject:", b"\nX-Unknown:"),
                        },
                        MessageEntry::Unstructured(
                            header::Unstructured {
                                name: header::FieldName(b"X-Unknown"[..].into()),
                                body: Unstructured(vec![
                                    UnstrToken::from_plain(" ", UnstrTxtKind::Fws),
                                    UnstrToken::from_plain("something", UnstrTxtKind::Txt),
                                    UnstrToken::from_plain(" ", UnstrTxtKind::Fws),
                                    UnstrToken::from_plain("something", UnstrTxtKind::Txt),
                                ]),
                                raw_body: RawInput::between_excl(fullmail, b"X-Unknown:", b"\nBad entry"),
                            }
                        ),
                        MessageEntry::Imf {
                            e: imf::field::Entry::MessageID,
                            raw_body: RawInput::between_excl(fullmail, b"Message-Id:", b"\nMIME-Version:"),
                        },
                        MessageEntry::Imf {
                            e: imf::field::Entry::MIMEVersion,
                            raw_body: RawInput::between_excl(fullmail, b"MIME-Version:", b"\nSubject: Bad"),
                        },
                        MessageEntry::MIME {
                            e: mime::field::Entry::Type,
                            raw_body: b" multipart/alternative;\n boundary=\"b1_e376dc71bafc953c0b0fdeb9983a9956\"".into(),
                        },
                        MessageEntry::MIME {
                            e: mime::field::Entry::TransferEncoding,
                            raw_body: b" 7bit".into()
                        },
                    ],
                    mime_body: MimeBody::Mult(Multipart {
                        mime: mime::MIME {
                            ctype: mime::r#type::Multipart {
                                subtype: mime::r#type::MultipartSubtype::Alternative,
                                boundary: Some("b1_e376dc71bafc953c0b0fdeb9983a9956".to_string()),
                                other_params: vec![],
                            },
                            fields: mime::CommonMIME {
                                transfer_encoding: mime::mechanism::Mechanism::_7Bit,
                                ..mime::CommonMIME::default()
                            },
                        },
                        preamble: preamble.into(),
                        epilogue: vec![].into(),
                        children: vec![
                            AnyPart {
                                entries: vec![
                                    EntityEntry::MIME {
                                        e: mime::field::Entry::Type,
                                        raw_body: b" text/plain; charset=utf-8".into(),
                                    },
                                    EntityEntry::MIME {
                                        e: mime::field::Entry::TransferEncoding,
                                        raw_body: b" quoted-printable".into(),
                                    }
                                ],
                                mime_body: MimeBody::Txt(Text {
                                    mime: mime::MIME {
                                        ctype: mime::r#type::Text {
                                            subtype: mime::r#type::TextSubtype::Plain,
                                            charset: EmailCharset::utf8(),
                                            other_params: vec![],
                                        },
                                        fields: mime::CommonMIME {
                                            transfer_encoding: mime::mechanism::Mechanism::QuotedPrintable,
                                            ..mime::CommonMIME::default()
                                        }
                                    },
                                    body: b"GZ\nOoOoO\noOoOoOoOo\noOoOoOoOoOoOoOoOo\noOoOoOoOoOoOoOoOoOoOoOo\noOoOoOoOoOoOoOoOoOoOoOoOoOoOo\nOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO\n"[..].into(),
                                    raw_body: RawInput::between(fullmail, b"GZ\nOoOoO", b"OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO\n"),
                                }),
                                raw: RawInput::between(fullmail, b"Content-Type: text/plain", b"OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO\n"),
                                raw_headers: b"Content-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: quoted-printable\n\n".into(),
                            },
                            AnyPart {
                                entries: vec![
                                    EntityEntry::Unstructured(header::Unstructured {
                                        name: header::FieldName(b"X-Custom".into()),
                                        body: Unstructured(vec![
                                            UnstrToken::from_plain(" ", UnstrTxtKind::Fws),
                                            UnstrToken::from_plain("foobar", UnstrTxtKind::Txt),
                                        ]),
                                        raw_body: b" foobar".into(),
                                    }),
                                    EntityEntry::MIME {
                                        e: mime::field::Entry::Type,
                                        raw_body: b" text/html; charset=us-ascii".into(),
                                    },
                                ],
                                mime_body: MimeBody::Txt(Text {
                                    mime: mime::MIME {
                                        ctype: mime::r#type::Text {
                                            subtype: mime::r#type::TextSubtype::Html,
                                            charset: EmailCharset::US_ASCII,
                                            other_params: vec![],
                                        },

                                        fields: mime::CommonMIME::default(),
                                    },
                                    body: br#"<div style="text-align: center;"><strong>GZ</strong><br />
OoOoO<br />
oOoOoOoOo<br />
oOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
</div>
"#[..].into(),
                                    raw_body: RawInput::between(fullmail, b"<div style", b"</div>\n"),
                                }),
                                raw: RawInput::between(fullmail, b"X-Custom", b"</div>\n"),
                                raw_headers: b"X-Custom: foobar\nContent-Type: text/html; charset=us-ascii\n\n".into(),
                            },
                        ],
                        raw_body: RawInput::between(fullmail, b"This is a multi-part", b"b1_e376dc71bafc953c0b0fdeb9983a9956--\n"),
                    }),
                raw: fullmail.into(),
                raw_headers: RawInput::between(fullmail, b"Date:", b"bad_redundant\n\n"),
            };

        // Expected printed form. Compared with `fullmail`: header lines end
        // with CRLF, the boundary string is a different one, some charset names
        // are spelled differently, the preamble is absent, and the malformed
        // and redundant header lines are gone.
        let reprinted: &[u8] = "Date: Sat, 8 Jul 2023 07:14:29 +0200\r
From: Grrrnd Zero <grrrndzero@example.org>\r
To: John Doe <jdoe@machine.example>\r
Cc: =?windows-1252?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>\r
Subject: =?windows-1252?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8?=\r
 =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg?=\r
X-Unknown: something something\r
Message-ID: <NTAxNzA2AC47634Y366BAMTY4ODc5MzQyODY0ODY5@www.grrrndzero.org>\r
MIME-Version: 1.0\r
Content-Type: multipart/alternative;\r
 boundary=\"V1Qy0rpB5tWE76WF3UelfGW5K9LZpjHjZ3PKE26vpVNnvofq7BLuYTWxzQB3HrYu7\"\r
Content-Transfer-Encoding: 7bit\r
\r
--V1Qy0rpB5tWE76WF3UelfGW5K9LZpjHjZ3PKE26vpVNnvofq7BLuYTWxzQB3HrYu7\r
Content-Type: text/plain; charset=UTF-8\r
Content-Transfer-Encoding: quoted-printable\r
\r
GZ
OoOoO
oOoOoOoOo
oOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
\r
--V1Qy0rpB5tWE76WF3UelfGW5K9LZpjHjZ3PKE26vpVNnvofq7BLuYTWxzQB3HrYu7\r
X-Custom: foobar\r
Content-Type: text/html; charset=us-ascii\r
\r
<div style=\"text-align: center;\"><strong>GZ</strong><br />
OoOoO<br />
oOoOoOoOo<br />
oOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
</div>
\r
--V1Qy0rpB5tWE76WF3UelfGW5K9LZpjHjZ3PKE26vpVNnvofq7BLuYTWxzQB3HrYu7--\r
"
        .as_bytes();

        test_message_parse_print(fullmail, ast, reprinted);
    }
708
709    #[test]
710    fn test_best_effort() {
711        let input = b"date: uhh
712hello: yolo
713
714hello??";
715        test_message_parse_print(
716            input,
717            {
718                let imf = Imf::new();
719
720                let mime_body = part::MimeBody::Txt(part::discrete::Text {
721                    mime: MIME {
722                        ctype: mime::r#type::Text::default(),
723                        fields: CommonMIME::default(),
724                    },
725                    body: b"hello??".into(),
726                    raw_body: b"hello??".into(),
727                });
728
729                let entries = vec![MessageEntry::Unstructured(header::Unstructured {
730                    name: header::FieldName(b"hello".into()),
731                    body: Unstructured(vec![
732                        UnstrToken::from_plain(" ", UnstrTxtKind::Fws),
733                        UnstrToken::from_plain("yolo", UnstrTxtKind::Txt),
734                    ]),
735                    raw_body: b" yolo".into(),
736                })];
737
738                Message {
739                    imf,
740                    mime_body,
741                    entries,
742                    raw: input.into(),
743                    raw_headers: b"date: uhh\nhello: yolo\n\n".into(),
744                }
745            },
746            b"hello: yolo\r
747MIME-Version: 1.0\r
748\r
749hello??",
750        );
751    }
752
753    #[test]
754    fn test_trace_unstructured() {
755        test_message_reprint(
756            b"X-Mozilla-Status: 0001
757X-Mozilla-Status2: 00000000
758Return-Path: <hello@sympa.lmf.cnrs.fr>
759Received: from mx.lmf.cnrs.fr ([127.0.0.1])
760        by mx.lmf.cnrs.fr with LMTP
761        id oFAUKCuwpWmTPRAAFSOJEQ
762        (envelope-from <infos-gs-owner@sympa.lmf.cnrs.fr>); Mon, 02 Mar 2026 15:43:39 +0000
763X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on mx.lmf.cnrs.fr
764Received-SPF: Pass (mailfrom) identity=mailfrom; client-ip=10.0.0.2; helo=sympa.lmf.cnrs.fr; envelope-from=hello@sympa.lmf.cnrs.fr; receiver=<UNKNOWN>
765Received: from sympa.lmf.cnrs.fr (sympa.lmf.cnrs.fr [10.0.0.2])
766        (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
767         key-exchange X25519 server-signature RSA-PSS (2048 bits))
768        (No client certificate requested)
769        by mx.lmf.cnrs.fr (Postfix) with ESMTPS id DC88D214EA;
770        Mon,  2 Mar 2026 15:43:37 +0000 (UTC)
771Received: by sympa.lmf.cnrs.fr (Postfix, from userid 106)
772        id ACE8B4A03ED; Mon,  2 Mar 2026 16:43:37 +0100 (CET)
773",
774            b"X-Mozilla-Status: 0001\r
775X-Mozilla-Status2: 00000000\r
776Return-Path: <hello@sympa.lmf.cnrs.fr>\r
777Received: from mx.lmf.cnrs.fr ([127.0.0.1])        by mx.lmf.cnrs.fr with LMTP\r
778        id oFAUKCuwpWmTPRAAFSOJEQ        (envelope-from\r
779 <infos-gs-owner@sympa.lmf.cnrs.fr>); Mon, 02 Mar 2026 15:43:39 +0000\r
780X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on mx.lmf.cnrs.fr\r
781Received-SPF: Pass (mailfrom) identity=mailfrom; client-ip=10.0.0.2;\r
782 helo=sympa.lmf.cnrs.fr; envelope-from=hello@sympa.lmf.cnrs.fr;\r
783 receiver=<UNKNOWN>\r
784Received: from sympa.lmf.cnrs.fr (sympa.lmf.cnrs.fr [10.0.0.2])        (using\r
785 TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)        \r
786 key-exchange X25519 server-signature RSA-PSS (2048 bits))        (No client\r
787 certificate requested)        by mx.lmf.cnrs.fr (Postfix) with ESMTPS id\r
788 DC88D214EA;        Mon,  2 Mar 2026 15:43:37 +0000 (UTC)\r
789Received: by sympa.lmf.cnrs.fr (Postfix, from userid 106)        id\r
790 ACE8B4A03ED; Mon,  2 Mar 2026 16:43:37 +0100 (CET)\r
791MIME-Version: 1.0\r
792\r
793"
794        );
795    }
796
797    // tests for UTF8 from https://github.com/arnt/eai-test-messages
798
799    #[test]
800    fn test_utf8_addresses() {
801        test_message_reprint(
802            "From: Jøran Øygårdvær <jøran@example.com>
803Cc: Jøran Øygårdvær <jøran@example.com>
804Signed-Off-By: Jøran Øygårdvær <jøran@example.com>
805To: Arnt Gulbrandsen <arnt@example.com>
806Date: Thu, 20 May 2004 14:28:51 +0200
807
808"
809            .as_bytes(),
810            "From: Jøran Øygårdvær <jøran@example.com>\r
811Cc: Jøran Øygårdvær <jøran@example.com>\r
812Signed-Off-By: Jøran Øygårdvær <jøran@example.com>\r
813To: Arnt Gulbrandsen <arnt@example.com>\r
814Date: Thu, 20 May 2004 14:28:51 +0200\r
815MIME-Version: 1.0\r
816\r
817"
818            .as_bytes(),
819        );
820    }
821
822    #[test]
823    fn test_utf8_attachment() {
824        test_message_reprint(
825            r#"From: Arnt Gulbrandsen <arnt@example.com>
826To: Arnt Gulbrandsen <arnt@example.com>
827Date: Thu, 20 May 2004 14:28:51 +0200
828Content-Type: multipart/mixed; boundary=-
829Mime-Version: 1.0
830
831---
832Content-Type: text/plain; format=flowed; x-eai-please-do-not="abstürzen"
833
834There's nothing to do about this bodypart, except not crash. The attachment
835has a somewhat challenging filename.
836
837---
838Content-Disposition: attachment; filename="blåbærsyltetøy"
839Content-Type: image/jpeg
840Content-Transfer-Encoding: base64
841
842snip
843-----
844"#
845            .as_bytes(),
846            "From: Arnt Gulbrandsen <arnt@example.com>\r
847To: Arnt Gulbrandsen <arnt@example.com>\r
848Date: Thu, 20 May 2004 14:28:51 +0200\r
849Content-Type: multipart/mixed;\r
850 boundary=\"V1Qy0rpB5tWE76WF3UelfGW5K9LZpjHjZ3PKE26vpVNnvofq7BLuYTWxzQB3HrYu7\"\r
851MIME-Version: 1.0\r
852\r
853--V1Qy0rpB5tWE76WF3UelfGW5K9LZpjHjZ3PKE26vpVNnvofq7BLuYTWxzQB3HrYu7\r
854Content-Type: text/plain; charset=us-ascii; format=flowed;\r
855 x-eai-please-do-not=\"abstürzen\"\r
856\r
857There's nothing to do about this bodypart, except not crash. The attachment
858has a somewhat challenging filename.
859\r
860--V1Qy0rpB5tWE76WF3UelfGW5K9LZpjHjZ3PKE26vpVNnvofq7BLuYTWxzQB3HrYu7\r
861Content-Disposition: attachment; filename=\"blåbærsyltetøy\"\r
862Content-Type: image/jpeg\r
863Content-Transfer-Encoding: base64\r
864\r
865snip\r
866--V1Qy0rpB5tWE76WF3UelfGW5K9LZpjHjZ3PKE26vpVNnvofq7BLuYTWxzQB3HrYu7--\r
867"
868            .as_bytes(),
869        );
870    }
871
872    #[test]
873    fn test_utf8_from() {
874        test_message_reprint(
875            "From: Jøran Øygårdvær <jøran@example.com>
876To: Arnt Gulbrandsen <arnt@example.com>
877Date: Thu, 20 May 2004 14:28:51 +0200
878
879asdf"
880                .as_bytes(),
881            "From: Jøran Øygårdvær <jøran@example.com>\r
882To: Arnt Gulbrandsen <arnt@example.com>\r
883Date: Thu, 20 May 2004 14:28:51 +0200\r
884MIME-Version: 1.0\r
885\r
886asdf"
887                .as_bytes(),
888        );
889    }
890
891    #[test]
892    fn test_utf8_mimefield() {
893        test_message_reprint(
894            "From: Arnt Gulbrandsen <arnt@example.com>\r
895To: Arnt Gulbrandsen <arnt@example.com>\r
896Date: Thu, 20 May 2004 14:28:51 +0200\r
897Content-Disposition: attachment; filename=\"blåbærsyltetøy\"\r
898Content-Type: text/plain; format=flowed\r
899Mime-Version: 1.0\r
900\r
901It's a bit odd that a single-part message is an attachment with a
902filename. But perfectly legal."
903                .as_bytes(),
904            "From: Arnt Gulbrandsen <arnt@example.com>\r
905To: Arnt Gulbrandsen <arnt@example.com>\r
906Date: Thu, 20 May 2004 14:28:51 +0200\r
907Content-Disposition: attachment; filename=\"blåbærsyltetøy\"\r
908Content-Type: text/plain; charset=us-ascii; format=flowed\r
909MIME-Version: 1.0\r
910\r
911It's a bit odd that a single-part message is an attachment with a
912filename. But perfectly legal."
913                .as_bytes(),
914        );
915    }
916
917    #[test]
918    fn test_message_global_recover() {
919        // If an embedded message contains UTF8, ensure its content type is
920        // message/global. (message/rfc822 is not supposed to contain UTF-8
921        // headers but we parse those nevertheless...)
922        test_message_reprint(
923            "From: admin@example.com
924To: user@example.com
925Date: Thu, 20 May 2004 14:28:51 +0200
926Content-Type: message/rfc822
927
928From: \"Armaël\" <armaël@example.com>
929To: \"Müller\" <müller@example.test>
930Subject: Café? ☕
931Content-Type: text/plain; charset=\"utf-8\"
932
933☕?"
934            .as_bytes(),
935            "From: admin@example.com\r
936To: user@example.com\r
937Date: Thu, 20 May 2004 14:28:51 +0200\r
938Content-Type: message/global\r
939MIME-Version: 1.0\r
940\r
941From: \"Armaël\" <armaël@example.com>\r
942To: \"Müller\" <müller@example.test>\r
943Subject: Café? ☕\r
944Content-Type: text/plain; charset=UTF-8\r
945MIME-Version: 1.0\r
946\r
947☕?"
948            .as_bytes(),
949        );
950    }
951}