Skip to main content

eml_codec/part/
composite.rs

1#[cfg(feature = "arbitrary")]
2use arbitrary::Arbitrary;
3use bounded_static::ToStatic;
4use std::borrow::Cow;
5use std::fmt;
6#[cfg(feature = "tracing-recover")]
7use tracing::warn;
8#[cfg(feature = "tracing")]
9use tracing::{span, Level};
10
11use crate::header;
12use crate::message;
13use crate::mime;
14use crate::part::{self, field::NaiveEntityFields, AnyPart};
15use crate::raw_input::RawInput;
16use crate::text::boundary::{boundary, Delimiter};
17#[cfg(feature = "arbitrary")]
18use crate::{arbitrary_utils::arbitrary_vec_nonempty, fuzz_eq::FuzzEq};
19
20//--- Multipart
21#[derive(Clone, PartialEq, ToStatic)]
22#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
23pub struct Multipart<'a> {
24    pub mime: mime::MIME<'a, mime::r#type::Multipart<'a>>,
25    // Invariant: `children` is non-empty
26    pub children: Vec<AnyPart<'a>>,
27    #[cfg_attr(feature = "arbitrary", fuzz_eq(ignore))]
28    pub preamble: Cow<'a, [u8]>,
29    #[cfg_attr(feature = "arbitrary", fuzz_eq(ignore))]
30    pub epilogue: Cow<'a, [u8]>,
31    pub raw_body: RawInput<'a>,
32}
33impl<'a> fmt::Debug for Multipart<'a> {
34    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
35        fmt.debug_struct("part::Multipart")
36            .field("mime", &self.mime)
37            .field("children", &self.children)
38            .field("preamble", &String::from_utf8_lossy(&self.preamble))
39            .field("epilogue", &String::from_utf8_lossy(&self.epilogue))
40            .field("raw_body", &self.raw_body)
41            .finish()
42    }
43}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Multipart<'a> {
    // Generates a multipart with an arbitrary MIME description and a
    // non-empty list of children; preamble, epilogue and raw body are
    // always left empty.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        // Draw order matters for reproducibility: MIME first, then children.
        let mime = u.arbitrary()?;
        let children = arbitrary_vec_nonempty(u)?;

        Ok(Self {
            mime,
            children,
            preamble: b"".into(),
            epilogue: b"".into(),
            raw_body: RawInput::none(),
        })
    }
}
56
57// REQUIRES: `m.ctype.boundary` is `Some(_)`. This is guaranteed by
58// the parser for `mime::MIME<_, Multipart>`.
59pub fn multipart<'a>(
60    m: mime::MIME<'a, mime::r#type::Multipart<'a>>,
61) -> impl Fn(&'a [u8]) -> (&'a [u8], Multipart<'a>) {
62    let m = m.clone();
63
64    move |input| {
65        #[cfg(feature = "tracing")]
66        let _span = span!(Level::DEBUG, "part::composite::multipart", ?m).entered();
67
68        let full_input = input;
69
70        // init
71        // NOTE: the `.unwrap()` cannot fail as long as `m` is produced by
72        // the parser, which always specifies a `boundary` (the boundary
73        // used by the input).
74        let bound = m.ctype.boundary.as_ref().unwrap().as_bytes();
75        let part_raw = part_raw(bound);
76        let mut mparts: Vec<AnyPart> = vec![];
77
78        // preamble
79        let (mut input_loop, preamble) = part_raw(input);
80
81        let (rest, mut multipart) = loop {
82            let input = match boundary(bound)(input_loop) {
83                Err(_) => {
84                    // We encountered a malformed boundary, stop parsing.
85                    let raw_body = &full_input[0..full_input.len() - input_loop.len()];
86                    break (
87                        input_loop,
88                        Multipart {
89                            mime: m.clone(),
90                            children: mparts,
91                            preamble: preamble.into(),
92                            epilogue: [][..].into(),
93                            raw_body: raw_body.into(),
94                        },
95                    );
96                }
97                Ok((inp, Delimiter::Last)) => {
98                    break (
99                        &[],
100                        Multipart {
101                            mime: m.clone(),
102                            children: mparts,
103                            preamble: preamble.into(),
104                            epilogue: inp.into(),
105                            raw_body: full_input.into(),
106                        },
107                    )
108                }
109                Ok((inp, Delimiter::Next)) => inp,
110            };
111
112            // parse mime headers, otherwise pick default mime
113            let (input_body, fields_raw) = header::header_kv(input);
114            let NaiveEntityFields { entries, mime } =
115                fields_raw.into_iter().collect::<NaiveEntityFields>();
116
117            // interpret mime according to context
118            let mime = match m.ctype.subtype {
119                mime::r#type::MultipartSubtype::Digest => {
120                    mime.to_interpreted(mime::DefaultType::Digest)
121                }
122                _ => mime.to_interpreted(mime::DefaultType::Generic),
123            };
124
125            // parse raw part for the body
126            let (input_next, rpart) = part_raw(input_body);
127
128            // parse mime body
129            // XXX this can be an (indirect) recursive call;
130            // -> risk of stack overflow
131            let mime_body = part::part_body(mime)(rpart);
132            mparts.push(AnyPart {
133                entries,
134                mime_body,
135                raw: input[0..input.len() - input_next.len()].into(),
136                raw_headers: input[0..input.len() - input_body.len()].into(),
137            });
138
139            input_loop = input_next;
140        };
141
142        // RFC2064 specifies that a multipart must have at least one child part.
143        // If there is no child part, insert an empty part as recovery strategy.
144        if multipart.children.is_empty() {
145            #[cfg(feature = "tracing-recover")]
146            warn!("multipart containing zero parts");
147            multipart.children.push(AnyPart::default());
148        }
149
150        (rest, multipart)
151    }
152}
153
154// Recognizes bytes for the next part, until the next boundary or the end of the input.
155fn part_raw<'a, 'b>(bound: &[u8]) -> impl Fn(&'a [u8]) -> (&'a [u8], &'a [u8]) + 'b {
156    use memchr::memmem::Finder;
157    // This low-level implementation (which basically just calls `memmem`) is faster
158    // than trying to express this using parser combinators.
159
160    // search for "--{bound}"
161    let mut needle = b"--".to_vec();
162    needle.extend(bound.iter());
163    let finder = Finder::new(&needle).into_owned();
164
165    move |input| {
166        for i in finder.find_iter(input) {
167            // a boundary can be at the beginning of the input
168            if i == 0 {
169                return (input, &[]);
170            }
171
172            // or it can be after a newline
173            if i.checked_sub(1).is_some_and(|j| input[j] == b'\n') {
174                // best-effort: recognize both \n and \r\n before the boundary
175                let i = i
176                    .checked_sub(2)
177                    .filter(|j| input[*j] == b'\r')
178                    .unwrap_or(i - 1);
179                return (&input[i..], &input[0..i]);
180            }
181        }
182        // no matching boundary found; return the entire input
183        (&[], input)
184    }
185}
186
187//--- Message
188
189// Invariant: if message headers use non-ascii UTF-8, message subtype RFC822
190// must not be used and subtype Global must be used instead.
191#[derive(Clone, Debug, PartialEq, ToStatic)]
192#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
193pub struct Message<'a> {
194    pub mime: mime::MIME<'a, mime::r#type::Message<'a>>,
195
196    // NOTE: RFC2046 specifies that an encapsulated message "isn't restricted
197    // to material in strict conformance to RFC822" and that it "could well be a
198    // News article or a MIME message".
199    //
200    // This could be interpreted as saying that we shouldn't parse the embedded
201    // message as a toplevel message. However, it is not clear whether there
202    // actually exist embedded messages that are not actually toplevel messages.
203    //
204    // Additionally, IMAP requires that we are able to construct an IMF envelope
205    // for an embedded message, and our AST for messages is able to handle missing
206    // fields that wouldn't be strictly compliant.
207    //
208    // We thus decide to parse the contents as a toplevel message, i.e. an
209    // `message::Message`, which involves interpreting IMF headers.
210    pub child: Box<message::Message<'a>>,
211    pub raw_body: RawInput<'a>,
212}
213
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Message<'a> {
    // Generates an arbitrary embedded message, fixing up the subtype so the
    // RFC822/Global invariant of `Message` holds.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let mut mime: mime::MIME<'a, mime::r#type::Message<'a>> = u.arbitrary()?;
        let child: Box<message::Message<'a>> = u.arbitrary()?;

        // TODO: clarify whether we should take the body into account as well, and
        // not just the headers (for later when we start interpreting bodies?)
        let is_rfc822 = matches!(mime.ctype.subtype, mime::r#type::MessageSubtype::RFC822);
        if is_rfc822 && child.contains_utf8_headers() {
            mime.ctype.subtype = mime::r#type::MessageSubtype::Global
        }

        Ok(Self {
            mime,
            child,
            raw_body: RawInput::none(),
        })
    }
}
233
234/// Parse an embedded message.
235///
236/// This function always consumes its entire input.
237pub fn message<'a>(
238    m: mime::MIME<'a, mime::r#type::Message<'a>>,
239) -> impl Fn(&'a [u8]) -> Message<'a> {
240    move |input: &[u8]| {
241        #[cfg(feature = "tracing")]
242        let _span = span!(Level::DEBUG, "part::composite::message", ?m).entered();
243
244        // parse the input as a toplevel message
245        let msg = message::message(input);
246        let mut msg_mime = m.clone();
247        // If the headers contain non-ascii UTF8 and if this is a
248        // message/RFC822, promote the message outer MIME to message/global
249        if msg.contains_utf8_headers()
250            && matches!(msg_mime.ctype.subtype, mime::r#type::MessageSubtype::RFC822)
251        {
252            msg_mime.ctype.subtype = mime::r#type::MessageSubtype::Global;
253        }
254
255        Message {
256            mime: msg_mime,
257            child: Box::new(msg),
258            raw_body: input.into(),
259        }
260    }
261}
262
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mime::field::Entry;
    use crate::part::discrete::Text;
    use crate::part::field::EntityEntry;
    use crate::part::{AnyPart, MimeBody};
    use crate::text::charset::EmailCharset;
    use pretty_assertions::assert_eq;

    // Builds the MIME description of a multipart entity with the given
    // subtype and boundary, no extra parameters and default common fields.
    fn multipart_mime(
        subtype: mime::r#type::MultipartSubtype,
        boundary: &str,
    ) -> mime::MIME<'static, mime::r#type::Multipart<'static>> {
        mime::MIME {
            ctype: mime::r#type::Multipart {
                subtype,
                boundary: Some(boundary.to_string()),
                other_params: vec![],
            },
            fields: mime::CommonMIME::default(),
        }
    }

    // Lines that merely look like delimiters (`bloup--`, `--bim`) are skipped
    // until the real `--hello` boundary.
    #[test]
    fn test_preamble() {
        let split = part_raw(b"hello");
        let found = split(
            b"blip
bloup

blip
bloup--
--bim
--bim--

--hello
Field: Body
",
        );

        let expected = (
            &b"\n--hello\nField: Body\n"[..],
            &b"blip\nbloup\n\nblip\nbloup--\n--bim\n--bim--\n"[..],
        );
        assert_eq!(found, expected);
    }

    // The line break right before the boundary belongs to the remaining
    // input, not to the part.
    #[test]
    fn test_part_raw() {
        let split = part_raw(b"simple boundary");
        let found = split(
            b"Content-type: text/plain; charset=us-ascii

This is explicitly typed plain US-ASCII text.
It DOES end with a linebreak.

--simple boundary--
",
        );

        let expected = (
            &b"\n--simple boundary--\n"[..],
            &b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..],
        );
        assert_eq!(found, expected);
    }

    // A complete multipart with preamble, two text parts and an epilogue.
    #[test]
    fn test_multipart() {
        let base_mime = multipart_mime(
            mime::r#type::MultipartSubtype::Alternative,
            "simple boundary",
        );

        let input = b"This is the preamble.  It is to be ignored, though it
is a handy place for composition agents to include an
explanatory note to non-MIME conformant readers.

--simple boundary

This is implicitly typed plain US-ASCII text.
It does NOT end with a linebreak.
--simple boundary
Content-type: text/plain; charset=us-ascii

This is explicitly typed plain US-ASCII text.
It DOES end with a linebreak.

--simple boundary--

This is the epilogue. It is also to be ignored.
";

        let preamble = b"This is the preamble.  It is to be ignored, though it
is a handy place for composition agents to include an
explanatory note to non-MIME conformant readers.
";

        let epilogue = b"
This is the epilogue. It is also to be ignored.
";

        let parsed = multipart(base_mime.clone())(input);

        let expected = (
            &b""[..],
            Multipart {
                mime: base_mime,
                preamble: preamble.into(),
                epilogue: epilogue.into(),
                children: vec![
                    // first part: no headers, implicit text/plain
                    AnyPart {
                        entries: vec![],
                        mime_body: MimeBody::Txt(Text {
                            mime: mime::MIME {
                                ctype: mime::r#type::Text::default(),
                                fields: mime::CommonMIME::default(),
                            },
                            body: b"This is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak.".into(),
                            raw_body: RawInput::between(
                                input,
                                b"This is implicitly",
                                b"NOT end with a linebreak.",
                            ),
                        }),
                        raw: RawInput::between(
                            input,
                            b"\nThis is implicitly",
                            b"NOT end with a linebreak.",
                        ),
                        raw_headers: b"\n".into(),
                    },
                    // second part: explicit Content-type header
                    AnyPart {
                        entries: vec![EntityEntry::MIME {
                            e: Entry::Type,
                            raw_body: b" text/plain; charset=us-ascii".into(),
                        }],
                        mime_body: MimeBody::Txt(Text {
                            mime: mime::MIME {
                                ctype: mime::r#type::Text {
                                    subtype: mime::r#type::TextSubtype::Plain,
                                    charset: EmailCharset::US_ASCII,
                                    other_params: vec![],
                                },
                                fields: mime::CommonMIME::default(),
                            },
                            body: b"This is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n".into(),
                            raw_body: RawInput::between(
                                input,
                                b"This is explicitly",
                                b"DOES end with a linebreak.\n",
                            ),
                        }),
                        raw: RawInput::between(
                            input,
                            b"Content-type",
                            b"DOES end with a linebreak.\n",
                        ),
                        raw_headers: b"Content-type: text/plain; charset=us-ascii\n\n".into(),
                    },
                ],
                raw_body: input.into(),
            },
        );

        assert_eq!(parsed, expected);
    }

    // The terminator of a multipart entity can be missing.
    // This should be properly handled even for nested multiparts
    // (RFC2046 specifies this in sec 5.1.2).
    #[test]
    fn test_nested_multipart_inner_broken() {
        let base_mime = multipart_mime(mime::r#type::MultipartSubtype::Mixed, "outer boundary");

        let input = b"
--outer boundary
Content-Type: multipart/mixed; boundary=\"inner boundary\"

--inner boundary

This is the inner part; it misses its terminator
--outer boundary

This is implicitly typed plain US-ASCII text.
--outer boundary--";

        let parsed = multipart(base_mime.clone())(input);

        // the single child of the unterminated inner multipart
        let inner_child = AnyPart {
            entries: vec![],
            mime_body: MimeBody::Txt(Text {
                mime: mime::MIME {
                    ctype: mime::r#type::Text::default(),
                    fields: mime::CommonMIME::default(),
                },
                body: b"This is the inner part; it misses its terminator".into(),
                raw_body: RawInput::between(input, b"This is the inner", b"terminator"),
            }),
            raw: RawInput::between(input, b"\nThis is the inner", b"terminator"),
            raw_headers: b"\n".into(),
        };

        // the inner multipart itself, as the first child of the outer one
        let inner_multipart = AnyPart {
            entries: vec![EntityEntry::MIME {
                e: Entry::Type,
                raw_body: b" multipart/mixed; boundary=\"inner boundary\"".into(),
            }],
            mime_body: MimeBody::Mult(Multipart {
                mime: multipart_mime(mime::r#type::MultipartSubtype::Mixed, "inner boundary"),
                preamble: b"".into(),
                epilogue: b"".into(),
                children: vec![inner_child],
                raw_body: RawInput::between(
                    input,
                    b"--inner boundary\n\nThis is the inner",
                    b"terminator",
                ),
            }),
            raw: RawInput::between(input, b"Content-Type", b"terminator"),
            raw_headers: b"Content-Type: multipart/mixed; boundary=\"inner boundary\"\n\n".into(),
        };

        // the plain text part that follows it in the outer multipart
        let outer_text = AnyPart {
            entries: vec![],
            mime_body: MimeBody::Txt(Text {
                mime: mime::MIME {
                    ctype: mime::r#type::Text::default(),
                    fields: mime::CommonMIME::default(),
                },
                body: b"This is implicitly typed plain US-ASCII text.".into(),
                raw_body: b"This is implicitly typed plain US-ASCII text.".into(),
            }),
            raw: b"\nThis is implicitly typed plain US-ASCII text.".into(),
            raw_headers: b"\n".into(),
        };

        let expected = (
            &b""[..],
            Multipart {
                mime: base_mime,
                preamble: b"".into(),
                epilogue: b"".into(),
                children: vec![inner_multipart, outer_text],
                raw_body: input.into(),
            },
        );

        assert_eq!(parsed, expected);
    }

    // Parsing stops on a broken boundary that starts with the correct boundary
    // but is followed by a suffix containing junk
    // FIXME: the RFC requires that we handle whitespace characters as a suffix,
    // but this is not done currently.
    #[test]
    fn test_broken_boundary() {
        let base_mime = multipart_mime(mime::r#type::MultipartSubtype::Mixed, "boundary");

        let input = b"
--boundary

Part text
--boundary+++out of cheese

leftovers";

        let parsed = multipart(base_mime.clone())(input);

        let expected = (
            // everything from the broken boundary on is left unparsed
            &b"\n--boundary+++out of cheese\n\nleftovers"[..],
            Multipart {
                mime: base_mime,
                preamble: b"".into(),
                epilogue: b"".into(),
                children: vec![AnyPart {
                    entries: vec![],
                    mime_body: MimeBody::Txt(Text {
                        mime: mime::MIME {
                            ctype: mime::r#type::Text::default(),
                            fields: mime::CommonMIME::default(),
                        },
                        body: b"Part text".into(),
                        raw_body: b"Part text".into(),
                    }),
                    raw: b"\nPart text".into(),
                    raw_headers: b"\n".into(),
                }],
                raw_body: b"\n--boundary\n\nPart text".into(),
            },
        );

        assert_eq!(parsed, expected);
    }

    // Only the `\r` that is part of the `\r\n` right before the boundary is
    // taken out of the part body; the other `\r` stays in the body.
    #[test]
    fn test_multipart_cr() {
        let base_mime = multipart_mime(mime::r#type::MultipartSubtype::Alternative, "boundary");

        let input = b"--boundary

\r\r
--boundary--
";

        let parsed = multipart(base_mime.clone())(input);

        let expected = (
            &b""[..],
            Multipart {
                mime: base_mime,
                preamble: b"".into(),
                epilogue: b"".into(),
                children: vec![AnyPart {
                    entries: vec![],
                    mime_body: MimeBody::Txt(Text {
                        mime: mime::MIME {
                            ctype: mime::r#type::Text::default(),
                            fields: mime::CommonMIME::default(),
                        },
                        body: b"\r".into(),
                        raw_body: b"\r".into(),
                    }),
                    raw: b"\n\r".into(),
                    raw_headers: b"\n".into(),
                }],
                raw_body: input.into(),
            },
        );

        assert_eq!(parsed, expected);
    }

    // A multipart without any part gets a default empty part as recovery.
    #[test]
    fn test_multipart_no_parts() {
        let base_mime = multipart_mime(mime::r#type::MultipartSubtype::Alternative, "boundary");

        let input = b"--boundary--";

        let parsed = multipart(base_mime.clone())(input);

        let expected = (
            &b""[..],
            Multipart {
                mime: base_mime,
                preamble: b"".into(),
                epilogue: b"".into(),
                children: vec![AnyPart::default()],
                raw_body: input.into(),
            },
        );

        assert_eq!(parsed, expected);
    }
}