Skip to main content

eml_codec/mime/
type.rs

1#[cfg(feature = "arbitrary")]
2use arbitrary::Arbitrary;
3use bounded_static::{IntoBoundedStatic, ToBoundedStatic, ToStatic};
4use nom::{
5    branch::alt,
6    bytes::complete::tag,
7    combinator::{map, opt},
8    multi::many0,
9    sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
10    IResult,
11};
12#[cfg(feature = "tracing")]
13use tracing::warn;
14
15#[cfg(feature = "arbitrary")]
16use crate::fuzz_eq::FuzzEq;
17use crate::i18n::ContainsUtf8;
18use crate::print::{Formatter, Print, ToStringFromPrint};
19use crate::text::charset::EmailCharset;
20use crate::text::misc_token::{mime_word, MIMEWord};
21use crate::text::quoted::{print_quoted, QuotedString};
22use crate::text::recovery::take_quoted_or_until;
23use crate::text::whitespace::cfws;
24use crate::text::words::{mime_atom, MIMEAtom};
25#[cfg(any(feature = "tracing-recover", feature = "tracing-unsupported"))]
26use crate::utils::bytes_to_trace_string;
27
28// --------- NAIVE TYPE
29#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic)]
30#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
31pub struct NaiveType<'a> {
32    pub main: MIMEAtom<'a>,
33    pub sub: MIMEAtom<'a>,
34    pub params: Vec<Parameter<'a>>,
35}
36impl<'a> NaiveType<'a> {
37    pub fn to_type(&self) -> AnyType<'a> {
38        self.into()
39    }
40}
41pub fn naive_type(input: &[u8]) -> IResult<&[u8], NaiveType<'_>> {
42    let (input, (main, sub)) = alt((
43        separated_pair(mime_atom, tag("/"), mime_atom),
44        // Recognize some broken content-types found in the real world:
45        recover_broken_type(b"text", b"text", b"plain"),
46        recover_broken_type(b".pdf", b"application", b"pdf"),
47    ))(input)?;
48    let (input, params) = parameter_list(input)?;
49    Ok((input, NaiveType { main, sub, params }))
50}
51pub fn recover_broken_type<'a>(
52    broken_name: &'a [u8],
53    main: &'a [u8],
54    sub: &'a [u8],
55) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], (MIMEAtom<'a>, MIMEAtom<'a>)> {
56    move |input: &[u8]| {
57        map(delimited(opt(cfws), tag(broken_name), opt(cfws)), |_| {
58            #[cfg(feature = "tracing-recover")]
59            warn!(
60                "use of broken content-type {}, interpreted as {}/{}",
61                String::from_utf8_lossy(broken_name),
62                String::from_utf8_lossy(main),
63                String::from_utf8_lossy(sub)
64            );
65            (MIMEAtom(main.into()), MIMEAtom(sub.into()))
66        })(input)
67    }
68}
69
70// XXX we allow printing content types without further validation;
71// this is not strictly allowed by the spec, which only allows
72// x-token or ietf-token on top of the RFC defined content types.
73impl<'a> Print for NaiveType<'a> {
74    fn print(&self, fmt: &mut impl Formatter) {
75        self.main.print(fmt);
76        fmt.write_bytes(b"/");
77        self.sub.print(fmt);
78        for param in &self.params {
79            fmt.write_bytes(b";");
80            fmt.write_fws();
81            param.print(fmt);
82        }
83    }
84}
85
86#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic)]
87#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
88pub struct Parameter<'a> {
89    pub name: MIMEAtom<'a>,
90    pub value: MIMEWord<'a>,
91}
92impl<'a> Print for Parameter<'a> {
93    fn print(&self, fmt: &mut impl Formatter) {
94        self.name.print(fmt);
95        fmt.write_bytes(b"=");
96        self.value.print(fmt)
97    }
98}
99
100/// Parses a parameter list that follows a content-type.
101///
102/// The RFC parameter-list syntax is:
103/// ```abnf
104///   parameter-list   =  *(";" mime-atom "=" mime-word)
105/// ```
106///
107/// Additionally, we handle partially broken parameter lists, where some
108/// segments (delimited by ";") contain invalid data. We drop invalid segments
109/// and keep the rest.
110///
111/// We thus parse the following grammar:
112/// ```abnf
113///   parameter-list   =   *(";" (mime-atom "=" mime-word / any-not-semicolon)) [";"]
114/// ```
115/// As a consequence, this combinator always consumes all of its input.
116pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter<'_>>> {
117    // recovery parser: skips over junk until the next ';'
118    let junk = |input| {
119        pair(
120            opt(cfws),
121            map(take_quoted_or_until(|c| c == b';'), |i| {
122                #[cfg(feature = "tracing-unsupported")]
123                if !i.is_empty() {
124                    warn!(input = %bytes_to_trace_string(i),
125                          "unsupported segment in parameter list");
126                }
127                i
128            }),
129        )(input)
130    };
131    let (input, params) = terminated(
132        many0(preceded(pair(junk, tag(";")), opt(parameter))),
133        pair(opt(tag(";")), junk),
134    )(input)?;
135
136    Ok((input, params.into_iter().flatten().collect()))
137}
138pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter<'_>> {
139    // We handle both '=' and ':' as separators. ':' is not valid but
140    // occurs in some emails we want to support...
141    let separator = alt((
142        tag("="),
143        map(tag(":"), |i| {
144            #[cfg(feature = "tracing-recover")]
145            warn!(input = %bytes_to_trace_string(input),
146                  "non-compliant use of ':' instead of '=' in parameter");
147            i
148        }),
149    ));
150
151    map(
152        tuple((mime_atom, separator, mime_word)),
153        |(name, _, value)| Parameter { name, value },
154    )(input)
155}
156
157// MIME TYPES TRANSLATED TO RUST TYPING SYSTEM
158
159#[derive(Clone, Debug, PartialEq, ToStatic)]
160#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
161pub enum AnyType<'a> {
162    // Composite types
163    Multipart(Multipart<'a>), // multipart/*
164    Message(Message<'a>),     // message/{rfc822, global}
165
166    // Discrete types
167    Text(Text<'a>),     // text/*
168    Binary(Binary<'a>), // everything else
169}
170
171impl<'a> AnyType<'a> {
172    pub fn params(&self) -> Vec<Parameter<'a>> {
173        match self {
174            AnyType::Multipart(t) => t.params(),
175            AnyType::Message(t) => t.params.clone(),
176            AnyType::Text(t) => t.params(),
177            AnyType::Binary(t) => t.ctype.params.clone(),
178        }
179    }
180}
181
182impl<'a> From<&NaiveType<'a>> for AnyType<'a> {
183    fn from(nt: &NaiveType<'a>) -> Self {
184        match nt.main.0.to_ascii_lowercase().as_slice() {
185            b"multipart" =>
186            // fails if there is no boundary parameter
187            {
188                Multipart::try_from(nt)
189                    .map(Self::Multipart)
190                    .unwrap_or(Self::Binary(Binary::from(nt)))
191            }
192            b"message" =>
193            // fails if this the subtype is not supported
194            {
195                Message::try_from(nt)
196                    .map(Self::Message)
197                    .unwrap_or(Self::Binary(Binary::from(nt)))
198            }
199            b"text" => Self::Text(Text::from(nt)),
200            _ => Self::Binary(Binary::from(nt)),
201        }
202    }
203}
204
205impl<'a> Print for AnyType<'a> {
206    fn print(&self, fmt: &mut impl Formatter) {
207        match self {
208            AnyType::Multipart(mp) => mp.print(fmt),
209            AnyType::Message(msg) => msg.print(fmt),
210            AnyType::Text(txt) => txt.print(fmt),
211            AnyType::Binary(bin) => bin.print(fmt),
212        }
213    }
214}
215
216// REAL PARTS
217
218#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic)]
219#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
220pub struct Multipart<'a> {
221    pub subtype: MultipartSubtype,
222    #[cfg_attr(feature = "arbitrary", fuzz_eq(ignore))]
223    #[contains_utf8(ignore)] // boundary is always ascii
224    // XXX: this `boundary` field is a hack.
225    //
226    // `boundary` is tracked in this AST node as a parsing implementation
227    // detail rather than some explicit information of the final email.
228    //
229    // During parsing, the parser for a multipart email body needs to know the
230    // boundary that was specified in the headers to be able to parse parts. The
231    // `boundary` field is used to propagate that information from the parser
232    // for MIME headers to the parser for a multipart body.
233    //
234    // After parsing, this field is ignored. In particular, during printing, a
235    // new boundary is generated by the eml-codec's printer, and is used instead
236    // of the original boundary. Indeed, the original boundary may not be
237    // correct to reuse if the body parts have been modified (by modifying the
238    // parts AST)---remember that boundaries must not appear in body parts.
239    //
240    // Finally, this `boundary` is an `Option<String>` rather than a `String` to
241    // account for the case where this AST node is constructed directly using
242    // the library API and not from the parser. In this case there is no input
243    // boundary to use, so the field can be set to `None`. In the other case
244    // where a `mime::type::Multipart` record is constructed by the parser, the
245    // `boundary` field is guaranteed to be `Some(...)`.
246    pub boundary: Option<String>,
247    // Invariant: parameters with .name != "boundary"
248    pub other_params: Vec<Parameter<'a>>,
249}
250
251impl<'a> Multipart<'a> {
252    pub fn params(&self) -> Vec<Parameter<'a>> {
253        let mut params = self.other_params.clone();
254        match &self.boundary {
255            Some(b) => params.push(Parameter {
256                name: MIMEAtom(b"boundary".into()),
257                value: MIMEWord::Quoted(QuotedString(vec![b.into()])).into_static(),
258            }),
259            None => {
260                // XXX in this case there is no boundary parameter returned,
261                // even the final email will contain one...
262            }
263        };
264        params
265    }
266}
267
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Multipart<'a> {
    /// Generates a `Multipart` with no boundary and arbitrary other
    /// parameters.
    ///
    /// Inputs that would put a `boundary` parameter in `other_params` are
    /// rejected. The name is compared case-insensitively, matching how the
    /// parser recognizes the boundary parameter, so that spellings such as
    /// `Boundary` cannot slip through and break the field's invariant.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let other_params: Vec<Parameter> = u.arbitrary()?;
        if other_params
            .iter()
            .any(|p| p.name.0.eq_ignore_ascii_case(b"boundary"))
        {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self {
            subtype: u.arbitrary()?,
            boundary: None,
            other_params,
        })
    }
}
285
286impl<'a> Print for Multipart<'a> {
287    fn print(&self, fmt: &mut impl Formatter) {
288        fmt.push_new_boundary();
289        fmt.write_bytes(b"multipart/");
290        self.subtype.print(fmt);
291        fmt.write_bytes(b";");
292        fmt.write_fws();
293        // always quote the boundary ("never hurts" says RFC2046)
294        fmt.write_bytes(b"boundary=\"");
295        fmt.write_current_boundary();
296        fmt.write_bytes(b"\"");
297        for param in &self.other_params {
298            fmt.write_bytes(b";");
299            fmt.write_fws();
300            param.print(fmt);
301        }
302    }
303}
304
305impl<'a> TryFrom<&NaiveType<'a>> for Multipart<'a> {
306    type Error = ();
307
308    #[cfg_attr(
309        feature = "tracing",
310        tracing::instrument(name = "type::Multipart::try_from")
311    )]
312    fn try_from(nt: &NaiveType<'a>) -> Result<Self, Self::Error> {
313        let mut other_params = vec![];
314        let mut boundary = None;
315        for param in &nt.params {
316            if param.name.0.to_ascii_lowercase().as_slice() == b"boundary" {
317                let s = param.value.chars().collect::<String>();
318                if boundary.is_none() {
319                    boundary = Some(s);
320                } else {
321                    // drop any redundant "boundary" parameter that is not the first
322                    #[cfg(feature = "tracing-unsupported")]
323                    warn!(boundary = s, "dropping redundant boundary parameter")
324                }
325            } else {
326                other_params.push(param.clone())
327            }
328        }
329        match boundary {
330            Some(boundary) => Ok(Multipart {
331                subtype: MultipartSubtype::from(nt),
332                boundary: Some(boundary),
333                other_params,
334            }),
335            None => Err(()),
336        }
337    }
338}
339
340#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
341#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
342pub enum MultipartSubtype {
343    Alternative,
344    Mixed,
345    Digest,
346    Parallel,
347    Report,
348    // neither of the above (capitalization does not matter).
349    // should be treated as Mixed
350    Unknown(MIMEAtom<'static>),
351}
352impl MultipartSubtype {
353    pub fn as_bytes(&self) -> &[u8] {
354        match self {
355            Self::Alternative => b"alternative",
356            Self::Mixed => b"mixed",
357            Self::Digest => b"digest",
358            Self::Parallel => b"parallel",
359            Self::Report => b"report",
360            Self::Unknown(v) => &v.0,
361        }
362    }
363}
364impl Print for MultipartSubtype {
365    fn print(&self, fmt: &mut impl Formatter) {
366        fmt.write_bytes(self.as_bytes())
367    }
368}
369
370impl<'a> From<&NaiveType<'a>> for MultipartSubtype {
371    fn from(nt: &NaiveType<'a>) -> Self {
372        let sub = nt.sub.0.to_ascii_lowercase();
373        match sub.as_slice() {
374            b"alternative" => Self::Alternative,
375            b"mixed" => Self::Mixed,
376            b"digest" => Self::Digest,
377            b"parallel" => Self::Parallel,
378            b"report" => Self::Report,
379            _ => Self::Unknown(nt.sub.to_static()),
380        }
381    }
382}
383
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for MultipartSubtype {
    /// Picks one of the six variants. An `Unknown` atom that spells a known
    /// subtype (in any capitalization) is rejected.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let choice = u.int_in_range(0..=5)?;
        let subtype = match choice {
            0 => Self::Alternative,
            1 => Self::Mixed,
            2 => Self::Digest,
            3 => Self::Parallel,
            4 => Self::Report,
            5 => {
                let atom: MIMEAtom = u.arbitrary()?;
                let is_known = matches!(
                    atom.0.to_ascii_lowercase().as_slice(),
                    b"alternative" | b"mixed" | b"digest" | b"parallel" | b"report"
                );
                if is_known {
                    return Err(arbitrary::Error::IncorrectFormat);
                }
                Self::Unknown(atom)
            }
            _ => unreachable!(),
        };
        Ok(subtype)
    }
}
407
408#[derive(Clone, ContainsUtf8, Debug, Default, PartialEq, ToStatic, ToStringFromPrint)]
409#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
410pub enum MessageSubtype {
411    #[default]
412    RFC822,
413    Global, // RFC6532 subtype (message containing UTF-8 headers)
414}
415impl MessageSubtype {
416    pub fn as_bytes(&self) -> &[u8] {
417        match self {
418            Self::RFC822 => b"rfc822",
419            Self::Global => b"global",
420        }
421    }
422}
423impl Print for MessageSubtype {
424    fn print(&self, fmt: &mut impl Formatter) {
425        fmt.write_bytes(self.as_bytes())
426    }
427}
428
429impl<'a> TryFrom<&NaiveType<'a>> for MessageSubtype {
430    type Error = ();
431
432    fn try_from(nt: &NaiveType<'a>) -> Result<Self, ()> {
433        let sub = nt.sub.0.to_ascii_lowercase();
434        match sub.as_slice() {
435            b"rfc822" => Ok(Self::RFC822),
436            b"global" => Ok(Self::Global),
437            _ => Err(()),
438        }
439    }
440}
441
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for MessageSubtype {
    /// Picks one of the two variants.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let choice = u.int_in_range(0..=1)?;
        let subtype = match choice {
            0 => Self::RFC822,
            1 => Self::Global,
            _ => unreachable!(),
        };
        Ok(subtype)
    }
}
452
453#[derive(Clone, ContainsUtf8, Debug, Default, PartialEq, ToStatic)]
454#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
455pub struct Message<'a> {
456    pub subtype: MessageSubtype,
457    pub params: Vec<Parameter<'a>>,
458}
459
460impl<'a> Print for Message<'a> {
461    fn print(&self, fmt: &mut impl Formatter) {
462        fmt.write_bytes(b"message/");
463        self.subtype.print(fmt);
464        for param in &self.params {
465            fmt.write_bytes(b";");
466            fmt.write_fws();
467            param.print(fmt);
468        }
469    }
470}
471
472impl<'a> TryFrom<&NaiveType<'a>> for Message<'a> {
473    type Error = ();
474    fn try_from(nt: &NaiveType<'a>) -> Result<Self, ()> {
475        Ok(Self {
476            subtype: MessageSubtype::try_from(nt)?,
477            params: nt.params.clone(),
478        })
479    }
480}
481
482#[derive(Clone, ContainsUtf8, Debug, PartialEq, Default, ToStatic)]
483#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
484pub struct Text<'a> {
485    // NOTE: an unknown subtype combined with an unknown charset should
486    // result in this type be treated as equivalent to the Binary type.
487    pub subtype: TextSubtype,
488    pub charset: EmailCharset,
489    // Invariant: parameters with .name != "charset"
490    pub other_params: Vec<Parameter<'a>>,
491}
492
493impl<'a> Text<'a> {
494    pub fn params(&self) -> Vec<Parameter<'a>> {
495        let mut params = self.other_params.clone();
496        params.push(Parameter {
497            name: MIMEAtom(b"charset".into()),
498            value: MIMEWord::Quoted(QuotedString(vec![self.charset.as_str().into()])).into_static(),
499        });
500        params
501    }
502}
503
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Text<'a> {
    /// Generates a `Text` with arbitrary subtype, charset and other
    /// parameters.
    ///
    /// Inputs that would put a `charset` parameter in `other_params` are
    /// rejected. The name is compared case-insensitively, matching how the
    /// conversion from a parsed type recognizes the charset parameter, so
    /// that spellings such as `Charset` cannot break the field's invariant.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let other_params: Vec<Parameter> = u.arbitrary()?;
        if other_params
            .iter()
            .any(|p| p.name.0.eq_ignore_ascii_case(b"charset"))
        {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self {
            subtype: u.arbitrary()?,
            charset: u.arbitrary()?,
            other_params,
        })
    }
}
518
519impl<'a> Print for Text<'a> {
520    fn print(&self, fmt: &mut impl Formatter) {
521        fmt.write_bytes(b"text/");
522        self.subtype.print(fmt);
523        fmt.write_bytes(b";");
524        fmt.write_fws();
525        fmt.write_bytes(b"charset=");
526        match &self.charset {
527            EmailCharset::Unknown(s) =>
528            // print it as quoted just to be safe
529            {
530                print_quoted(fmt, s.chars())
531            }
532            _ => fmt.write_bytes(self.charset.as_bytes()),
533        }
534        for param in &self.other_params {
535            fmt.write_bytes(b";");
536            fmt.write_fws();
537            param.print(fmt);
538        }
539    }
540}
541
542impl<'a> From<&NaiveType<'a>> for Text<'a> {
543    #[cfg_attr(feature = "tracing", tracing::instrument)]
544    fn from(nt: &NaiveType<'a>) -> Self {
545        let mut other_params = vec![];
546        let mut charset = None;
547        for param in &nt.params {
548            if param.name.0.to_ascii_lowercase().as_slice() == b"charset" {
549                let value: String = param.value.chars().collect();
550                if charset.is_none() {
551                    charset = Some(EmailCharset::from(&value));
552                } else {
553                    // drop any "charset" parameter that is not the first
554                    #[cfg(feature = "tracing-unsupported")]
555                    warn!(param = value, "dropping redundant charset parameter");
556                }
557            } else {
558                other_params.push(param.clone())
559            }
560        }
561
562        Self {
563            subtype: TextSubtype::from(nt),
564            charset: charset.unwrap_or_default(),
565            other_params,
566        }
567    }
568}
569
570#[derive(Clone, ContainsUtf8, Debug, PartialEq, Default, ToStatic, ToStringFromPrint)]
571#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
572pub enum TextSubtype {
573    #[default]
574    Plain,
575    Html,
576    // none of the above
577    Unknown(MIMEAtom<'static>),
578}
579impl TextSubtype {
580    pub fn as_bytes(&self) -> &[u8] {
581        match self {
582            Self::Plain => b"plain",
583            Self::Html => b"html",
584            Self::Unknown(b) => &b.0,
585        }
586    }
587}
588impl Print for TextSubtype {
589    fn print(&self, fmt: &mut impl Formatter) {
590        fmt.write_bytes(self.as_bytes())
591    }
592}
593
594impl<'a> From<&NaiveType<'a>> for TextSubtype {
595    fn from(nt: &NaiveType<'a>) -> Self {
596        let sub = nt.sub.0.to_ascii_lowercase();
597        match sub.as_slice() {
598            b"plain" => Self::Plain,
599            b"html" => Self::Html,
600            _ => Self::Unknown(nt.sub.to_static()),
601        }
602    }
603}
604
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for TextSubtype {
    /// Picks one of the three variants. An `Unknown` atom that spells a
    /// known subtype (in any capitalization) is rejected.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let choice = u.int_in_range(0..=2)?;
        let subtype = match choice {
            0 => Self::Plain,
            1 => Self::Html,
            2 => {
                let atom: MIMEAtom = u.arbitrary()?;
                let is_known = matches!(atom.0.to_ascii_lowercase().as_slice(), b"plain" | b"html");
                if is_known {
                    return Err(arbitrary::Error::IncorrectFormat);
                }
                Self::Unknown(atom)
            }
            _ => unreachable!(),
        };
        Ok(subtype)
    }
}
622
623#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic)]
624#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
625pub struct Binary<'a> {
626    // invariant: ctype.main is neither "multipart", "message" or "text"
627    pub ctype: NaiveType<'a>,
628}
629
630impl<'a> Print for Binary<'a> {
631    fn print(&self, fmt: &mut impl Formatter) {
632        self.ctype.print(fmt)
633    }
634}
635impl<'a> From<&NaiveType<'a>> for Binary<'a> {
636    fn from(nt: &NaiveType<'a>) -> Self {
637        Self { ctype: nt.clone() }
638    }
639}
640
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Binary<'a> {
    /// Generates a raw content type and rejects it when its main type (in
    /// any capitalization) is `multipart`, `message` or `text`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let ctype: NaiveType = u.arbitrary()?;
        let main = ctype.main.0.to_ascii_lowercase();
        match main.as_slice() {
            b"multipart" | b"message" | b"text" => Err(arbitrary::Error::IncorrectFormat),
            _ => Ok(Self { ctype }),
        }
    }
}
654
#[cfg(test)]
mod tests {
    use super::*;
    use crate::text::charset::EmailCharset;
    use crate::text::quoted::QuotedString;

    // ---- helpers to build expected values ----

    // An atom borrowing the given bytes.
    fn atom(bytes: &[u8]) -> MIMEAtom<'_> {
        MIMEAtom(bytes.into())
    }

    // A parameter whose value is an atom.
    fn atom_param<'a>(name: &'a [u8], value: &'a [u8]) -> Parameter<'a> {
        Parameter {
            name: atom(name),
            value: MIMEWord::Atom(atom(value)),
        }
    }

    // A parameter whose value is a quoted string made of a single chunk.
    fn quoted_param<'a>(name: &'a [u8], value: &'a str) -> Parameter<'a> {
        Parameter {
            name: atom(name),
            value: MIMEWord::Quoted(QuotedString(vec![value.into()])),
        }
    }

    #[test]
    fn test_parameter() {
        assert_eq!(
            parameter(b"charset=utf-8"),
            Ok((&b""[..], atom_param(b"charset", b"utf-8"))),
        );
        assert_eq!(
            parameter(b"charset=\"utf-8\""),
            Ok((&b""[..], quoted_param(b"charset", "utf-8"))),
        );
    }

    #[test]
    fn test_content_type_plaintext() {
        let (rest, nt) = naive_type(b"text/plain;\r\n charset=utf-8 ; hello=yolo").unwrap();
        assert_eq!(rest, &b""[..]);

        assert_eq!(
            nt.to_type(),
            AnyType::Text(Text {
                charset: EmailCharset::utf8(),
                subtype: TextSubtype::Plain,
                other_params: vec![atom_param(b"hello", b"yolo")],
            })
        );
    }

    // old invalid form of text/plain
    #[test]
    fn test_content_type_plaintext_old() {
        let (rest, nt) = naive_type(b"  text ").unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(
            nt.to_type(),
            AnyType::Text(Text {
                charset: EmailCharset::US_ASCII,
                subtype: TextSubtype::Plain,
                other_params: vec![],
            })
        );

        let (rest, nt) = naive_type(b"text;\r\n charset=utf-8 ; hello=yolo").unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(
            nt.to_type(),
            AnyType::Text(Text {
                charset: EmailCharset::utf8(),
                subtype: TextSubtype::Plain,
                other_params: vec![atom_param(b"hello", b"yolo")],
            })
        );
    }

    #[test]
    fn test_content_type_multipart() {
        let (rest, nt) = naive_type(b"multipart/mixed;\r\n\tboundary=\"--==_mimepart_64a3f2c69114f_2a13d020975fe\";\r\n\tcharset=UTF-8").unwrap();
        assert_eq!(rest, &[]);
        assert_eq!(
            nt.to_type(),
            AnyType::Multipart(Multipart {
                subtype: MultipartSubtype::Mixed,
                boundary: Some("--==_mimepart_64a3f2c69114f_2a13d020975fe".into()),
                other_params: vec![atom_param(b"charset", b"UTF-8")],
            })
        );
    }

    #[test]
    fn test_content_type_message() {
        let (rest, nt) = naive_type(b"message/rfc822").unwrap();
        assert_eq!(rest, &[]);
        assert_eq!(
            nt.to_type(),
            AnyType::Message(Message {
                subtype: MessageSubtype::RFC822,
                params: vec![],
            })
        );

        // unknown message subtype: treat it as "application/octet-stream"
        // (i.e. opaque Binary part)
        let (rest, nt) = naive_type(b"message/delivery-status").unwrap();
        assert_eq!(rest, &[]);
        assert_eq!(
            nt.to_type(),
            AnyType::Binary(Binary {
                ctype: NaiveType {
                    main: atom(b"message"),
                    sub: atom(b"delivery-status"),
                    params: vec![],
                }
            })
        );
    }

    #[test]
    fn test_content_type_comment() {
        let (rest, nt) = naive_type(b"text/plain; charset=\"us-ascii\" (Plain text)").unwrap();
        assert_eq!(rest, &[]);

        assert_eq!(
            nt.to_type(),
            AnyType::Text(Text {
                subtype: TextSubtype::Plain,
                charset: EmailCharset::from(b"us-ascii"),
                other_params: vec![],
            })
        );
    }

    #[test]
    fn test_broken_content_type() {
        let (rest, nt) = naive_type(b"abc/def/ghi; charset=us-ascii").unwrap();
        assert_eq!(rest, &[]);

        assert_eq!(
            nt,
            NaiveType {
                main: atom(b"abc"),
                sub: atom(b"def"),
                params: vec![atom_param(b"charset", b"us-ascii")],
            }
        );
    }

    #[test]
    fn test_parameter_ascii() {
        assert_eq!(
            parameter(b"charset = (simple) us-ascii (Plain text)"),
            Ok((&b""[..], atom_param(b"charset", b"us-ascii")))
        );
    }

    #[test]
    fn test_parameter_list_semicolons() {
        // we allow final semicolons
        assert_eq!(
            parameter_list(b";boundary=\"festivus\";"),
            Ok((&b""[..], vec![quoted_param(b"boundary", "festivus")]))
        );

        assert_eq!(
            parameter_list(b"; charset=UTF-8; format=flowed; "),
            Ok((
                &b""[..],
                vec![
                    atom_param(b"charset", b"UTF-8"),
                    atom_param(b"format", b"flowed"),
                ],
            ))
        );

        // semicolons can appear between quotes, this is part of the normal
        // quote syntax
        assert_eq!(
            parameter_list(b"; boundary=\"abc;def\"; foo=bar"),
            Ok((
                &b""[..],
                vec![
                    quoted_param(b"boundary", "abc;def"),
                    atom_param(b"foo", b"bar"),
                ],
            ))
        );
    }

    #[test]
    fn test_parameter_list_broken() {
        // these test cases come from real-world emails with broken parameter lists
        assert_eq!(
            parameter_list(b"; name=threadTest.ml; charset="),
            Ok((&b""[..], vec![atom_param(b"name", b"threadTest.ml")]))
        );

        // Anytime emits emails with 'charset: UTF-8'; we add support for those...
        assert_eq!(
            parameter_list(b"; charset: UTF-8; foo=bar"),
            Ok((
                &b""[..],
                vec![
                    atom_param(b"charset", b"UTF-8"),
                    atom_param(b"foo", b"bar"),
                ]
            ))
        );

        assert_eq!(
            // Example emitted by inria CASA. An extra space was inserted before
            // the Content-Transfer-Encoding header name, making it a
            // continuation of the previous Content-Type header as per line
            // folding rules... This ends up being read as an extra parameter
            // "thanks" to the recovery of ':' as '='...
            parameter_list(
                b"; name=\"calendar.ics\";method=REQUEST;\n Content-Transfer-Encoding: 8bit;"
            ),
            Ok((
                &b""[..],
                vec![
                    quoted_param(b"name", "calendar.ics"),
                    atom_param(b"method", b"REQUEST"),
                    atom_param(b"Content-Transfer-Encoding", b"8bit"),
                ]
            ))
        );

        assert_eq!(
            parameter_list(b"; name=threadTest.ml foo=bar; baz=qux"),
            Ok((
                &b""[..],
                vec![
                    atom_param(b"name", b"threadTest.ml"),
                    atom_param(b"baz", b"qux"),
                ]
            ))
        );
    }

    #[test]
    fn test_roundtrip_unknown() {
        let raw = b"Foo/Bar; bAr=Unknown; uU=zorrO";
        let (rest, nt) = naive_type(raw).unwrap();
        assert_eq!(rest, &[]);
        let t: AnyType = nt.to_type();
        assert!(matches!(t, AnyType::Binary(_)));
        let printed = crate::print::tests::print_to_vec(t);
        assert_eq!(
            String::from_utf8_lossy(raw),
            String::from_utf8_lossy(&printed)
        )
    }
}