Skip to main content

eml_codec/mime/
mechanism.rs

1use crate::i18n::ContainsUtf8;
2use crate::print::{Formatter, Print, ToStringFromPrint};
3use crate::text::whitespace::cfws;
4use crate::text::words::{mime_atom, MIMEAtom};
5#[cfg(feature = "tracing-recover")]
6use crate::utils::bytes_to_trace_string;
7use bounded_static::ToStatic;
8use eml_codec_derives::instrument_input;
9use nom::{
10    branch::alt,
11    bytes::complete::{tag, tag_no_case},
12    combinator::{consumed, map, opt, value},
13    sequence::{delimited, tuple},
14    IResult,
15};
16#[cfg(feature = "tracing")]
17use tracing::warn;
18#[cfg(feature = "arbitrary")]
19use {crate::fuzz_eq::FuzzEq, arbitrary::Arbitrary};
20
21#[derive(Debug, Clone, PartialEq, Default, ToStatic, ToStringFromPrint)]
22#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
23pub enum Mechanism<'a> {
24    #[default]
25    _7Bit,
26    _8Bit,
27    Binary,
28    QuotedPrintable,
29    Base64,
30    Other(MIMEAtom<'a>),
31}
32impl<'a> Mechanism<'a> {
33    pub fn as_bytes(&self) -> &[u8] {
34        match self {
35            Self::_7Bit => b"7bit",
36            Self::_8Bit => b"8bit",
37            Self::Binary => b"binary",
38            Self::QuotedPrintable => b"quoted-printable",
39            Self::Base64 => b"base64",
40            Self::Other(x) => &x.0,
41        }
42    }
43}
44
45impl<'a> Print for Mechanism<'a> {
46    fn print(&self, fmt: &mut impl Formatter) {
47        fmt.write_bytes(self.as_bytes())
48    }
49}
50impl<'a> ContainsUtf8 for Mechanism<'a> {
51    fn contains_utf8(&self) -> bool {
52        false
53    }
54}
55impl<'a> Mechanism<'a> {
56    // RFC2046: for entities of type "multipart", no encoding other than 7bit,
57    // 8bit and binary is permitted.
58    //
59    // Real-world emails do sometimes specify other encodings, but test data
60    // suggests that each time the incorrect encoding should just be ignored.
61    // This function thus converts a `Mechanism` to ensure it belongs to one of
62    // these three encodings by returning the default mechanism in case of an
63    // invalid value.
64    #[cfg_attr(feature = "tracing", tracing::instrument)]
65    pub fn to_multipart_encoding(&self) -> Mechanism<'static> {
66        use bounded_static::ToBoundedStatic;
67        match self {
68            Mechanism::_7Bit | Mechanism::_8Bit | Mechanism::Binary => self.to_static(),
69            _ => {
70                #[cfg(feature = "tracing-recover")]
71                warn!(mechanism = ?self, "to_multipart_encoding: ignoring invalid mechanism");
72                Mechanism::default()
73            }
74        }
75    }
76
77    // RFC2046: for entities of type "message/rfc822", no encoding other than
78    // 7bit, 8bit and binary is permitted.
79    //
80    // We implement the same logic as for multipart entities, but define a
81    // separate function to allow defining recovery logic specific to each case,
82    // if needed. In particular, this is traced as tracing-unsupported for now
83    // as we lack enough real-world data to know if this is an acceptable
84    // recovery strategy.
85    #[cfg_attr(feature = "tracing", tracing::instrument)]
86    pub fn to_message_rfc822_encoding(&self) -> Mechanism<'static> {
87        use bounded_static::ToBoundedStatic;
88        match self {
89            Mechanism::_7Bit | Mechanism::_8Bit | Mechanism::Binary => self.to_static(),
90            _ => {
91                #[cfg(feature = "tracing-unsupported")]
92                warn!(mechanism = ?self, "to_message_encoding: ignoring invalid mechanism");
93                Mechanism::default()
94            }
95        }
96    }
97}
98
99#[instrument_input("tracing")]
100pub fn mechanism(input: &[u8]) -> IResult<&[u8], Mechanism<'_>> {
101    alt((
102        delimited(
103            opt(cfws),
104            alt((
105                value(Mechanism::_7Bit, tag_no_case("7bit")),
106                value(Mechanism::_8Bit, tag_no_case("8bit")),
107                value(Mechanism::Binary, tag_no_case("binary")),
108                value(Mechanism::QuotedPrintable, tag_no_case("quoted-printable")),
109                value(Mechanism::Base64, tag_no_case("base64")),
110            )),
111            // the ";" is not in the RFC but was found in some emails
112            tuple((opt(cfws), opt(tag(";")), opt(cfws))),
113        ),
114        map(consumed(mime_atom), |(_i, tok)| {
115            #[cfg(feature = "tracing-recover")]
116            warn!(input = %bytes_to_trace_string(_i), "unknown mechanism");
117            Mechanism::Other(tok)
118        }),
119    ))(input)
120}
121
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `mechanism` consumes all of `input` and yields `expected`.
    ///
    /// `#[track_caller]` makes a failure point at the calling line rather
    /// than at this helper.
    #[track_caller]
    fn assert_parses_fully<'a>(input: &'a [u8], expected: Mechanism<'a>) {
        assert_eq!(mechanism(input), Ok((&b""[..], expected)));
    }

    #[test]
    fn test_mechanism() {
        // Bare token.
        assert_parses_fully(b"7bit", Mechanism::_7Bit);

        // Leading comment and whitespace.
        assert_parses_fully(b"(youhou) 8bit", Mechanism::_8Bit);

        // Mixed case with the non-standard trailing ";".
        assert_parses_fully(b"8Bit;", Mechanism::_8Bit);

        // Comments on both sides, mixed case.
        assert_parses_fully(b"(blip) bInArY (blip blip)", Mechanism::Binary);

        // Surrounding whitespace only.
        assert_parses_fully(b" base64 ", Mechanism::Base64);

        assert_parses_fully(b" Quoted-Printable ", Mechanism::QuotedPrintable);
    }
}