Skip to main content

eml_codec/
header.rs

1#[cfg(feature = "arbitrary")]
2use arbitrary::Arbitrary;
3use bounded_static::ToStatic;
4use nom::{
5    branch::alt,
6    bytes::complete::{tag, take_while1},
7    character::complete::space0,
8    combinator::{all_consuming, consumed, eof, map, rest},
9    multi::many0,
10    sequence::{pair, terminated, tuple},
11    IResult, Parser,
12};
13use std::borrow::Cow;
14use std::fmt;
15#[cfg(feature = "tracing")]
16use tracing::warn;
17
18use crate::i18n::ContainsUtf8;
19use crate::print::{Formatter, Print};
20use crate::raw_input::RawInput;
21use crate::text::misc_token;
22use crate::text::whitespace::{foldable_line, obs_crlf};
23#[cfg(any(feature = "tracing-recover", feature = "tracing-unsupported"))]
24use crate::utils::bytes_to_trace_string;
25#[cfg(feature = "arbitrary")]
26use crate::{arbitrary_utils::arbitrary_vec_nonempty_where, fuzz_eq::FuzzEq};
27
// A header field name, stored as raw bytes (either borrowed or owned).
//
// Names produced by the `field_name` parser consist only of `ftext` bytes
// (printable US-ASCII except ":"). The inner field is public, so a value built
// by hand is not checked against that rule.
#[derive(PartialEq, Clone, ContainsUtf8, ToStatic)]
// NOTE(review): presumably makes the derived `ContainsUtf8` always answer
// `false` for this type -- confirm against the derive macro.
#[contains_utf8(false)]
pub struct FieldName<'a>(pub Cow<'a, [u8]>);
32impl<'a> FieldName<'a> {
33    pub fn bytes(&'a self) -> &'a [u8] {
34        &self.0
35    }
36}
37impl<'a> fmt::Debug for FieldName<'a> {
38    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
39        fmt.debug_tuple("FieldName")
40            .field(&String::from_utf8_lossy(&self.0))
41            .finish()
42    }
43}
44impl<'a> Print for FieldName<'a> {
45    fn print(&self, fmt: &mut impl Formatter) {
46        fmt.write_bytes(&self.0)
47    }
48}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for FieldName<'a> {
    // Generates an owned name whose bytes all satisfy `is_ftext`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        // NOTE(review): `b'X'` is presumably the byte the helper falls back to
        // when it cannot draw an acceptable one -- confirm in `arbitrary_utils`.
        let name_bytes: Vec<u8> =
            arbitrary_vec_nonempty_where(u, |byte| is_ftext(*byte), b'X')?;
        Ok(Self(Cow::Owned(name_bytes)))
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> FuzzEq for FieldName<'a> {
    // Two names are fuzz-equal exactly when their bytes are equal, regardless
    // of whether either side is borrowed or owned.
    fn fuzz_eq(&self, other: &Self) -> bool {
        let lhs: &[u8] = &self.0;
        let rhs: &[u8] = &other.0;
        lhs == rhs
    }
}
62
// Intermediate AST for two-step parsing of header fields. Structured headers
// are then parsed from this.
//
// A `FieldRaw` corresponds to a header field after performing "framing", i.e.
// identifying header field boundaries: it is the raw data found between two
// header boundaries, split into a valid name and an arbitrary body.
//
// It does not say anything about the validity of the body. The body is stored
// as a raw slice because it will be parsed further.
//
// A header line that cannot be split into a name and a body (a "bad" field)
// has no `FieldRaw` representation: `header_kv` drops such lines.
#[derive(PartialEq, Clone)]
pub struct FieldRaw<'a> {
    // Field name (what precedes the ":").
    pub name: FieldName<'a>,
    // Unparsed bytes following the ":". As produced by `field_raw`, the
    // terminating line break is not included, but line breaks belonging to
    // folded lines are.
    pub body: &'a [u8],
}
81impl<'a> fmt::Debug for FieldRaw<'a> {
82    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
83        fmt.debug_struct("header::FieldRaw")
84            .field("name", &self.name)
85            .field("body", &String::from_utf8_lossy(self.body))
86            .finish()
87    }
88}
89impl<'a> ContainsUtf8 for FieldRaw<'a> {
90    fn contains_utf8(&self) -> bool {
91        self.body.iter().any(|c| !c.is_ascii())
92    }
93}
94
95/// Parse headers as raw key/values.
96/// Stop at an empty line or at EOF.
97#[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
98pub fn header_kv(input: &[u8]) -> (&[u8], Vec<FieldRaw<'_>>) {
99    // SAFETY: `field_raw_opt` only accepts non-empty inputs
100    let (input, mut fields) = many0(field_raw_opt)(input).unwrap();
101    // SAFETY: `rest` (last case) always succeeds.
102    let (input, terminator) = alt((
103        // empty line
104        map(obs_crlf, |_| None),
105        // The empty line is optional if there is no body following the headers,
106        // so we must also accept EOF.
107        map(eof, |_| None),
108        // For best-effort parsing, we also try to parse any remaining bytes before
109        // EOF (as if EOF was a CRLF).
110        map(consumed(pair(field_name, rest)), |(_i, (name, body))| {
111            #[cfg(feature = "tracing-recover")]
112            warn!(input = %bytes_to_trace_string(_i), "raw field before EOF");
113            Some(FieldRaw { name, body })
114        }),
115        map(rest, |_i: &[u8]| {
116            #[cfg(feature = "tracing-unsupported")]
117            warn!(input = %bytes_to_trace_string(_i), "raw bytes before EOF");
118            None
119        }),
120    ))(input)
121    .unwrap();
122
123    fields.push(terminator);
124
125    // drop `None`s ("bad" fields)
126    let fields = fields.into_iter().flatten().collect();
127
128    (input, fields)
129}
130
131// NOTE: field_raw only recognizes non-empty inputs.
132fn field_raw(input: &[u8]) -> IResult<&[u8], FieldRaw<'_>> {
133    map(pair(field_name, foldable_line(false)), |(name, body)| {
134        FieldRaw { name, body }
135    })(input)
136}
137
138// A best-effort version of `field_raw` that also recognizes lines that cannot
139// be parsed as a field name and body. (It returns `None` in this case.)
140// NOTE: `field_raw_opt` only recognizes non-empty inputs.
141// NOTE: furthermore, in the "best effort" case, `foldable_line` only
142// recognizes non-empty lines; this is important so that it does not consume the
143// final empty line (obs_crlf) that terminates `header_kv`.
144fn field_raw_opt(input: &[u8]) -> IResult<&[u8], Option<FieldRaw<'_>>> {
145    alt((
146        map(field_raw, Some),
147        // best-effort: a (non-empty) foldable line that cannot even be parsed as
148        // a field name and body. We drop it afterwards.
149        map(foldable_line(true), |_i| {
150            #[cfg(feature = "tracing-unsupported")]
151            warn!(input = %bytes_to_trace_string(_i), "malformed raw header line");
152            None
153        }),
154    ))(input)
155}
156
157/// Header field name
158/// ```abnf
159/// field-name =   1*ftext
160/// ftext      =   %d33-57 /          ; Printable US-ASCII
161///                %d59-126           ;  characters not including
162///                                   ;  ":".
163/// followed by *WSP in the obsolete syntax
164/// ```
165pub fn field_name(input: &[u8]) -> IResult<&[u8], FieldName<'_>> {
166    terminated(
167        take_while1(is_ftext).map(|s| FieldName(Cow::Borrowed(s))),
168        tuple((space0, tag(b":"))),
169    )(input)
170}
171
// `ftext`: printable US-ASCII (0x21..=0x7E) with the exception of ":" (0x3A).
fn is_ftext(c: u8) -> bool {
    matches!(c, 0x21..=0x39 | 0x3B..=0x7E)
}
175
176// Parse a raw header field as an unstructured header
177
// An unstructured header field. As built by `Unstructured::from_raw`, it
// holds the field name, the body parsed by `misc_token::unstructured`, and the
// raw bytes that body was parsed from.
#[derive(Debug, PartialEq, Clone, ContainsUtf8, ToStatic)]
#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
pub struct Unstructured<'a> {
    // Field name.
    pub name: FieldName<'a>,
    // Parsed form of the body.
    pub body: misc_token::Unstructured<'a>,
    // The body as it appeared in the input, before parsing.
    pub raw_body: RawInput<'a>,
}
185
186impl<'a> Unstructured<'a> {
187    // TODO: don't throw away the errors
188    pub fn from_raw(f: &FieldRaw<'a>) -> Option<Self> {
189        let (_, body) = all_consuming(misc_token::unstructured)(f.body).ok()?;
190        Some(Unstructured {
191            name: f.name.clone(),
192            body,
193            raw_body: f.body.into(),
194        })
195    }
196}
197impl<'a> Print for Unstructured<'a> {
198    fn print(&self, fmt: &mut impl Formatter) {
199        print_unstructured(fmt, &self.name.0, &self.body)
200    }
201}
202
203// Helper to print structured headers
204
/// Print a complete header field line.
///
/// Writes, in order: `name`, a ":", whatever `write_fws` emits, the printed
/// form of `body`, and finally whatever `write_crlf` emits.
pub fn print<T: Print>(fmt: &mut impl Formatter, name: &[u8], body: T) {
    fmt.write_bytes(name);
    fmt.write_bytes(b":");
    // Separator between the colon and the body.
    fmt.write_fws();
    body.print(fmt);
    fmt.write_crlf();
}
212
/// Print a complete unstructured header field line.
///
/// Writes, in order: `name`, a ":", the printed form of `body`, and finally
/// whatever `write_crlf` emits. Unlike `print`, `write_fws` is not called
/// after the colon.
pub fn print_unstructured<'a>(
    fmt: &mut impl Formatter,
    name: &[u8],
    body: &misc_token::Unstructured<'a>,
) {
    fmt.write_bytes(name);
    fmt.write_bytes(b":");
    // NOTE(review): presumably no separator is needed here because the parsed
    // body keeps its own leading whitespace -- confirm in `misc_token`.
    body.print(fmt);
    fmt.write_crlf();
}
223
#[cfg(test)]
mod tests {
    use super::*;
    use misc_token::{UnstrToken, UnstrTxtKind};

    // Shorthand for the `FieldRaw` values expected by the assertions below.
    fn raw<'a>(name: &'a [u8], body: &'a [u8]) -> FieldRaw<'a> {
        FieldRaw {
            name: FieldName(Cow::Borrowed(name)),
            body,
        }
    }

    #[test]
    fn test_field_raw_good() {
        // (input, expected name, expected body)
        let cases: [(&[u8], &[u8], &[u8]); 3] = [
            (
                &b"X-Unknown: something something\r\n"[..],
                &b"X-Unknown"[..],
                &b" something something"[..],
            ),
            (&b"X-Foo:\r\n"[..], &b"X-Foo"[..], &b""[..]),
            // with line folding
            (
                &b"From:\r\n foo@example.com\r\n abcd\r\n"[..],
                &b"From"[..],
                &b"\r\n foo@example.com\r\n abcd"[..],
            ),
        ];

        for (input, name, body) in cases {
            let (rest, f) = field_raw(input).unwrap();
            assert!(rest.is_empty());
            assert_eq!(f, raw(name, body));
        }
    }

    #[test]
    fn test_unstructured() {
        let u = Unstructured::from_raw(&raw(b"X-Unknown", b" something something")).unwrap();

        let expected_tokens = vec![
            UnstrToken::from_plain(" ", UnstrTxtKind::Fws),
            UnstrToken::from_plain("something", UnstrTxtKind::Txt),
            UnstrToken::from_plain(" ", UnstrTxtKind::Fws),
            UnstrToken::from_plain("something", UnstrTxtKind::Txt),
        ];
        assert_eq!(
            u,
            Unstructured {
                name: FieldName(b"X-Unknown".into()),
                body: misc_token::Unstructured(expected_tokens),
                raw_body: b" something something".into(),
            }
        )
    }

    #[test]
    fn test_no_body() {
        let (rest, fields) = header_kv(b"X-Foo: something something\r\nX-Bar: something else\r\n");
        assert!(rest.is_empty());
        assert_eq!(
            fields,
            vec![
                raw(b"X-Foo", b" something something"),
                raw(b"X-Bar", b" something else"),
            ]
        )
    }

    #[test]
    fn test_no_headers() {
        // (input, expected remaining input); no field is expected in any case
        let cases: [(&[u8], &[u8]); 4] = [
            (&b"\r\nthe rest"[..], &b"the rest"[..]),
            (&b"\nthe rest"[..], &b"the rest"[..]),
            (&b"\n\t\t\t"[..], &b"\t\t\t"[..]),
            (&b"\n\t\t\t\r\n"[..], &b"\t\t\t\r\n"[..]),
        ];

        for (input, expected_rest) in cases {
            let (rest, fields) = header_kv(input);
            assert_eq!(rest, expected_rest);
            assert_eq!(fields, vec![]);
        }
    }

    #[test]
    fn test_best_effort_good_before_eof() {
        // The last field is not terminated by a line break.
        let (rest, fields) = header_kv(b"X-Foo: something something\r\nX-Bar: something else");
        assert!(rest.is_empty());
        assert_eq!(
            fields,
            vec![
                raw(b"X-Foo", b" something something"),
                raw(b"X-Bar", b" something else"),
            ]
        )
    }

    #[test]
    fn test_best_effort_bad_before_eof() {
        // The trailing bytes are not a field: they are consumed and dropped.
        let (rest, fields) = header_kv(b"X-Foo: something something\r\nrandom junk");
        assert!(rest.is_empty());
        assert_eq!(fields, vec![raw(b"X-Foo", b" something something")])
    }
}