eml_parser/
parser.rs

1use std::{fs, iter::Peekable, path::Path, sync::LazyLock};
2
3use regex::Regex;
4
5use crate::eml::*;
6use crate::errors::EmlError;
7
/// Line-ending state tracked while scanning the body of a single header field.
///
/// Variant names spell out the end-of-line characters seen so far, which is why the naming
/// lints are silenced for this type.
#[derive(Debug)]
#[allow(non_camel_case_types, clippy::upper_case_acronyms)]
enum LwspState {
    /// Inside the value; no end-of-line characters are pending.
    ReadingContent,
    /// A line feed was seen without a preceding carriage return (non-standard EOLs).
    LF,
    /// A carriage return was seen.
    CR,
    /// A carriage return followed by a line feed was seen.
    CRLF,
    /// A CRLF followed by a further carriage return was seen.
    CRLFCR,
    /// The header block ended with LF + LF.
    EndOfHeader_LFLF,
    /// The header block ended with CR + CR.
    EndOfHeader_CRCR,
    /// The header block ended with CRLF + CRLF.
    EndOfHeader_CRLFCRLF,
}
20
/// Coarse classification of one input character, as far as header folding is concerned.
#[derive(Debug)]
#[allow(clippy::upper_case_acronyms)]
enum InputType {
    /// Carriage return (`\r`).
    CR,
    /// Line feed (`\n`).
    LF,
    /// Whitespace other than CR and LF.
    WSP,
    /// Any other character.
    NonWsp,
}
29
/// How much of the message body the parser should return.
#[derive(Debug)]
enum BodyHandling {
    /// Do not return a body at all.
    None,
    /// Return at most this many bytes of the body.
    Preview(usize),
    /// Return the whole body.
    All,
}
36
37#[derive(Debug)]
38pub struct EmlParser {
39    content: String,
40    position: usize,
41
42    body_handling: BodyHandling,
43}
44
45/// for matching: "John Smith" <jsmith@example.com>
46static NAME_ADDR_RE: LazyLock<Regex> =
47    LazyLock::new(|| Regex::new(r#"^"(.?+)" <\s*([^>]+)\s*>[ ,]*"#).unwrap());
48
49/// for matching the email in brackets without name: <jsmith@example.com>
50static ADDR_RE1: LazyLock<Regex> =
51    LazyLock::new(|| Regex::new(r#"^\s*<\s*([^>]+)\s*>[ ,]*"#).unwrap());
52
53/// for matching the email without brackets: jsmith@example.com
54static ADDR_RE2: LazyLock<Regex> =
55    LazyLock::new(|| Regex::new(r#"^\s*([^"<>@]+@[^"<>@\s,]+)[ ,]*"#).unwrap());
56
57impl EmlParser {
58    /// Read an .eml file from disk, parsing its contents.
59    /// Note that the current implementation loads the entire file to memory since `std::fs::File`
60    /// doesn't provide an iterator over `char` that could give a `Peekable`.
61    // One possible TODO is rolling something like https://github.com/C4K3/peekable-reader-rs into
62    // this project.
63    pub fn from_file(filename: impl AsRef<Path>) -> Result<Self, EmlError> {
64        let content = fs::read_to_string(filename)?;
65
66        Ok(EmlParser {
67            content,
68            position: 0,
69            body_handling: BodyHandling::All,
70        })
71    }
72
73    pub fn from_string(content: String) -> Self {
74        EmlParser {
75            content,
76            position: 0,
77            body_handling: BodyHandling::All,
78        }
79    }
80
81    // Builder pattern methods
82    pub fn ignore_body(mut self) -> Self {
83        self.body_handling = BodyHandling::None;
84        self
85    }
86
87    pub fn with_body(mut self) -> Self {
88        self.body_handling = BodyHandling::All;
89        self
90    }
91
92    pub fn with_body_preview(mut self, bytes: usize) -> Self {
93        self.body_handling = BodyHandling::Preview(bytes);
94        self
95    }
96
97    pub fn parse(&mut self) -> Result<Eml, EmlError> {
98        if self.content.is_empty() {
99            return Err(EmlError::UnexpectedEndOfStream(String::from("Empty input")));
100        }
101
102        let content = self.content.to_string(); // making a copy so we can have a mutable reference
103        let chars = content.chars();
104        let mut char_input = chars.peekable();
105        let eml = self.parse_email(&mut char_input)?;
106
107        Ok(eml)
108    }
109
110    fn parse_email<T: Iterator<Item = char>>(
111        &mut self,
112        char_input: &mut Peekable<T>,
113    ) -> Result<Eml, EmlError> {
114        let headers = self.parse_header_fields(char_input)?;
115
116        let mut result = Eml {
117            body: self.parse_body(),
118            ..Default::default()
119        };
120
121        for header in headers {
122            match (&header.name[..], &header.value) {
123                ("To", _) => result.to = Some(header.value),
124                ("From", _) => result.from = Some(header.value),
125                ("Subject", HeaderFieldValue::Unstructured(subj)) => {
126                    result.subject = Some((*subj).to_string())
127                }
128                _ => result.headers.push(header),
129            }
130        }
131
132        Ok(result)
133    }
134
135    fn parse_header_fields<T: Iterator<Item = char>>(
136        &mut self,
137        char_input: &mut Peekable<T>,
138    ) -> Result<Vec<HeaderField>, EmlError> {
139        use HeaderFieldValue::*;
140        let mut headers = Vec::new();
141
142        while let Some((name, value, eoh)) = self.read_raw_header_field(char_input)? {
143            // Attempt to structure this header value
144            let value = match (&name[..], value) {
145                ("From", v)
146                | ("To", v)
147                | ("Reply-To", v)
148                | ("Delivered-To", v)
149                | ("X-Original-To", v)
150                | ("Return-Path", v) => EmlParser::parse_email_address(v),
151                (_, v) if v.is_empty() => Empty,
152                (_, v) => match rfc2047_decoder::decode(&v) {
153                    Ok(decoded) => Unstructured(decoded),
154                    Err(_) => Unstructured(v),
155                },
156            };
157            headers.push(HeaderField { name, value });
158
159            if eoh {
160                break;
161            }
162        }
163        Ok(headers)
164    }
165
166    fn parse_email_address(value: String) -> HeaderFieldValue {
167        // Email address header values can span multiple lines. Clean those up first
168        let mut remaining = value.replace(['\n', '\r'], "");
169
170        let mut found_addresses = Vec::new();
171
172        while !remaining.is_empty() {
173            if let Some(cap) = NAME_ADDR_RE.captures(&remaining) {
174                let name = cap.get(1).unwrap().as_str().to_string();
175                let address = cap.get(2).unwrap().as_str().to_string();
176                found_addresses.push(EmailAddress::NameAndEmailAddress { name, address });
177
178                let entire_match = cap.get(0).unwrap();
179                remaining = remaining[entire_match.end()..].to_string();
180            } else if let Some(cap) = ADDR_RE1.captures(&remaining) {
181                let address = cap.get(1).unwrap().as_str().to_string();
182                found_addresses.push(EmailAddress::AddressOnly { address });
183
184                let entire_match = cap.get(0).unwrap();
185                remaining = remaining[entire_match.end()..].to_string();
186            } else if let Some(cap) = ADDR_RE2.captures(&remaining) {
187                let address = cap.get(1).unwrap().as_str().to_string();
188                found_addresses.push(EmailAddress::AddressOnly { address });
189
190                let entire_match = cap.get(0).unwrap();
191                remaining = remaining[entire_match.end()..].to_string();
192            } else {
193                // Something weird
194                return HeaderFieldValue::Unstructured(value);
195            }
196        }
197
198        //match found_addresses {
199        if found_addresses.len() == 1 {
200            HeaderFieldValue::SingleEmailAddress(found_addresses.into_iter().next().unwrap())
201        } else {
202            HeaderFieldValue::MultipleEmailAddresses(found_addresses)
203        }
204    }
205
206    fn read_raw_header_field<T: Iterator<Item = char>>(
207        &mut self,
208        char_input: &mut Peekable<T>,
209    ) -> Result<Option<(String, String, bool)>, EmlError> {
210        match char_input.peek() {
211            Some('\n') | Some('\r') => return Ok(None), // finding a CR or LF when looking for a header means the body is about to start
212            Some(_) => {}
213            None => {
214                return Err(EmlError::UnexpectedEndOfStream(String::from(
215                    "Expected the beginning of a header field name",
216                )))
217            }
218        };
219
220        if let Some(name) = self.read_field_name(char_input)? {
221            match char_input.peek() {
222                Some(':') => {
223                    self.position += 1;
224                    char_input.next();
225                }
226                Some(c) => {
227                    return Err(EmlError::UnexpectedContent(format!(
228                        "Expected ':' to terminate header field '{}'; got '{}' (byte value {})",
229                        name, c, *c as u8
230                    )))
231                }
232                None => {
233                    return Err(EmlError::UnexpectedEndOfStream(format!(
234                        "Expected ':' to terminate header field '{}'",
235                        name
236                    )))
237                }
238            };
239
240            match char_input.peek() {
241                Some(' ') => {
242                    self.position += 1;
243                    char_input.next();
244                }
245                Some(_) => {}
246                None => {
247                    return Err(EmlError::UnexpectedEndOfStream(format!(
248                        "Expected non-empty content for header field '{}'",
249                        name
250                    )))
251                }
252            };
253
254            let (value, eoh) = self.read_field_body(char_input)?;
255
256            Ok(Some((name, value, eoh)))
257        } else {
258            Ok(None)
259        }
260    }
261
262    // 1*<any CHAR, excluding CTLs, SPACE, and ":">
263    fn read_field_name<T: Iterator<Item = char>>(
264        &mut self,
265        char_input: &mut Peekable<T>,
266    ) -> Result<Option<String>, EmlError> {
267        let start_position = self.position;
268        let mut end_position = self.position;
269
270        while let Some(c) = char_input.peek() {
271            if c == &'\n' || c == &'\r' {
272                // we shouldn't see CR or LF in a field name; if we do, it's likely the end of the
273                // header
274                return Ok(None);
275            } else if c != &' ' && c != &':' && !c.is_control() {
276                end_position += c.len_utf8();
277                char_input.next();
278            } else {
279                break;
280            }
281        }
282
283        if end_position == self.content.len() {
284            Err(EmlError::UnexpectedEndOfStream(String::from(
285                "Expected content for header field",
286            )))
287        } else {
288            self.position = end_position;
289            Ok(Some(String::from(
290                &self.content[start_position..end_position],
291            )))
292        }
293    }
294
295    /// Read until we've found a CRLF that does NOT have white whitespace after it.
296    ///
297    /// On success, this returns the body and a bool indicating end-of-header.
298    fn read_field_body<T: Iterator<Item = char>>(
299        &mut self,
300        char_input: &mut Peekable<T>,
301    ) -> Result<(String, bool), EmlError> {
302        let start_position = self.position;
303        let mut end_position = self.position;
304        let mut state = LwspState::ReadingContent;
305
306        while let Some(next_char) = char_input.peek() {
307            let ws = EmlParser::next_char_type(*next_char);
308            let len = next_char.len_utf8();
309
310            match (&state, ws) {
311                (LwspState::ReadingContent, InputType::WSP)
312                | (LwspState::ReadingContent, InputType::NonWsp) => {
313                    // While reading input, anything not CR or LF gets included
314                    char_input.next();
315                    end_position += len;
316                }
317
318                (LwspState::ReadingContent, InputType::CR) => {
319                    state = LwspState::CR;
320                    char_input.next();
321                    end_position += len;
322                }
323
324                (LwspState::ReadingContent, InputType::LF) => {
325                    state = LwspState::LF;
326                    char_input.next();
327                    end_position += len;
328                }
329
330                (LwspState::LF, InputType::WSP)
331                | (LwspState::CR, InputType::WSP)
332                | (LwspState::CRLF, InputType::WSP) => {
333                    // A newline followed by whitespace is the definition of linear whitespace.
334                    // This is an input that spans multiple lines; for example:
335                    //       X-Received: by 0000:111:222e:: with SMTP id abcdef;
336                    //               Mon, 13 Apr 2020 14:04:07 -0700 (PDT)
337                    // In this case, we return to the ReadingContent state
338                    state = LwspState::ReadingContent;
339                    char_input.next();
340                    end_position += len;
341                }
342
343                (LwspState::LF, InputType::NonWsp)
344                | (LwspState::CR, InputType::NonWsp)
345                | (LwspState::CRLF, InputType::NonWsp) => {
346                    // A newline followed by non-whitespace means we're at the end of this
347                    // header item.
348                    break;
349                }
350
351                (LwspState::LF, InputType::LF) => {
352                    // Found the end of the header in the form of LF + LF
353                    state = LwspState::EndOfHeader_LFLF;
354                    char_input.next();
355                    end_position += len;
356                    break;
357                }
358                (LwspState::CR, InputType::CR) => {
359                    // Found the end of the header in the form of CR + CR
360                    state = LwspState::EndOfHeader_CRCR;
361                    char_input.next();
362                    end_position += len;
363                    break;
364                }
365                (LwspState::CRLFCR, InputType::LF) => {
366                    // Found the end of the header in the form of CRLF + CRLF
367                    state = LwspState::EndOfHeader_CRLFCRLF;
368                    char_input.next();
369                    end_position += len;
370                    break;
371                }
372
373                (LwspState::CR, InputType::LF) => {
374                    // CR+LF will probably lead to CRLF+CRLF
375                    state = LwspState::CRLF;
376                    char_input.next();
377                    end_position += len;
378                }
379
380                (LwspState::CRLF, InputType::CR) => {
381                    // Approaching CRLF+CRLF
382                    state = LwspState::CRLFCR;
383                    char_input.next();
384                    end_position += len;
385                }
386
387                // Rather strict handling of line endings when we're at the border of the header
388                // and body. According to RFC0822, the body "is separated from the headers by a
389                // null line (i.e., a line with nothing preceding the CRLF)."
390                // In reality, we'll see "\n\n" or possibly even "\r\r" separating lines.
391                // It's not unreasonable to think that we'd see other unusual input such as
392                // "\r\n\n" separating the header from the body, but for now, I'm only accepting
393                // "\r\r", "\n\n", and "\r\n\r\n". The following situations are all erroneous:
394                // We should really just see
395                (LwspState::CRLFCR, _) => {
396                    // CRLF + CR shouldn't be followed by anything but a line feed.
397                    return Err(EmlError::UnexpectedContent(String::from(
398                        "Found CRLF+CR in header without expected LF",
399                    )));
400                }
401
402                (LwspState::CRLF, InputType::LF) => {
403                    // CRLF should have had an additional CR before the LF.
404                    return Err(EmlError::UnexpectedContent(String::from(
405                        "Found CRLF+LF in header without expected CR first",
406                    )));
407                }
408
409                (LwspState::LF, InputType::CR) => {
410                    // LF after non-breaking character should be followed by another LF, not CR.
411                    return Err(EmlError::UnexpectedContent(String::from(
412                        "Found LF+CR in header as line delimeter",
413                    )));
414                }
415
416                // These match arms won't be hit because we only set the state above before breaking from the loop
417                (LwspState::EndOfHeader_LFLF, _)
418                | (LwspState::EndOfHeader_CRCR, _)
419                | (LwspState::EndOfHeader_CRLFCRLF, _) => unreachable!(),
420            }
421        }
422
423        self.position = end_position;
424
425        // Depending on the state (end of individual header value or the full thing, type of line ending), the return value
426        // has a different end position
427        let value_end = end_position
428            - match state {
429                LwspState::LF => 1,
430                LwspState::CR => 1,
431                LwspState::CRLF => 2,
432                LwspState::EndOfHeader_LFLF => 2,
433                LwspState::EndOfHeader_CRCR => 2,
434                LwspState::EndOfHeader_CRLFCRLF => 4,
435                LwspState::ReadingContent | LwspState::CRLFCR => unreachable!(),
436            };
437
438        let end_of_header = matches!(
439            state,
440            LwspState::EndOfHeader_LFLF
441                | LwspState::EndOfHeader_CRCR
442                | LwspState::EndOfHeader_CRLFCRLF
443        );
444
445        Ok((
446            String::from(&self.content[start_position..value_end]),
447            end_of_header,
448        ))
449    }
450
451    fn next_char_type(c: char) -> InputType {
452        match c {
453            '\n' => InputType::LF,
454            '\r' => InputType::CR,
455            ' ' | '\t' => InputType::WSP,
456            // According to RFC0822, linear whitespace is CRLF + (space or tab).
457            // There's no clear indication how a form feed (0xC, dec 12) should play into this.
458            // Hopefully this isn't an issue, but I am explicitly defining non-CR, non-LF
459            // whitespace as being no different than space and tab.
460            c if c.is_ascii_whitespace() => InputType::WSP,
461            _ => InputType::NonWsp,
462        }
463    }
464
465    fn parse_body(&mut self) -> Option<String> {
466        match self.body_handling {
467            BodyHandling::None => None,
468            BodyHandling::Preview(bytes) => {
469                let bytes_remaining = self.content.len() - self.position;
470                let bytes = bytes.min(bytes_remaining);
471
472                Some(String::from(
473                    &self.content
474                        [self.position..self.content.floor_char_boundary(self.position + bytes)],
475                ))
476            }
477            BodyHandling::All => Some(String::from(&self.content[self.position..])),
478        }
479    }
480}
481
482#[cfg(test)]
483mod tests {
484    use super::*;
485
486    const TEST_HEADER: &str = r#"Delivered-To: john.public@example.com
487Received: by 2002:ac9:700e:0:0:0:0:0 with SMTP id w14csp4493771ocr;
488        Mon, 13 Apr 2020 14:04:07 -0700 (PDT)
489X-Google-Smtp-Source: APiQypIbRnWumT0t4TOJHlvDOVkxfqZ8A8HBzdR39kgdjVQQfKUsY/DkKFeZI53Ux1Z3reMRqaCl
490X-Received: by 2002:a37:aa8e:: with SMTP id t136mr9744838qke.175.1586811847065;
491        Mon, 13 Apr 2020 14:04:07 -0700 (PDT)
492foo: bar
493
494This is the start of the body
495"#;
496
497    #[test]
498    fn basic_test() {
499        let eml = EmlParser::from_string(TEST_HEADER.to_string())
500            .with_body()
501            .parse();
502
503        assert!(eml.is_ok());
504        let eml = eml.unwrap();
505
506        assert_eq!(5, eml.headers.len());
507
508        let delivered_to: &HeaderField = &eml.headers[0];
509        assert_eq!("Delivered-To", delivered_to.name);
510        assert_eq!(
511            HeaderFieldValue::SingleEmailAddress(EmailAddress::AddressOnly {
512                address: ("john.public@example.com".to_string())
513            }),
514            delivered_to.value
515        );
516
517        let received: &HeaderField = &eml.headers[1];
518        assert_eq!("Received", received.name);
519        assert_eq!(
520            HeaderFieldValue::Unstructured(
521                r#"by 2002:ac9:700e:0:0:0:0:0 with SMTP id w14csp4493771ocr;
522        Mon, 13 Apr 2020 14:04:07 -0700 (PDT)"#
523                    .to_string()
524            ),
525            received.value
526        );
527
528        assert_eq!("X-Google-Smtp-Source".to_string(), eml.headers[2].name);
529        assert_eq!(
530            HeaderFieldValue::Unstructured(
531                "APiQypIbRnWumT0t4TOJHlvDOVkxfqZ8A8HBzdR39kgdjVQQfKUsY/DkKFeZI53Ux1Z3reMRqaCl"
532                    .to_string()
533            ),
534            eml.headers[2].value
535        );
536
537        assert_eq!("X-Received".to_string(), eml.headers[3].name);
538        assert_eq!(
539            HeaderFieldValue::Unstructured(
540                r#"by 2002:a37:aa8e:: with SMTP id t136mr9744838qke.175.1586811847065;
541        Mon, 13 Apr 2020 14:04:07 -0700 (PDT)"#
542                    .to_string()
543            ),
544            eml.headers[3].value
545        );
546
547        assert_eq!("foo".to_string(), eml.headers[4].name);
548        assert_eq!(
549            HeaderFieldValue::Unstructured("bar".to_string()),
550            eml.headers[4].value
551        );
552
553        assert!(eml.body.is_some());
554        let body = eml.body.unwrap();
555        assert_eq!("This is the start of the body\n", body);
556    }
557
558    #[test]
559    fn basic_test_with_truncated_body() {
560        let eml: Eml = EmlParser::from_string(TEST_HEADER.to_string())
561            .with_body_preview(15)
562            .parse()
563            .unwrap(); // Result
564
565        let body = eml.body.unwrap();
566        let expected = &"This is the start of the body\n"[0..15];
567        assert_eq!(expected, body);
568    }
569
570    #[test]
571    fn basic_test_with_truncation_gt_body_length() {
572        let eml: Eml = EmlParser::from_string(TEST_HEADER.to_string())
573            .with_body_preview(150)
574            .parse()
575            .unwrap(); // Result
576
577        assert_eq!(5, eml.headers.len());
578
579        let body = eml.body.unwrap();
580        assert_eq!("This is the start of the body\n", body);
581    }
582
583    #[test]
584    fn body_truncated_in_multibyte_char() {
585        let result = EmlParser::from_string("Foo: ok\n\nBá".to_string())
586            .with_body_preview(2)
587            .parse()
588            .unwrap();
589
590        let body = result.body.unwrap();
591        assert_eq!("B", body);
592    }
593
594    #[test]
595    fn parse_emails() {
596        let parsed =
597            EmlParser::parse_email_address(r#""John Smith" <jsmith@example.com>"#.to_string());
598
599        let jsmith = EmailAddress::NameAndEmailAddress {
600            name: "John Smith".to_string(),
601            address: "jsmith@example.com".to_string(),
602        };
603        let expected = HeaderFieldValue::SingleEmailAddress(jsmith);
604
605        assert_eq!(parsed, expected);
606    }
607
608    #[test]
609    fn parse_and_display_emails() {
610        let single = r#""John Q. Public" < john@example.com>, "#.to_string();
611        let parsed = EmlParser::parse_email_address(single);
612
613        match &parsed {
614            HeaderFieldValue::SingleEmailAddress(EmailAddress::NameAndEmailAddress {
615                name,
616                address,
617            }) => {
618                assert_eq!(name, "John Q. Public");
619                assert_eq!(address, "john@example.com");
620            }
621            _ => panic!("Expected SingleEmailAddress, got something else"),
622        };
623
624        assert_eq!(parsed.to_string(), r#""John Q. Public" <john@example.com>"#);
625    }
626
627    #[test]
628    fn test_errors() {
629        let filename = "nonexistent.eml";
630        let parsed = EmlParser::from_file(filename);
631        assert!(parsed.is_err());
632
633        let errval = parsed.unwrap_err();
634        assert!(matches!(errval, EmlError::IoError(_inner)));
635    }
636
637    #[test]
638    fn last_header_empty() {
639        let eml: Eml = EmlParser::from_string("Foo: ok\nBar: \n\nHello".to_string())
640            .with_body()
641            .parse()
642            .unwrap();
643
644        assert_eq!(2, eml.headers.len());
645
646        let foo = &eml.headers[0];
647        let HeaderField { name, value } = foo;
648        assert_eq!("Foo", name);
649        assert_eq!(&HeaderFieldValue::Unstructured("ok".to_string()), value);
650
651        let bar = &eml.headers[1];
652        let HeaderField { name, value } = bar;
653        assert_eq!("Bar", name);
654        assert_eq!(&HeaderFieldValue::Empty, value);
655
656        assert_eq!(Some("Hello".to_string()), eml.body);
657    }
658
659    #[test]
660    fn last_header_get_full_value() {
661        let eml: Eml = EmlParser::from_string("Foo: ok\nBar: super\n\nHello".to_string())
662            .with_body()
663            .parse()
664            .unwrap();
665
666        assert_eq!(2, eml.headers.len());
667
668        let foo = &eml.headers[0];
669        let HeaderField { name, value } = foo;
670        assert_eq!("Foo", name);
671        assert_eq!(&HeaderFieldValue::Unstructured("ok".to_string()), value);
672
673        let bar = &eml.headers[1];
674        let HeaderField { name, value } = bar;
675        assert_eq!("Bar", name);
676        assert_eq!(&HeaderFieldValue::Unstructured("super".to_string()), value);
677
678        assert_eq!(Some("Hello".to_string()), eml.body);
679    }
680
681    /// See https://github.com/aeshirey/EmlParser/issues/14
682    #[test]
683    fn nonascii() {
684        // This previously gave a incorrect results of "tés" and " ba"
685        let result = EmlParser::from_string("Foo: tést\nBar: bar\n\nHello".to_string())
686            .ignore_body()
687            .parse()
688            .expect("Should parse");
689
690        let headers = result.headers;
691        assert_eq!(2, headers.len());
692
693        let HeaderField { name, value } = &headers[0];
694        assert_eq!("Foo", name);
695        assert_eq!("tést", value.to_string());
696
697        let HeaderField { name, value } = &headers[1];
698        assert_eq!("Bar", name);
699        assert_eq!("bar", value.to_string());
700
701        // This previously crashed due to the letter + diacritic being at the end of a header value
702        let result = EmlParser::from_string("Foo: testé\nBar: bar\n\nHello".to_string())
703            .ignore_body()
704            .parse()
705            .expect("Should parse");
706
707        let headers = result.headers;
708        assert_eq!(2, headers.len());
709
710        let HeaderField { name, value } = &headers[0];
711        assert_eq!("Foo", name);
712        assert_eq!("testé", value.to_string());
713
714        let HeaderField { name, value } = &headers[1];
715        assert_eq!("Bar", name);
716        assert_eq!("bar", value.to_string());
717
718        // This previously crashed due to the letter + diacritic being at the end of a header name
719        let result = EmlParser::from_string("ō: test\n\n".to_string())
720            .ignore_body()
721            .parse()
722            .unwrap();
723
724        let headers = result.headers;
725        assert_eq!(1, headers.len());
726
727        let HeaderField { name, value } = &headers[0];
728        assert_eq!("ō", name);
729        assert_eq!("test", value.to_string());
730    }
731
732    #[test]
733    fn test_parse_phishing_emails() {
734        for n in 0..10 {
735            let filename = format!("test_emails/{n}.eml");
736
737            let mut e = EmlParser::from_file(&filename).expect("Load file");
738            let _parsed = e.parse().expect("Parse file");
739        }
740    }
741
742    #[test]
743    fn test_parse_rfc2047() {
744        let mut e = EmlParser::from_file("test_emails/rfc2047.eml").unwrap();
745        let parsed = e.parse().expect("Parse rfc2047.eml");
746        let schöne = HeaderFieldValue::Unstructured("Schöne Grüße".to_string());
747
748        for h in parsed.headers {
749            if h.name == "Salutation" {
750                assert_eq!(h.value, schöne);
751            }
752        }
753    }
754}