Skip to main content

email/
rfc5322.rs

1//! Module with helpers for dealing with RFC 5322
2
3use super::header::{Header, HeaderMap};
4use super::results::{ParsingError, ParsingResult};
5use super::rfc2047::decode_rfc2047;
6
/// Line length, in characters, at which `Rfc5322Builder::emit_folded` folds
/// its output onto a continuation line.
///
/// RFC 5322 Section 2.1.1 recommends that lines be no longer than 78
/// characters, excluding the CRLF.
pub const MIME_LINE_LENGTH: usize = 78;
8
/// Character classes used by the RFC 5322 parser.
trait Rfc5322Character {
    /// Is considered a special character by RFC 5322 Section 3.2.3
    fn is_special(&self) -> bool;
    /// Is considered to be a VCHAR by RFC 5234 Appendix B.1
    fn is_vchar(&self) -> bool;
    /// Is considered to be field text as defined by RFC 5322 Section 3.6.8
    fn is_ftext(&self) -> bool;

    /// Is an atom character: a VCHAR that is not a special.
    fn is_atext(&self) -> bool {
        !self.is_special() && self.is_vchar()
    }
}

impl Rfc5322Character for char {
    fn is_ftext(&self) -> bool {
        // `"!".."9" / ";".."~"` is every VCHAR except the colon, which sits
        // between '9' and ';' and terminates a field name.
        *self != ':' && self.is_vchar()
    }

    fn is_special(&self) -> bool {
        // The RFC 5322 specials, plus the space character. Space is not a
        // VCHAR, so including it here does not change `is_atext`.
        const SPECIALS: &str = "()<>[]:;@\\,.\" ";
        SPECIALS.contains(*self)
    }

    fn is_vchar(&self) -> bool {
        // Visible (printing) US-ASCII characters, 0x21 through 0x7E.
        '!' <= *self && *self <= '~'
    }
}
46
47/// RFC 5322 base parser for parsing
48///  `atom`, `dot-atom`, `quoted-string`, `phrase`, `message`
49///
50/// This should prove useful for parsing other things that appear in RFC 5322,
51/// as most are based off these core items.
52///
53/// It also implements a stack for tracking the position.
54/// This allows the simple implementation of backtracking, by pushing the position
55/// before a test and popping it if the test should fail.
56/// [unstable]
57pub struct Rfc5322Parser<'s> {
58    s: &'s str,
59    pos: usize,
60    pos_stack: Vec<usize>,
61}
62
63impl<'s> Rfc5322Parser<'s> {
64    /// Make a new parser, initialized with the given string.
65    /// [unstable]
66    pub fn new(source: &'s str) -> Rfc5322Parser<'s> {
67        Rfc5322Parser {
68            s: source,
69            pos: 0,
70            pos_stack: Vec::new(),
71        }
72    }
73
74    /// Push the current position onto the stack.
75    /// [unstable]
76    pub fn push_position(&mut self) {
77        self.pos_stack.push(self.pos);
78    }
79
80    /// Move the position back to the last entry pushed
81    /// [unstable]
82    pub fn pop_position(&mut self) {
83        match self.pos_stack.pop() {
84            Some(pos) => {
85                self.pos = pos;
86            }
87            None => panic!("Popped position stack too far"),
88        }
89    }
90
91    /// Consume a message from the input.
92    ///
93    /// Returns as a map of the headers and the body text.
94    ///
95    /// A message is defined as:
96    ///
97    /// `fields = *field
98    /// body = text
99    /// message = fields CRLF body`
100    /// [unstable]
101    pub fn consume_message(&mut self) -> Option<(HeaderMap, String)> {
102        let mut headers = HeaderMap::new();
103        while !self.eof() {
104            let header = self.consume_header();
105            if let Some(header) = header {
106                headers.insert(header);
107            } else {
108                // Check end of headers as marked by CRLF
109                if !self.eof() && self.peek_linebreak() {
110                    assert!(self.consume_linebreak());
111                }
112
113                break;
114            }
115        }
116
117        // Whatever remains is the body
118        let body = self.s[self.pos..].to_string();
119        self.pos = self.s.len();
120
121        Some((headers, body))
122    }
123
124    /// Consume a header from the input.
125    ///
126    /// A header is defined as:
127    ///
128    /// `ftext = "!".."9" / ";".."~"
129    /// field-name = 1*ftext
130    /// field = field-name *LWSP ":" unstructured`
131    /// [unstable]
132    pub fn consume_header(&mut self) -> Option<Header> {
133        let last_pos = self.pos;
134        // Parse field-name
135        let field_name = self.consume_while(|c| c.is_ftext());
136        self.consume_linear_whitespace();
137        if field_name.is_empty() || self.eof() || self.peek() != ':' {
138            // Fail to parse if we didn't see a field, we're at the end of input
139            // or we haven't just seen a ":"
140            self.pos = last_pos;
141            None
142        } else {
143            // Consume the ":" and any leading whitespace
144            self.consume_char();
145            self.consume_linear_whitespace();
146            let field_value = self.consume_unstructured();
147
148            // don't just panic!()
149            if !self.consume_linebreak() {
150                return None;
151            };
152
153            Some(Header::new(field_name, field_value))
154        }
155    }
156
157    /// Consume an unstructured from the input.
158    /// [unstable]
159    pub fn consume_unstructured(&mut self) -> String {
160        let mut result = String::new();
161        while !self.eof() {
162            if self.peek_linebreak() {
163                // Check for folding whitespace, if it wasn't, then
164                // we're done parsing
165                if !self.consume_folding_whitespace() {
166                    break;
167                }
168            }
169
170            result.push_str(&self.consume_while(|c| c.is_vchar() || c == ' ' || c == '\t')[..])
171        }
172        result
173    }
174
175    /// Consume folding whitespace.
176    ///
177    /// This is a CRLF followed by one or more whitespace character.
178    ///
179    /// Returns true if whitespace was consume
180    /// [unstable]
181    pub fn consume_folding_whitespace(&mut self) -> bool {
182        // Remember where we were, in case this isn't folding whitespace
183        let current_position = self.pos;
184        let is_fws = if !self.eof() && self.consume_linebreak() {
185            match self.consume_char() {
186                Some(' ') | Some('\t') => true,
187                _ => false,
188            }
189        } else {
190            false
191        };
192
193        if is_fws {
194            // This was a folding whitespace, so consume all linear whitespace
195            self.consume_linear_whitespace();
196        } else {
197            // Reset back if we didn't see a folding whitespace
198            self.pos = current_position;
199        }
200
201        is_fws
202    }
203
204    /// Consume a word from the input.
205    ///
206    /// A word is defined as:
207    ///
208    /// `word = atom / quoted-string`
209    ///
210    /// If `allow_dot_atom` is true, then `atom` can be a `dot-atom` in this phrase.
211    /// [unstable]
212    pub fn consume_word(&mut self, allow_dot_atom: bool) -> Option<String> {
213        let p = self.peek();
214        if p == '"' {
215            // Word is a quoted string
216            self.consume_quoted_string()
217        } else {
218            // Word is an atom (or not a word)
219            self.consume_atom(allow_dot_atom)
220        }
221    }
222
223    /// Consume a phrase from the input.
224    ///
225    /// A phrase is defined as:
226    ///
227    /// `phrase = 1*word`
228    ///
229    /// If `allow_dot_atom` is true, then `atom` can be a `dot-atom` in this phrase.
230    /// [unstable]
231    pub fn consume_phrase(&mut self, allow_dot_atom: bool) -> Option<String> {
232        let mut phrase = String::new();
233
234        while !self.eof() {
235            self.consume_linear_whitespace();
236
237            let word = match self.consume_word(allow_dot_atom) {
238                Some(x) => x,
239                None => break, // If it's not a word, it's no longer
240                               // in a phrase, so stop.
241            };
242
243            let w_slice = &word[..];
244            // RFC 2047 encoded words start with =?, end with ?=
245            let decoded_word = if w_slice.starts_with("=?") && w_slice.ends_with("?=") {
246                match decode_rfc2047(w_slice) {
247                    Some(w) => w,
248                    None => w_slice.to_string(),
249                }
250            } else {
251                w_slice.to_string()
252            };
253
254            // Make sure we put a leading space on, if this isn't the first insertion
255            if !phrase.is_empty() {
256                phrase.push_str(" ");
257            }
258            phrase.push_str(&decoded_word[..]);
259        }
260
261        if !phrase.is_empty() {
262            Some(phrase)
263        } else {
264            None
265        }
266    }
267
268    /// Consume a quoted string from the input
269    /// [unstable]
270    pub fn consume_quoted_string(&mut self) -> Option<String> {
271        if self.peek() != '"' {
272            // Fail if we were called wrong
273            None
274        } else {
275            let mut quoted_string = String::new();
276            let mut inside_escape = false;
277            let mut terminated = false;
278            // Consume the leading "
279            self.consume_char();
280            while !terminated && !self.eof() {
281                match self.peek() {
282                    '\\' if !inside_escape => {
283                        // If we were not already being escaped, consume the
284                        // escape character and mark that we're being escaped.
285                        self.consume_char();
286                        inside_escape = true;
287                    }
288                    '"' if !inside_escape => {
289                        // If this is a DQUOTE and we haven't seen an escape character,
290                        // consume it and mark that we should break from the loop
291                        self.consume_char();
292                        terminated = true;
293                    }
294                    _ => {
295                        // Any old character gets pushed in
296                        if let Some(c) = self.consume_char() {
297                            quoted_string.push(c);
298                            // Clear any escape character state we have
299                            inside_escape = false;
300                        }
301                        // TODO: Should this return a Result<> instead of an Option<>?
302                        else {
303                            return None;
304                        }
305                    }
306                }
307            }
308
309            if inside_escape || !terminated {
310                // Return an error state if we're still expecting a character
311                None
312            } else {
313                Some(quoted_string)
314            }
315        }
316    }
317
318    /// Consume an atom from the input.
319    ///
320    /// If `allow_dot` is true, then also allow '.' to be considered as an
321    /// atext character.
322    /// [unstable]
323    pub fn consume_atom(&mut self, allow_dot: bool) -> Option<String> {
324        if self.eof() || !self.peek().is_atext() {
325            None
326        } else {
327            Some(self.consume_while(|c| c.is_atext() || (allow_dot && c == '.')))
328        }
329    }
330
331    /// Consume LWSP (Linear whitespace)
332    /// [unstable]
333    pub fn consume_linear_whitespace(&mut self) {
334        self.consume_while(|c| c == '\t' || c == ' ');
335    }
336
337    /// Consume a single character from the input.
338    #[inline]
339    /// [unstable]
340    pub fn consume_char(&mut self) -> Option<char> {
341        if self.eof() {
342            return None;
343        }
344        let c = self.peek();
345        self.pos += c.len_utf8();
346        Some(c)
347    }
348
349    // Consume a linebreak: \r\n, \r or \n
350    /// [unstable]
351    pub fn consume_linebreak(&mut self) -> bool {
352        if self.eof() {
353            return false;
354        }
355
356        let start_pos = self.pos;
357
358        match self.consume_char() {
359            Some('\r') => {
360                // Try to consume a single \n following the \r
361                if !self.eof() && self.peek() == '\n' {
362                    self.consume_char();
363                }
364                true
365            }
366            Some('\n') => true,
367            _ => {
368                self.pos = start_pos;
369                false
370            }
371        }
372    }
373
374    // Peek at the current character and determine whether it's (part of) a linebreak
375    /// [unstable]
376    pub fn peek_linebreak(&mut self) -> bool {
377        match self.peek() {
378            '\r' | '\n' => true,
379            _ => false,
380        }
381    }
382
383    /// Consume a set of characters, each passed to `test` until this function
384    /// returns false.
385    ///
386    /// The position after calling this function will be pointing to the character
387    /// which caused a false result from `test`.
388    ///
389    /// Returns the string of characters that returned true for the test function.
390    #[inline]
391    /// [unstable]
392    pub fn consume_while<F: Fn(char) -> bool>(&mut self, test: F) -> String {
393        let start_pos = self.pos;
394        while !self.eof() && test(self.peek()) {
395            self.consume_char();
396        }
397        self.s[start_pos..self.pos].to_string()
398    }
399
400    /// Peek at the current character.
401    ///
402    /// Note that this does not do any bounds checking.
403    #[inline]
404    /// [unstable]
405    pub fn peek(&self) -> char {
406        self.s[self.pos..].chars().next().unwrap()
407    }
408
409    /// Check that `!self.eof() && self.peek() == c`
410    #[inline]
411    /// [unstable]
412    pub fn assert_char(&self, c: char) -> ParsingResult<()> {
413        self.assert_not_eof()?;
414
415        let actual_c = self.peek();
416        if c == actual_c {
417            Ok(())
418        } else {
419            Err(ParsingError::new(format!(
420                "Expected {}, got {}",
421                c, actual_c
422            )))
423        }
424    }
425
426    /// Check that we have not reached the end of the input.
427    #[inline]
428    /// [unstable]
429    pub fn assert_not_eof(&self) -> ParsingResult<()> {
430        if self.eof() {
431            Err(ParsingError::new("Reached EOF.".to_string()))
432        } else {
433            Ok(())
434        }
435    }
436
437    /// Get the unconsumed string. Should only be used for debugging purposes!
438    #[inline]
439    /// [unstable]
440    pub fn peek_to_end(&self) -> &str {
441        &self.s[self.pos..]
442    }
443
444    /// Returns true if we have reached the end of the input.
445    #[inline]
446    /// [unstable]
447    pub fn eof(&self) -> bool {
448        self.pos >= self.s.len()
449    }
450}
451
452/// Type for constructing RFC 5322 messages
453pub struct Rfc5322Builder {
454    result: String,
455}
456
457impl Rfc5322Builder {
458    /// Make a new builder, with an empty string
459    pub fn new() -> Rfc5322Builder {
460        Rfc5322Builder {
461            result: "".to_string(),
462        }
463    }
464
465    pub fn result(&self) -> &String {
466        &self.result
467    }
468
469    pub fn emit_raw(&mut self, s: &str) {
470        self.result.push_str(s);
471    }
472
473    pub fn emit_folded(&mut self, s: &str) {
474        let mut cur_len = 0;
475        let mut last_space = 0;
476        let mut last_cut = 0;
477
478        for (pos, c) in s.char_indices() {
479            match c {
480                ' ' => {
481                    last_space = pos;
482                }
483                '\r' => {
484                    cur_len = 0;
485                }
486                '\n' => {
487                    cur_len = 0;
488                }
489                _ => {}
490            }
491
492            cur_len += 1;
493            // We've reached our line length, so
494            if cur_len >= MIME_LINE_LENGTH && last_space > 0 {
495                // Emit the string from the last place we cut it to the
496                // last space that we saw
497                self.emit_raw(&s[last_cut..last_space]);
498                // ... and get us ready to put out the continuation
499                self.emit_raw("\r\n\t");
500
501                // Reset our counters
502                cur_len = 0;
503                last_cut = last_space + s[last_space..].chars().next().unwrap().len_utf8();
504                last_space = 0;
505            }
506        }
507
508        // Finally, emit everything left in the string
509        self.emit_raw(&s[last_cut..]);
510    }
511}
512
513impl Default for Rfc5322Builder {
514    fn default() -> Self {
515        Rfc5322Builder::new()
516    }
517}
518
#[cfg(test)]
mod tests {
    use super::*;

    /// One `consume_phrase` scenario: the input, the phrase expected back,
    /// and a label used in failure messages.
    struct PhraseTestCase<'s> {
        input: &'s str,
        output: &'s str,
        name: &'s str,
    }

    /// Parsing must produce a result (rather than panic) for empty,
    /// body-less and unterminated inputs.
    #[test]
    fn test_parser() {
        let mut parser = Rfc5322Parser::new("");
        assert!(parser.consume_message().is_some());

        let mut parser = Rfc5322Parser::new("\r\n");
        assert!(parser.consume_message().is_some());

        let mut parser = Rfc5322Parser::new("From: Garbage@-\r\n");
        assert!(parser.consume_message().is_some());

        let mut parser = Rfc5322Parser::new("From: Garbage@");
        assert!(parser.consume_message().is_some());

        let mut parser = Rfc5322Parser::new("From: Garnage@-");
        assert!(parser.consume_message().is_some());
    }

    #[test]
    fn test_consume_phrase() {
        let tests = [
            PhraseTestCase {
                input: "\"test phrase\"", output: "test phrase",
                name: "Simple quoted-string"
            },
            PhraseTestCase {
                input: "\"test \\\"phrase\\\"\"", output: "test \"phrase\"",
                name: "quoted-string with escape character"
            },
            PhraseTestCase {
                input: "\"=?utf-8?q?encoded=20q-string?=\"", output: "encoded q-string",
                name: "Encoded quoted-string"
            },
            PhraseTestCase {
                input: "atom test", output: "atom test",
                name: "Collection of atoms"
            },
            PhraseTestCase {
                input: "=?utf-8?q?encoded=20atom?=", output: "encoded atom",
                name: "Encoded atom"
            },
            PhraseTestCase {
                input: "Mix of atoms \"and quoted strings\"", output: "Mix of atoms and quoted strings",
                name: "Mix of atoms and quoted strings"
            },
            PhraseTestCase {
                input: "=?utf-8?q?encoded=20atoms?= mixed with \"unencoded\" \"=?utf-8?b?YW5kIGVuY29kZWQgcS1zdHJpbmdz?=\"",
                output: "encoded atoms mixed with unencoded and encoded q-strings",
                name: "Mix of atoms, q-strings of differing encodings"
            },
            PhraseTestCase {
                input: "\"John Smith\" <test@example.org>", output: "John Smith",
                name: "Stop consuming phrase at \"special\" character",
            }
        ];

        for t in tests.iter() {
            let mut p = Rfc5322Parser::new(t.input);
            let phrase = p.consume_phrase(false);
            // Use a literal format string: passing `format!(..)` as the
            // message is rejected by newer editions.
            assert_eq!(phrase, Some(t.output.to_string()), "{}", t.name);
        }
    }

    /// One `consume_message` scenario: the raw message, headers that must be
    /// present (name, value), and the expected body.
    struct MessageTestCase<'s> {
        input: &'s str,
        headers: Vec<(&'s str, &'s str)>,
        body: &'s str,
    }

    #[test]
    fn test_consume_message() {
        let tests = vec![
            MessageTestCase {
                input: "From: \"Joe Blogs\" <joe@example.org>\r\n\r\nBody",
                headers: vec![
                    ("From", "\"Joe Blogs\" <joe@example.org>"),
                ],
                body: "Body",
            },
            // Support parsing messages with \n instead of \r\n
            MessageTestCase {
                input: "From: \"Joe Blogs\" <joe@example.org>\n\nBody",
                headers: vec![
                    ("From", "\"Joe Blogs\" <joe@example.org>"),
                ],
                body: "Body",
            },
            MessageTestCase {
                input: "From: \"Joe Blogs\" <joe@example.org>\r\n\r\nMultiline\r\nBody",
                headers: vec![
                    ("From", "\"Joe Blogs\" <joe@example.org>"),
                ],
                body: "Multiline\r\nBody",
            },
            MessageTestCase {
                input: "From: \"Joe Blogs\" <joe@example.org>\r\nTo: \"John Doe\" <john@example.org>\r\n\r\nMultiple headers",
                headers: vec![
                    ("From", "\"Joe Blogs\" <joe@example.org>"),
                    ("To", "\"John Doe\" <john@example.org>"),
                ],
                body: "Multiple headers",
            },
            MessageTestCase {
                input: "Folded-Header: Some content that is \r\n\t wrapped with a tab.\r\n\r\nFolding whitespace test",
                headers: vec![
                    ("Folded-Header", "Some content that is wrapped with a tab."),
                ],
                body: "Folding whitespace test",
            },
            MessageTestCase {
                input: "Folded-Header: Some content that is \r\n  wrapped with spaces.\r\n\r\nFolding whitespace test",
                headers: vec![
                    ("Folded-Header", "Some content that is wrapped with spaces."),
                ],
                body: "Folding whitespace test",
            },
        ];

        for test in tests.iter() {
            let mut p = Rfc5322Parser::new(test.input);
            let (headers, body) = match p.consume_message() {
                Some(message) => message,
                None => panic!("Failed to parse message"),
            };

            assert_eq!(body, test.body.to_string());
            for &(header_title, header_value) in test.headers.iter() {
                let matching_headers = headers.find(&header_title.to_string()).unwrap();
                // At least one header with this name must carry the expected value
                assert!(matching_headers.iter().any(|h| {
                    let val: String = h.get_value().unwrap();
                    val == header_value.to_string()
                }));
            }
        }
    }

    #[test]
    fn test_builder_folding() {
        struct BuildFoldTest<'s> {
            input: &'s str,
            expected: &'s str,
        }

        let tests = vec![
            BuildFoldTest {
                input: "A long line that should get folded on a space at some point around here, possibly at this point.",
                expected: "A long line that should get folded on a space at some point around here,\r\n\
                \tpossibly at this point.",
            },
            BuildFoldTest {
                input: "A long line that should get folded on a space at some point around here, possibly at this point. And yet more content that will get folded onto another line.",
                expected: "A long line that should get folded on a space at some point around here,\r\n\
                \tpossibly at this point. And yet more content that will get folded onto another\r\n\
                \tline.",
            },
        ];

        for test in tests {
            // Not named `gen`: that identifier is reserved in newer editions
            let mut builder = Rfc5322Builder::new();
            builder.emit_folded(test.input);
            assert_eq!(builder.result(), &test.expected.to_string());
        }
    }
}