mail_parser/parsers/fields/
unstructured.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6
7use std::borrow::Cow;
8
9use crate::{parsers::MessageStream, HeaderValue};
10struct UnstructuredParser<'x> {
11    token_start: usize,
12    token_end: usize,
13    tokens: Vec<Cow<'x, str>>,
14    last_is_encoded: bool,
15}
16
17impl<'x> UnstructuredParser<'x> {
18    fn add_token(&mut self, stream: &MessageStream<'x>) {
19        if self.token_start > 0 {
20            if !self.tokens.is_empty() {
21                self.tokens.push(" ".into());
22            }
23            self.tokens.push(String::from_utf8_lossy(
24                stream.bytes(self.token_start - 1..self.token_end),
25            ));
26
27            self.token_start = 0;
28            self.last_is_encoded = false;
29        }
30    }
31
32    fn add_rfc2047(&mut self, token: String) {
33        if !self.last_is_encoded {
34            self.tokens.push(" ".into());
35        }
36        self.tokens.push(token.into());
37        self.last_is_encoded = true;
38    }
39}
40
41impl<'x> MessageStream<'x> {
42    pub fn parse_unstructured(&mut self) -> HeaderValue<'x> {
43        let mut parser = UnstructuredParser {
44            token_start: 0,
45            token_end: 0,
46            tokens: Vec::new(),
47            last_is_encoded: true,
48        };
49
50        while let Some(ch) = self.next() {
51            match ch {
52                b'\n' => {
53                    parser.add_token(self);
54
55                    if !self.try_next_is_space() {
56                        return match parser.tokens.len() {
57                            1 => HeaderValue::Text(parser.tokens.pop().unwrap()),
58                            0 => HeaderValue::Empty,
59                            _ => HeaderValue::Text(parser.tokens.concat().into()),
60                        };
61                    } else {
62                        continue;
63                    }
64                }
65                b' ' | b'\t' | b'\r' => {
66                    continue;
67                }
68                b'=' if self.peek_char(b'?') => {
69                    self.checkpoint();
70                    if let Some(token) = self.decode_rfc2047() {
71                        parser.add_token(self);
72                        parser.add_rfc2047(token);
73                        continue;
74                    }
75                    self.restore();
76                }
77                _ => (),
78            }
79
80            if parser.token_start == 0 {
81                parser.token_start = self.offset();
82            }
83
84            parser.token_end = self.offset();
85        }
86
87        HeaderValue::Empty
88    }
89}
90
#[cfg(test)]
mod tests {
    use crate::parsers::{fields::load_tests, MessageStream};

    /// Runs every case loaded from `unstructured.json` through
    /// `parse_unstructured` and compares the resulting text with the
    /// expected value of the case.
    #[test]
    fn parse_unstructured() {
        for case in load_tests::<String>("unstructured.json") {
            let mut stream = MessageStream::new(case.header.as_bytes());

            assert_eq!(
                stream.parse_unstructured().unwrap_text(),
                case.expected,
                "failed for {:?}",
                case.header
            );
        }
    }
}