1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/*
 * Copyright Stalwart Labs Ltd. See the COPYING
 * file at the top-level directory of this distribution.
 *
 * Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 * https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 * <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
 * option. This file may not be copied, modified, or distributed
 * except according to those terms.
 */

use std::borrow::Cow;

use crate::{parsers::MessageStream, HeaderValue};
/// Accumulator used while parsing an unstructured header value.
///
/// Plain text is tracked as a pending byte range (`token_start`/`token_end`)
/// and only turned into a string fragment when `add_token` is called.
struct UnstructuredParser<'x> {
    /// Value of the stream's `offset()` recorded right after the first byte of
    /// the pending plain-text token was read. `0` is the sentinel for "no
    /// token pending"; `add_token` subtracts one to get the slice start.
    token_start: usize,
    /// Value of the stream's `offset()` recorded after the most recent byte
    /// that belongs to the pending token; used as the exclusive slice end.
    token_end: usize,
    /// Output fragments (text tokens, decoded encoded-words and single-space
    /// separators) in the order they will appear in the final value.
    tokens: Vec<Cow<'x, str>>,
    /// `true` when the last fragment appended was a decoded RFC 2047 word.
    /// `add_rfc2047` uses it to decide whether a separating space is needed.
    last_is_encoded: bool,
}

impl<'x> UnstructuredParser<'x> {
    fn add_token(&mut self, stream: &MessageStream<'x>) {
        if self.token_start > 0 {
            if !self.tokens.is_empty() {
                self.tokens.push(" ".into());
            }
            self.tokens.push(String::from_utf8_lossy(
                stream.bytes(self.token_start - 1..self.token_end),
            ));

            self.token_start = 0;
            self.last_is_encoded = false;
        }
    }

    fn add_rfc2047(&mut self, token: String) {
        if !self.last_is_encoded {
            self.tokens.push(" ".into());
        }
        self.tokens.push(token.into());
        self.last_is_encoded = true;
    }
}

impl<'x> MessageStream<'x> {
    /// Parses an unstructured header value from the current stream position.
    ///
    /// Bytes are read until a `\n` for which `try_next_is_space()` reports
    /// `false`; a `\n` for which it reports `true` is treated as a
    /// continuation and parsing goes on. Spaces, tabs and carriage returns
    /// are skipped and never become part of a token's recorded range start.
    /// A `=` followed by `?` is tried as an RFC 2047 encoded-word; if decoding
    /// fails the stream is restored and the `=` is handled as ordinary text.
    ///
    /// Returns `HeaderValue::Text` holding the collected fragments (borrowed
    /// or owned single fragment, or the concatenation of several), or
    /// `HeaderValue::Empty` when nothing was collected. If the stream is
    /// exhausted before a terminating newline is seen, `HeaderValue::Empty`
    /// is returned regardless of what was collected.
    pub fn parse_unstructured(&mut self) -> HeaderValue<'x> {
        // `last_is_encoded` starts as `true` so that a leading encoded-word
        // is not preceded by a separator space.
        let mut state = UnstructuredParser {
            tokens: Vec::new(),
            token_start: 0,
            token_end: 0,
            last_is_encoded: true,
        };

        while let Some(ch) = self.next() {
            match ch {
                // Whitespace never extends the pending token range.
                b' ' | b'\t' | b'\r' => continue,
                b'\n' => {
                    state.add_token(self);

                    // Continuation line: keep collecting.
                    if self.try_next_is_space() {
                        continue;
                    }

                    // End of the header: hand back what was gathered,
                    // avoiding a concat when there is a single fragment.
                    return match state.tokens.len() {
                        0 => HeaderValue::Empty,
                        1 => HeaderValue::Text(state.tokens.pop().unwrap()),
                        _ => HeaderValue::Text(state.tokens.concat().into()),
                    };
                }
                b'=' if self.peek_char(b'?') => {
                    self.checkpoint();
                    match self.decode_rfc2047() {
                        Some(decoded) => {
                            // Flush any text seen before the encoded-word so
                            // fragment order is preserved.
                            state.add_token(self);
                            state.add_rfc2047(decoded);
                            continue;
                        }
                        None => {
                            // Not a valid encoded-word: rewind and fall
                            // through so `=` counts as plain text.
                            self.restore();
                        }
                    }
                }
                _ => {}
            }

            // Ordinary byte: open a token if none is pending, then extend it.
            if state.token_start == 0 {
                state.token_start = self.offset();
            }
            state.token_end = self.offset();
        }

        HeaderValue::Empty
    }
}

#[cfg(test)]
mod tests {
    use crate::parsers::{fields::load_tests, MessageStream};

    /// Runs every case loaded from `unstructured.json` through
    /// `parse_unstructured` and compares the text result to the expectation.
    #[test]
    fn parse_unstructured() {
        for case in load_tests::<String>("unstructured.json") {
            let parsed = MessageStream::new(case.header.as_bytes())
                .parse_unstructured()
                .unwrap_text();

            assert_eq!(parsed, case.expected, "failed for {:?}", case.header);
        }
    }
}