mail_parser/parsers/
header.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6
7use std::borrow::Cow;
8
9use crate::{Header, HeaderName, MessageParser};
10
11use super::MessageStream;
12
13impl<'x> MessageStream<'x> {
14    pub fn parse_headers(&mut self, conf: &MessageParser, headers: &mut Vec<Header<'x>>) -> bool {
15        loop {
16            loop {
17                match self.peek() {
18                    Some(b'\n') => {
19                        self.next();
20                        return true;
21                    }
22                    None => return false,
23                    Some(ch) if !ch.is_ascii_whitespace() => {
24                        break;
25                    }
26                    _ => {
27                        self.next();
28                    }
29                }
30            }
31
32            let offset_field = self.offset();
33
34            if let Some(header_name) = self.parse_header_name() {
35                let from_offset = self.offset();
36                let value = if conf.header_map.is_empty() {
37                    match &header_name {
38                        HeaderName::Subject
39                        | HeaderName::Comments
40                        | HeaderName::ContentDescription
41                        | HeaderName::ContentLocation
42                        | HeaderName::ContentTransferEncoding => self.parse_unstructured(),
43                        HeaderName::From
44                        | HeaderName::To
45                        | HeaderName::Cc
46                        | HeaderName::Bcc
47                        | HeaderName::ReplyTo
48                        | HeaderName::Sender
49                        | HeaderName::ResentTo
50                        | HeaderName::ResentFrom
51                        | HeaderName::ResentBcc
52                        | HeaderName::ResentCc
53                        | HeaderName::ResentSender
54                        | HeaderName::ListArchive
55                        | HeaderName::ListHelp
56                        | HeaderName::ListId
57                        | HeaderName::ListOwner
58                        | HeaderName::ListPost
59                        | HeaderName::ListSubscribe
60                        | HeaderName::ListUnsubscribe => self.parse_address(),
61                        HeaderName::Date | HeaderName::ResentDate => self.parse_date(),
62                        HeaderName::MessageId
63                        | HeaderName::References
64                        | HeaderName::InReplyTo
65                        | HeaderName::ReturnPath
66                        | HeaderName::ContentId
67                        | HeaderName::ResentMessageId => self.parse_id(),
68                        HeaderName::Keywords | HeaderName::ContentLanguage => {
69                            self.parse_comma_separared()
70                        }
71                        HeaderName::Received => self.parse_received(),
72                        HeaderName::MimeVersion => self.parse_raw(),
73                        HeaderName::ContentType | HeaderName::ContentDisposition => {
74                            self.parse_content_type()
75                        }
76                        _ => self.parse_raw(),
77                    }
78                } else {
79                    (conf
80                        .header_map
81                        .get(&header_name)
82                        .unwrap_or(&conf.def_hdr_parse_fnc))(self)
83                };
84
85                headers.push(Header {
86                    name: header_name,
87                    value,
88                    offset_field: offset_field as u32,
89                    offset_start: from_offset as u32,
90                    offset_end: self.offset() as u32,
91                });
92            } else if self.is_eof() {
93                return false;
94            }
95        }
96    }
97
98    pub fn parse_header_name(&mut self) -> Option<HeaderName<'x>> {
99        let mut token_start: usize = 0;
100        let mut token_end: usize = 0;
101        let mut token_len: usize = 0;
102
103        let mut header = [0u8; 30];
104
105        while let Some(&ch) = self.next() {
106            match ch {
107                b':' => {
108                    if token_start != 0 {
109                        break;
110                    }
111                }
112                b'\n' => {
113                    return None;
114                }
115                _ => {
116                    if !ch.is_ascii_whitespace() {
117                        if token_start == 0 {
118                            token_start = self.offset();
119                            token_end = token_start;
120                        } else {
121                            token_end = self.offset();
122                        }
123
124                        if let Some(header) = header.get_mut(token_len) {
125                            *header = ch.to_ascii_lowercase();
126                            token_len += 1;
127                        }
128                    }
129                }
130            }
131        }
132
133        if token_start != 0 {
134            header_map(&header[..token_len])
135                .unwrap_or_else(|| {
136                    HeaderName::Other(String::from_utf8_lossy(
137                        self.bytes(token_start - 1..token_end),
138                    ))
139                })
140                .into()
141        } else {
142            None
143        }
144    }
145}
146
147impl<'x> HeaderName<'x> {
148    /// Parse a header name
149    pub fn parse(data: impl Into<Cow<'x, str>>) -> Option<HeaderName<'x>> {
150        let data = data.into();
151
152        if !data.is_empty() {
153            let mut data_lc = String::with_capacity(data.len());
154            for ch in data.chars() {
155                match ch {
156                    'A'..='Z' => data_lc.push(ch.to_ascii_lowercase()),
157                    'a'..='z' | '0'..='9' | '-' | '_' => data_lc.push(ch),
158                    _ => return None,
159                }
160            }
161            header_map(data_lc.as_bytes())
162                .unwrap_or(HeaderName::Other(data))
163                .into()
164        } else {
165            None
166        }
167    }
168}
169
170fn header_map(name: &[u8]) -> Option<HeaderName<'static>> {
171    hashify::tiny_map! {name,
172    "arc-authentication-results" => HeaderName::ArcAuthenticationResults,
173    "arc-seal" => HeaderName::ArcSeal,
174    "arc-message-signature" => HeaderName::ArcMessageSignature,
175    "bcc" => HeaderName::Bcc,
176    "cc" => HeaderName::Cc,
177    "comments" => HeaderName::Comments,
178    "content-description" => HeaderName::ContentDescription,
179    "content-disposition" => HeaderName::ContentDisposition,
180    "content-id" => HeaderName::ContentId,
181    "content-language" => HeaderName::ContentLanguage,
182    "content-location" => HeaderName::ContentLocation,
183    "content-transfer-encoding" => HeaderName::ContentTransferEncoding,
184    "content-type" => HeaderName::ContentType,
185    "date" => HeaderName::Date,
186    "dkim-signature" => HeaderName::DkimSignature,
187    "from" => HeaderName::From,
188    "in-reply-to" => HeaderName::InReplyTo,
189    "keywords" => HeaderName::Keywords,
190    "list-archive" => HeaderName::ListArchive,
191    "list-help" => HeaderName::ListHelp,
192    "list-id" => HeaderName::ListId,
193    "list-owner" => HeaderName::ListOwner,
194    "list-post" => HeaderName::ListPost,
195    "list-subscribe" => HeaderName::ListSubscribe,
196    "list-unsubscribe" => HeaderName::ListUnsubscribe,
197    "message-id" => HeaderName::MessageId,
198    "mime-version" => HeaderName::MimeVersion,
199    "received" => HeaderName::Received,
200    "references" => HeaderName::References,
201    "reply-to" => HeaderName::ReplyTo,
202    "resent-bcc" => HeaderName::ResentBcc,
203    "resent-cc" => HeaderName::ResentCc,
204    "resent-date" => HeaderName::ResentDate,
205    "resent-from" => HeaderName::ResentFrom,
206    "resent-message-id" => HeaderName::ResentMessageId,
207    "resent-sender" => HeaderName::ResentSender,
208    "resent-to" => HeaderName::ResentTo,
209    "return-path" => HeaderName::ReturnPath,
210    "sender" => HeaderName::Sender,
211    "subject" => HeaderName::Subject,
212    "to" => HeaderName::To,
213    }
214}
215
#[cfg(test)]
mod tests {
    use crate::{parsers::MessageStream, HeaderName};

    /// Header names are matched case-insensitively, surrounding whitespace is
    /// ignored, and unknown names come back as `HeaderName::Other` with their
    /// original spelling.
    #[test]
    fn header_name_parse() {
        let cases = [
            // Known headers, in various casings and with stray whitespace.
            ("From: ", HeaderName::From),
            ("receiVED: ", HeaderName::Received),
            (" subject   : ", HeaderName::Subject),
            // Unknown headers keep their raw text.
            (
                "X-Custom-Field : ",
                HeaderName::Other("X-Custom-Field".into()),
            ),
            (" T : ", HeaderName::Other("T".into())),
            ("mal formed: ", HeaderName::Other("mal formed".into())),
            ("MIME-version : ", HeaderName::MimeVersion),
        ];

        for (input, expected_result) in cases {
            let parsed = MessageStream::new(input.as_bytes())
                .parse_header_name()
                .unwrap();

            assert_eq!(expected_result, parsed, "Failed to parse '{input:?}'");
        }
    }
}