Skip to main content

mail_parser/parsers/fields/
address.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6
7use std::borrow::Cow;
8
9use crate::{parsers::MessageStream, Addr, Address, Group, HeaderValue};
10
/// Lexical context the address parser is currently in.
///
/// The active state decides which token list a flushed token is appended to
/// and which bytes are treated as delimiters.
#[derive(Debug, Clone, Copy, PartialEq)]
enum AddressState {
    /// Inside an angle-bracketed address (`<` ... `>`).
    Address,
    /// Outside of any bracket, quote or comment; this is the initial state.
    Name,
    /// Inside a double-quoted string.
    Quote,
    /// Inside a parenthesised comment.
    Comment,
}
18
19pub struct AddressParser<'x> {
20    token_start: usize,
21    token_end: usize,
22
23    is_token_email: bool,
24    is_token_start: bool,
25    is_escaped: bool,
26
27    name_tokens: Vec<Cow<'x, str>>,
28    mail_tokens: Vec<Cow<'x, str>>,
29    comment_tokens: Vec<Cow<'x, str>>,
30
31    state: AddressState,
32    state_stack: Vec<AddressState>,
33
34    addresses: Vec<Addr<'x>>,
35    group_name: Option<Cow<'x, str>>,
36    group_comment: Option<Cow<'x, str>>,
37    result: Vec<Group<'x>>,
38}
39
40impl<'x> AddressParser<'x> {
41    pub fn add_token(&mut self, stream: &MessageStream<'x>, add_trail_space: bool) {
42        if self.token_start > 0 {
43            let token = String::from_utf8_lossy(&stream.data[self.token_start - 1..self.token_end]);
44            let mut add_space = false;
45            let list = match self.state {
46                AddressState::Address => &mut self.mail_tokens,
47                AddressState::Name => {
48                    if self.is_token_email {
49                        &mut self.mail_tokens
50                    } else {
51                        add_space = true;
52                        &mut self.name_tokens
53                    }
54                }
55                AddressState::Quote => &mut self.name_tokens,
56                AddressState::Comment => {
57                    add_space = true;
58                    &mut self.comment_tokens
59                }
60            };
61
62            if add_space && !list.is_empty() {
63                list.push(" ".into());
64            }
65
66            list.push(token);
67
68            if add_trail_space {
69                list.push(" ".into());
70            }
71
72            self.token_start = 0;
73            self.is_token_email = false;
74            self.is_token_start = true;
75            self.is_escaped = false;
76        }
77    }
78
79    pub fn add_address(&mut self) {
80        let has_mail = !self.mail_tokens.is_empty();
81        let has_name = !self.name_tokens.is_empty();
82        let has_comment = !self.comment_tokens.is_empty();
83
84        self.addresses.push(if has_mail && has_name && has_comment {
85            Addr {
86                name: Some(
87                    format!(
88                        "{} ({})",
89                        concat_tokens(&mut self.name_tokens),
90                        concat_tokens(&mut self.comment_tokens)
91                    )
92                    .into(),
93                ),
94                address: concat_tokens(&mut self.mail_tokens).into(),
95            }
96        } else if has_name && has_mail {
97            Addr {
98                name: concat_tokens(&mut self.name_tokens).into(),
99                address: concat_tokens(&mut self.mail_tokens).into(),
100            }
101        } else if has_mail && has_comment {
102            Addr {
103                name: concat_tokens(&mut self.comment_tokens).into(),
104                address: concat_tokens(&mut self.mail_tokens).into(),
105            }
106        } else if has_mail {
107            Addr {
108                name: None,
109                address: concat_tokens(&mut self.mail_tokens).into(),
110            }
111        } else if has_name && has_comment {
112            Addr {
113                name: concat_tokens(&mut self.comment_tokens).into(),
114                address: concat_tokens(&mut self.name_tokens).into(),
115            }
116        } else if has_name {
117            Addr {
118                name: concat_tokens(&mut self.name_tokens).into(),
119                address: None,
120            }
121        } else if has_comment {
122            Addr {
123                name: concat_tokens(&mut self.comment_tokens).into(),
124                address: None,
125            }
126        } else {
127            return;
128        });
129    }
130
131    pub fn add_group_details(&mut self) {
132        if !self.name_tokens.is_empty() {
133            self.group_name = concat_tokens(&mut self.name_tokens).into();
134        }
135
136        if !self.comment_tokens.is_empty() {
137            self.group_comment = concat_tokens(&mut self.comment_tokens).into();
138        }
139
140        if !self.mail_tokens.is_empty() {
141            if let Some(group_name) = &self.group_name {
142                self.group_name =
143                    Some(format!("{} {}", group_name, concat_tokens(&mut self.mail_tokens)).into());
144            } else {
145                self.group_name = concat_tokens(&mut self.mail_tokens).into();
146            }
147        }
148    }
149
150    pub fn add_group(&mut self) {
151        let has_name = self.group_name.is_some();
152        let has_comment = self.group_comment.is_some();
153        let has_addresses = !self.addresses.is_empty();
154
155        self.result
156            .push(if has_name && has_addresses && has_comment {
157                Group {
158                    name: Some(
159                        format!(
160                            "{} ({})",
161                            self.group_name.take().unwrap(),
162                            self.group_comment.take().unwrap()
163                        )
164                        .into(),
165                    ),
166                    addresses: std::mem::take(&mut self.addresses),
167                }
168            } else if has_addresses && has_name {
169                Group {
170                    name: self.group_name.take(),
171                    addresses: std::mem::take(&mut self.addresses),
172                }
173            } else if has_addresses {
174                Group {
175                    name: self.group_comment.take(),
176                    addresses: std::mem::take(&mut self.addresses),
177                }
178            } else if has_name {
179                Group {
180                    name: self.group_name.take(),
181                    addresses: Vec::new(),
182                }
183            } else {
184                return;
185            });
186    }
187}
188
189impl<'x> MessageStream<'x> {
190    pub fn parse_address(&mut self) -> HeaderValue<'x> {
191        let mut parser = AddressParser {
192            token_start: 0,
193            token_end: 0,
194
195            is_token_email: false,
196            is_token_start: true,
197            is_escaped: false,
198
199            name_tokens: Vec::with_capacity(3),
200            mail_tokens: Vec::with_capacity(3),
201            comment_tokens: Vec::with_capacity(3),
202
203            state: AddressState::Name,
204            state_stack: Vec::with_capacity(5),
205
206            addresses: Vec::new(),
207            group_name: None,
208            group_comment: None,
209            result: Vec::new(),
210        };
211
212        while let Some(ch) = self.next() {
213            match ch {
214                b'\n' => {
215                    parser.add_token(self, false);
216                    if self.try_next_is_space() {
217                        if !parser.is_token_start {
218                            parser.is_token_start = true;
219                        }
220                        continue;
221                    } else {
222                        break;
223                    }
224                }
225                b'\\' if parser.state != AddressState::Name && !parser.is_escaped => {
226                    if parser.token_start > 0 {
227                        if parser.state == AddressState::Quote {
228                            parser.token_end = self.offset() - 1;
229                        }
230                        parser.add_token(self, false);
231                    }
232                    parser.is_escaped = true;
233                    continue;
234                }
235                b',' if parser.state == AddressState::Name => {
236                    parser.add_token(self, false);
237                    parser.add_address();
238                    continue;
239                }
240                b'<' if parser.state == AddressState::Name => {
241                    parser.is_token_email = false;
242                    parser.add_token(self, false);
243                    parser.state_stack.push(AddressState::Name);
244                    parser.state = AddressState::Address;
245                    continue;
246                }
247                b'>' if parser.state == AddressState::Address => {
248                    parser.add_token(self, false);
249                    parser.state = parser.state_stack.pop().unwrap();
250                    continue;
251                }
252                b'"' if !parser.is_escaped => match parser.state {
253                    AddressState::Name => {
254                        parser.state_stack.push(AddressState::Name);
255                        parser.state = AddressState::Quote;
256                        parser.add_token(self, false);
257                        continue;
258                    }
259                    AddressState::Quote => {
260                        parser.add_token(self, false);
261                        parser.state = parser.state_stack.pop().unwrap();
262                        continue;
263                    }
264                    _ => (),
265                },
266                b'@' if parser.state == AddressState::Name => {
267                    parser.is_token_email = true;
268                }
269                b'=' if parser.is_token_start && !parser.is_escaped && self.peek_char(b'?') => {
270                    self.checkpoint();
271                    if let Some(token) = self.decode_rfc2047() {
272                        let add_space = parser.state != AddressState::Quote; // Make borrow-checker happy
273                        parser.add_token(self, add_space);
274                        (if parser.state != AddressState::Comment {
275                            &mut parser.name_tokens
276                        } else {
277                            &mut parser.comment_tokens
278                        })
279                        .push(token.into());
280                        continue;
281                    }
282                    self.restore();
283                }
284                b' ' | b'\t' => {
285                    if !parser.is_token_start {
286                        parser.is_token_start = true;
287                    }
288                    if parser.is_escaped {
289                        parser.is_escaped = false;
290                    }
291                    if parser.state == AddressState::Quote {
292                        if parser.token_start == 0 {
293                            parser.token_start = self.offset();
294                            parser.token_end = parser.token_start;
295                        } else {
296                            parser.token_end = self.offset();
297                        }
298                    }
299                    continue;
300                }
301                b'\r' => continue,
302                b'(' if parser.state != AddressState::Quote && !parser.is_escaped => {
303                    parser.state_stack.push(parser.state);
304                    if parser.state != AddressState::Comment {
305                        parser.add_token(self, false);
306                        parser.state = AddressState::Comment;
307                        continue;
308                    }
309                }
310                b')' if parser.state == AddressState::Comment && !parser.is_escaped => {
311                    let new_state = parser.state_stack.pop().unwrap();
312                    if parser.state != new_state {
313                        parser.add_token(self, false);
314                        parser.state = new_state;
315                        continue;
316                    }
317                }
318                b':' if parser.state == AddressState::Name && !parser.is_escaped => {
319                    parser.add_group();
320                    parser.add_token(self, false);
321                    parser.add_group_details();
322                    continue;
323                }
324                b';' if parser.state == AddressState::Name => {
325                    parser.add_token(self, false);
326                    parser.add_address();
327                    parser.add_group();
328                    continue;
329                }
330                _ => (),
331            }
332
333            if parser.is_escaped {
334                parser.is_escaped = false;
335            }
336
337            if parser.is_token_start {
338                parser.is_token_start = false;
339            }
340
341            if parser.token_start == 0 {
342                parser.token_start = self.offset();
343                parser.token_end = parser.token_start;
344            } else {
345                parser.token_end = self.offset();
346            }
347        }
348
349        parser.add_address();
350
351        if parser.group_name.is_some() || !parser.result.is_empty() {
352            parser.add_group();
353            HeaderValue::Address(Address::Group(parser.result))
354        } else if !parser.addresses.is_empty() {
355            HeaderValue::Address(Address::List(parser.addresses))
356        } else {
357            HeaderValue::Empty
358        }
359    }
360}
361
/// Joins all tokens into one string and leaves `tokens` empty.
///
/// A single token is returned as-is, so a borrowed token stays borrowed;
/// any other number of tokens is concatenated into a newly allocated string.
fn concat_tokens<'x>(tokens: &mut Vec<Cow<'x, str>>) -> Cow<'x, str> {
    match tokens.len() {
        1 => tokens.remove(0),
        _ => Cow::Owned(tokens.drain(..).collect::<String>()),
    }
}
371
/// Returns the part of `addr` before the first `@`.
///
/// Returns `None` when there is no `@`, when the `@` is the first or the
/// last character, or when a non-ASCII byte appears before the `@`.
pub fn parse_address_local_part(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();

    // First byte that either ends the local part or invalidates it.
    let stop = bytes
        .iter()
        .position(|&byte| byte == b'@' || !byte.is_ascii())?;

    if bytes[stop] == b'@' && stop > 0 && stop + 1 < bytes.len() {
        addr.get(..stop)
    } else {
        None
    }
}
389
/// Returns the part of `addr` after the first `@`.
///
/// Returns `None` when there is no `@`, when the `@` is the first or the
/// last character, or when a non-ASCII byte appears before the `@`.
/// Bytes after the `@` are not inspected.
pub fn parse_address_domain(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();

    // First byte that either ends the local part or invalidates it.
    let stop = bytes
        .iter()
        .position(|&byte| byte == b'@' || !byte.is_ascii())?;

    if bytes[stop] == b'@' && stop > 0 && stop + 1 < bytes.len() {
        addr.get(stop + 1..)
    } else {
        None
    }
}
406
/// Returns the user part of `addr`: the text before the first `+` of the
/// local part, or the whole local part when it contains no `+`.
///
/// Returns `None` when the user part would be empty, when there is no `@`
/// followed by at least one character, or when a non-ASCII byte appears
/// before the first `+` or `@`.
pub fn parse_address_user_part(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();

    // First byte that ends the user part ('+' or '@') or invalidates it.
    let stop = bytes
        .iter()
        .position(|&byte| byte == b'+' || byte == b'@' || !byte.is_ascii())?;

    if stop == 0 {
        return None;
    }

    match bytes[stop] {
        b'+' => {
            // An '@' followed by at least one more byte must come after the '+'.
            let tail = &bytes[stop + 1..];
            match tail.iter().position(|&byte| byte == b'@') {
                Some(at) if at + 1 < tail.len() => addr.get(..stop),
                _ => None,
            }
        }
        b'@' if stop + 1 < bytes.len() => addr.get(..stop),
        _ => None,
    }
}
434
/// Returns the detail part of `addr`: the text between the first `+` of the
/// local part and the `@`.
///
/// The local part is split at the leftmost `+`, which is the same split
/// point used by `parse_address_user_part`, so that for `user+a+b@domain`
/// the detail is `a+b` and no part of the local part is lost.
///
/// Returns `None` when the local part contains no `+`, when there is no `@`
/// followed by at least one character, or when a non-ASCII byte appears
/// before the `@`. An empty detail (`user+@domain`) yields `Some("")`.
pub fn parse_address_detail_part(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();
    // Index of the first byte after the leftmost '+', once one has been seen.
    let mut detail_start: Option<usize> = None;

    for (pos, &ch) in bytes.iter().enumerate() {
        match ch {
            b'+' => {
                // Only the leftmost separator counts; later ones belong to the detail.
                if detail_start.is_none() {
                    detail_start = Some(pos + 1);
                }
            }
            b'@' => {
                return match detail_start {
                    // The domain must not be empty.
                    Some(start) if pos + 1 < bytes.len() => addr.get(start..pos),
                    _ => None,
                };
            }
            _ if !ch.is_ascii() => return None,
            _ => {}
        }
    }

    None
}
456
#[cfg(test)]
mod tests {
    use crate::parsers::{fields::load_tests, MessageStream};

    /// Runs every case from the `address.json` fixture through
    /// `parse_address` and compares the result with the expected value.
    #[test]
    fn parse_addresses() {
        for test in load_tests("address.json") {
            let parsed = MessageStream::new(test.header.as_bytes())
                .parse_address()
                .unwrap_address();

            assert_eq!(parsed, test.expected, "failed for {:?}", test.header);
        }
    }
}