// mail_parser/parsers/fields/address.rs

/*
 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
 *
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 */

use std::borrow::Cow;

use crate::{parsers::MessageStream, Addr, Address, Group, HeaderValue};

/// Lexical context the address parser is currently in.
///
/// The parser keeps the active state in `AddressParser::state` and the states
/// it has to return to in `AddressParser::state_stack`.
#[derive(PartialEq, Clone, Copy, Debug)]
enum AddressState {
    /// Inside an angle-bracketed `<...>` section; tokens go to the mail list.
    Address,
    /// Default context, outside of any bracket, quote or comment.
    Name,
    /// Inside a double-quoted string; tokens go to the name list.
    Quote,
    /// Inside a parenthesised comment; tokens go to the comment list.
    Comment,
}

19pub struct AddressParser<'x> {
20    token_start: usize,
21    token_end: usize,
22
23    is_token_email: bool,
24    is_token_start: bool,
25    is_escaped: bool,
26
27    name_tokens: Vec<Cow<'x, str>>,
28    mail_tokens: Vec<Cow<'x, str>>,
29    comment_tokens: Vec<Cow<'x, str>>,
30
31    state: AddressState,
32    state_stack: Vec<AddressState>,
33
34    addresses: Vec<Addr<'x>>,
35    group_name: Option<Cow<'x, str>>,
36    group_comment: Option<Cow<'x, str>>,
37    result: Vec<Group<'x>>,
38}
39
40impl<'x> AddressParser<'x> {
41    pub fn add_token(&mut self, stream: &MessageStream<'x>, add_trail_space: bool) {
42        if self.token_start > 0 {
43            let token = String::from_utf8_lossy(&stream.data[self.token_start - 1..self.token_end]);
44            let mut add_space = false;
45            let list = match self.state {
46                AddressState::Address => &mut self.mail_tokens,
47                AddressState::Name => {
48                    if self.is_token_email {
49                        &mut self.mail_tokens
50                    } else {
51                        add_space = true;
52                        &mut self.name_tokens
53                    }
54                }
55                AddressState::Quote => &mut self.name_tokens,
56                AddressState::Comment => {
57                    add_space = true;
58                    &mut self.comment_tokens
59                }
60            };
61
62            if add_space && !list.is_empty() {
63                list.push(" ".into());
64            }
65
66            list.push(token);
67
68            if add_trail_space {
69                list.push(" ".into());
70            }
71
72            self.token_start = 0;
73            self.is_token_email = false;
74            self.is_token_start = true;
75            self.is_escaped = false;
76        }
77    }
78
79    pub fn add_address(&mut self) {
80        let has_mail = !self.mail_tokens.is_empty();
81        let has_name = !self.name_tokens.is_empty();
82        let has_comment = !self.comment_tokens.is_empty();
83
84        self.addresses.push(if has_mail && has_name && has_comment {
85            Addr {
86                name: Some(
87                    format!(
88                        "{} ({})",
89                        concat_tokens(&mut self.name_tokens),
90                        concat_tokens(&mut self.comment_tokens)
91                    )
92                    .into(),
93                ),
94                address: concat_tokens(&mut self.mail_tokens).into(),
95            }
96        } else if has_name && has_mail {
97            Addr {
98                name: concat_tokens(&mut self.name_tokens).into(),
99                address: concat_tokens(&mut self.mail_tokens).into(),
100            }
101        } else if has_mail && has_comment {
102            Addr {
103                name: concat_tokens(&mut self.comment_tokens).into(),
104                address: concat_tokens(&mut self.mail_tokens).into(),
105            }
106        } else if has_mail {
107            Addr {
108                name: None,
109                address: concat_tokens(&mut self.mail_tokens).into(),
110            }
111        } else if has_name && has_comment {
112            Addr {
113                name: concat_tokens(&mut self.comment_tokens).into(),
114                address: concat_tokens(&mut self.name_tokens).into(),
115            }
116        } else if has_name {
117            Addr {
118                name: concat_tokens(&mut self.name_tokens).into(),
119                address: None,
120            }
121        } else if has_comment {
122            Addr {
123                name: concat_tokens(&mut self.comment_tokens).into(),
124                address: None,
125            }
126        } else {
127            return;
128        });
129    }
130
131    pub fn add_group_details(&mut self) {
132        if !self.name_tokens.is_empty() {
133            self.group_name = concat_tokens(&mut self.name_tokens).into();
134        }
135
136        if !self.comment_tokens.is_empty() {
137            self.group_comment = concat_tokens(&mut self.comment_tokens).into();
138        }
139
140        if !self.mail_tokens.is_empty() {
141            if self.group_name.is_none() {
142                self.group_name = concat_tokens(&mut self.mail_tokens).into();
143            } else {
144                self.group_name = Some(
145                    (self.group_name.as_ref().unwrap().as_ref().to_owned()
146                        + " "
147                        + concat_tokens(&mut self.mail_tokens).as_ref())
148                    .into(),
149                );
150            }
151        }
152    }
153
154    pub fn add_group(&mut self) {
155        let has_name = self.group_name.is_some();
156        let has_comment = self.group_comment.is_some();
157        let has_addresses = !self.addresses.is_empty();
158
159        self.result
160            .push(if has_name && has_addresses && has_comment {
161                Group {
162                    name: Some(
163                        format!(
164                            "{} ({})",
165                            self.group_name.take().unwrap(),
166                            self.group_comment.take().unwrap()
167                        )
168                        .into(),
169                    ),
170                    addresses: std::mem::take(&mut self.addresses),
171                }
172            } else if has_addresses && has_name {
173                Group {
174                    name: self.group_name.take(),
175                    addresses: std::mem::take(&mut self.addresses),
176                }
177            } else if has_addresses {
178                Group {
179                    name: self.group_comment.take(),
180                    addresses: std::mem::take(&mut self.addresses),
181                }
182            } else if has_name {
183                Group {
184                    name: self.group_name.take(),
185                    addresses: Vec::new(),
186                }
187            } else {
188                return;
189            });
190    }
191}
192
193impl<'x> MessageStream<'x> {
194    pub fn parse_address(&mut self) -> HeaderValue<'x> {
195        let mut parser = AddressParser {
196            token_start: 0,
197            token_end: 0,
198
199            is_token_email: false,
200            is_token_start: true,
201            is_escaped: false,
202
203            name_tokens: Vec::with_capacity(3),
204            mail_tokens: Vec::with_capacity(3),
205            comment_tokens: Vec::with_capacity(3),
206
207            state: AddressState::Name,
208            state_stack: Vec::with_capacity(5),
209
210            addresses: Vec::new(),
211            group_name: None,
212            group_comment: None,
213            result: Vec::new(),
214        };
215
216        while let Some(ch) = self.next() {
217            match ch {
218                b'\n' => {
219                    parser.add_token(self, false);
220                    if self.try_next_is_space() {
221                        if !parser.is_token_start {
222                            parser.is_token_start = true;
223                        }
224                        continue;
225                    } else {
226                        break;
227                    }
228                }
229                b'\\' if parser.state != AddressState::Name && !parser.is_escaped => {
230                    if parser.token_start > 0 {
231                        if parser.state == AddressState::Quote {
232                            parser.token_end = self.offset() - 1;
233                        }
234                        parser.add_token(self, false);
235                    }
236                    parser.is_escaped = true;
237                    continue;
238                }
239                b',' if parser.state == AddressState::Name => {
240                    parser.add_token(self, false);
241                    parser.add_address();
242                    continue;
243                }
244                b'<' if parser.state == AddressState::Name => {
245                    parser.is_token_email = false;
246                    parser.add_token(self, false);
247                    parser.state_stack.push(AddressState::Name);
248                    parser.state = AddressState::Address;
249                    continue;
250                }
251                b'>' if parser.state == AddressState::Address => {
252                    parser.add_token(self, false);
253                    parser.state = parser.state_stack.pop().unwrap();
254                    continue;
255                }
256                b'"' if !parser.is_escaped => match parser.state {
257                    AddressState::Name => {
258                        parser.state_stack.push(AddressState::Name);
259                        parser.state = AddressState::Quote;
260                        parser.add_token(self, false);
261                        continue;
262                    }
263                    AddressState::Quote => {
264                        parser.add_token(self, false);
265                        parser.state = parser.state_stack.pop().unwrap();
266                        continue;
267                    }
268                    _ => (),
269                },
270                b'@' if parser.state == AddressState::Name => {
271                    parser.is_token_email = true;
272                }
273                b'=' if parser.is_token_start && !parser.is_escaped && self.peek_char(b'?') => {
274                    self.checkpoint();
275                    if let Some(token) = self.decode_rfc2047() {
276                        let add_space = parser.state != AddressState::Quote; // Make borrow-checker happy
277                        parser.add_token(self, add_space);
278                        (if parser.state != AddressState::Comment {
279                            &mut parser.name_tokens
280                        } else {
281                            &mut parser.comment_tokens
282                        })
283                        .push(token.into());
284                        continue;
285                    }
286                    self.restore();
287                }
288                b' ' | b'\t' => {
289                    if !parser.is_token_start {
290                        parser.is_token_start = true;
291                    }
292                    if parser.is_escaped {
293                        parser.is_escaped = false;
294                    }
295                    if parser.state == AddressState::Quote {
296                        if parser.token_start == 0 {
297                            parser.token_start = self.offset();
298                            parser.token_end = parser.token_start;
299                        } else {
300                            parser.token_end = self.offset();
301                        }
302                    }
303                    continue;
304                }
305                b'\r' => continue,
306                b'(' if parser.state != AddressState::Quote && !parser.is_escaped => {
307                    parser.state_stack.push(parser.state);
308                    if parser.state != AddressState::Comment {
309                        parser.add_token(self, false);
310                        parser.state = AddressState::Comment;
311                        continue;
312                    }
313                }
314                b')' if parser.state == AddressState::Comment && !parser.is_escaped => {
315                    let new_state = parser.state_stack.pop().unwrap();
316                    if parser.state != new_state {
317                        parser.add_token(self, false);
318                        parser.state = new_state;
319                        continue;
320                    }
321                }
322                b':' if parser.state == AddressState::Name && !parser.is_escaped => {
323                    parser.add_group();
324                    parser.add_token(self, false);
325                    parser.add_group_details();
326                    continue;
327                }
328                b';' if parser.state == AddressState::Name => {
329                    parser.add_token(self, false);
330                    parser.add_address();
331                    parser.add_group();
332                    continue;
333                }
334                _ => (),
335            }
336
337            if parser.is_escaped {
338                parser.is_escaped = false;
339            }
340
341            if parser.is_token_start {
342                parser.is_token_start = false;
343            }
344
345            if parser.token_start == 0 {
346                parser.token_start = self.offset();
347                parser.token_end = parser.token_start;
348            } else {
349                parser.token_end = self.offset();
350            }
351        }
352
353        parser.add_address();
354
355        if parser.group_name.is_some() || !parser.result.is_empty() {
356            parser.add_group();
357            HeaderValue::Address(Address::Group(parser.result))
358        } else if !parser.addresses.is_empty() {
359            HeaderValue::Address(Address::List(parser.addresses))
360        } else {
361            HeaderValue::Empty
362        }
363    }
364}
365
/// Drains `tokens` and returns their concatenation.
///
/// A single token is returned as-is (a borrowed token stays borrowed); any
/// other count, including zero, produces a freshly allocated string. The
/// vector is always left empty.
fn concat_tokens<'x>(tokens: &mut Vec<Cow<'x, str>>) -> Cow<'x, str> {
    match tokens.len() {
        // Length is exactly one, so index 0 exists.
        1 => tokens.remove(0),
        _ => Cow::Owned(tokens.drain(..).collect::<String>()),
    }
}

/// Returns the part of `addr` that precedes the first `@`.
///
/// `None` is returned when there is no `@`, when nothing precedes or follows
/// it, or when a non-ASCII byte occurs before it.
pub fn parse_address_local_part(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();
    // The first `@` or non-ASCII byte decides the outcome.
    let at = bytes
        .iter()
        .position(|&ch| ch == b'@' || !ch.is_ascii())?;

    if bytes[at] == b'@' && at > 0 && at + 1 < bytes.len() {
        // Everything before `at` is ASCII, so this is a valid char boundary.
        addr.get(..at)
    } else {
        None
    }
}

/// Returns the part of `addr` that follows the first `@`.
///
/// `None` is returned when there is no `@`, when nothing precedes or follows
/// it, or when a non-ASCII byte occurs before it. The domain itself is not
/// validated and may contain non-ASCII characters.
pub fn parse_address_domain(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();
    // The first `@` or non-ASCII byte decides the outcome.
    let at = bytes
        .iter()
        .position(|&ch| ch == b'@' || !ch.is_ascii())?;

    if bytes[at] == b'@' && at > 0 && at + 1 < bytes.len() {
        // `@` is a single ASCII byte, so `at + 1` is a valid char boundary.
        addr.get(at + 1..)
    } else {
        None
    }
}

/// Returns the user part of `addr`: everything before the first `+`, or
/// before the `@` when the local part contains no `+`.
///
/// `None` is returned when the user part would be empty, when a non-ASCII
/// byte precedes the first `+`/`@`, or when no `@` followed by at least one
/// byte exists.
///
/// NOTE(review): bytes between the `+` and the `@` are not checked for
/// non-ASCII content, unlike in the sibling helpers; confirm this is intended.
pub fn parse_address_user_part(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();
    // The first `+`, `@` or non-ASCII byte decides the outcome.
    let end = bytes
        .iter()
        .position(|&ch| matches!(ch, b'+' | b'@') || !ch.is_ascii())?;
    if end == 0 {
        return None;
    }

    let rest = &bytes[end + 1..];
    let is_valid = match bytes[end] {
        // Some `@` that is not the very last byte must follow the `+`.
        b'+' => rest
            .split_last()
            .map_or(false, |(_, head)| head.contains(&b'@')),
        b'@' => !rest.is_empty(),
        _ => false,
    };

    if is_valid {
        // Everything before `end` is ASCII, so this is a valid char boundary.
        addr.get(..end)
    } else {
        None
    }
}

/// Returns the detail part of `addr`: the text between the last `+` of the
/// local part and the first `@`.
///
/// `None` is returned when the local part has no `+`, when there is no `@`
/// followed by at least one byte, or when a non-ASCII byte occurs before the
/// `@`. An empty detail (`"a+@x"`) yields `Some("")`.
pub fn parse_address_detail_part(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();
    // The first `@` or non-ASCII byte decides the outcome.
    let at = bytes
        .iter()
        .position(|&ch| ch == b'@' || !ch.is_ascii())?;
    if bytes[at] != b'@' || at + 1 >= bytes.len() {
        return None;
    }

    let plus = bytes[..at].iter().rposition(|&ch| ch == b'+')?;
    // Both bounds sit next to ASCII bytes, so they are valid char boundaries.
    addr.get(plus + 1..at)
}

#[cfg(test)]
mod tests {
    use crate::parsers::{fields::load_tests, MessageStream};

    /// Runs every case from the `address.json` fixture through
    /// `parse_address` and compares the result with the expected value.
    #[test]
    fn parse_addresses() {
        for test in load_tests("address.json") {
            assert_eq!(
                MessageStream::new(test.header.as_bytes())
                    .parse_address()
                    .unwrap_address(),
                test.expected,
                "failed for {:?}",
                test.header
            );
        }
    }
}