mail_parser/parsers/fields/
address.rs1use std::borrow::Cow;
8
9use crate::{parsers::MessageStream, Addr, Address, Group, HeaderValue};
10
/// Lexical context the address parser is currently in.
///
/// The current state decides which token list a flushed token is appended to
/// (see `AddressParser::add_token`).
#[derive(PartialEq, Clone, Copy, Debug)]
enum AddressState {
    /// Inside an angle-bracketed address, i.e. between `<` and `>`.
    Address,
    /// Default state: display-name words or a bare address outside any
    /// brackets, quotes or comments.
    Name,
    /// Inside a double-quoted string (entered from `Name` on `"`).
    Quote,
    /// Inside a parenthesised comment.
    Comment,
}
18
/// Mutable state used while parsing a single address-list header value.
pub struct AddressParser<'x> {
    /// Start of the pending token, recorded from `MessageStream::offset()`
    /// when the token's first byte is seen. `0` means "no pending token";
    /// the slice start used when flushing is `token_start - 1`.
    token_start: usize,
    /// Exclusive end of the pending token within the stream data.
    token_end: usize,

    /// Set when an `@` was seen in the pending token while in `Name` state,
    /// so the token is stored as an e-mail address rather than a name.
    is_token_email: bool,
    /// True when the next byte would begin a new token (after whitespace, a
    /// line fold or a flush). Gates RFC 2047 encoded-word detection.
    is_token_start: bool,
    /// True when the previous byte was an unescaped backslash.
    is_escaped: bool,

    /// Pieces of the display name collected for the current address.
    name_tokens: Vec<Cow<'x, str>>,
    /// Pieces of the e-mail address collected for the current address.
    mail_tokens: Vec<Cow<'x, str>>,
    /// Pieces of comment text collected for the current address.
    comment_tokens: Vec<Cow<'x, str>>,

    /// Current lexical state.
    state: AddressState,
    /// States to return to when the current address/quote/comment closes.
    state_stack: Vec<AddressState>,

    /// Addresses completed so far for the current group (or flat list).
    addresses: Vec<Addr<'x>>,
    /// Name of the group currently being parsed, if any.
    group_name: Option<Cow<'x, str>>,
    /// Comment attached to the group currently being parsed, if any.
    group_comment: Option<Cow<'x, str>>,
    /// Groups completed so far.
    result: Vec<Group<'x>>,
}
39
40impl<'x> AddressParser<'x> {
41 pub fn add_token(&mut self, stream: &MessageStream<'x>, add_trail_space: bool) {
42 if self.token_start > 0 {
43 let token = String::from_utf8_lossy(&stream.data[self.token_start - 1..self.token_end]);
44 let mut add_space = false;
45 let list = match self.state {
46 AddressState::Address => &mut self.mail_tokens,
47 AddressState::Name => {
48 if self.is_token_email {
49 &mut self.mail_tokens
50 } else {
51 add_space = true;
52 &mut self.name_tokens
53 }
54 }
55 AddressState::Quote => &mut self.name_tokens,
56 AddressState::Comment => {
57 add_space = true;
58 &mut self.comment_tokens
59 }
60 };
61
62 if add_space && !list.is_empty() {
63 list.push(" ".into());
64 }
65
66 list.push(token);
67
68 if add_trail_space {
69 list.push(" ".into());
70 }
71
72 self.token_start = 0;
73 self.is_token_email = false;
74 self.is_token_start = true;
75 self.is_escaped = false;
76 }
77 }
78
79 pub fn add_address(&mut self) {
80 let has_mail = !self.mail_tokens.is_empty();
81 let has_name = !self.name_tokens.is_empty();
82 let has_comment = !self.comment_tokens.is_empty();
83
84 self.addresses.push(if has_mail && has_name && has_comment {
85 Addr {
86 name: Some(
87 format!(
88 "{} ({})",
89 concat_tokens(&mut self.name_tokens),
90 concat_tokens(&mut self.comment_tokens)
91 )
92 .into(),
93 ),
94 address: concat_tokens(&mut self.mail_tokens).into(),
95 }
96 } else if has_name && has_mail {
97 Addr {
98 name: concat_tokens(&mut self.name_tokens).into(),
99 address: concat_tokens(&mut self.mail_tokens).into(),
100 }
101 } else if has_mail && has_comment {
102 Addr {
103 name: concat_tokens(&mut self.comment_tokens).into(),
104 address: concat_tokens(&mut self.mail_tokens).into(),
105 }
106 } else if has_mail {
107 Addr {
108 name: None,
109 address: concat_tokens(&mut self.mail_tokens).into(),
110 }
111 } else if has_name && has_comment {
112 Addr {
113 name: concat_tokens(&mut self.comment_tokens).into(),
114 address: concat_tokens(&mut self.name_tokens).into(),
115 }
116 } else if has_name {
117 Addr {
118 name: concat_tokens(&mut self.name_tokens).into(),
119 address: None,
120 }
121 } else if has_comment {
122 Addr {
123 name: concat_tokens(&mut self.comment_tokens).into(),
124 address: None,
125 }
126 } else {
127 return;
128 });
129 }
130
131 pub fn add_group_details(&mut self) {
132 if !self.name_tokens.is_empty() {
133 self.group_name = concat_tokens(&mut self.name_tokens).into();
134 }
135
136 if !self.comment_tokens.is_empty() {
137 self.group_comment = concat_tokens(&mut self.comment_tokens).into();
138 }
139
140 if !self.mail_tokens.is_empty() {
141 if let Some(group_name) = &self.group_name {
142 self.group_name =
143 Some(format!("{} {}", group_name, concat_tokens(&mut self.mail_tokens)).into());
144 } else {
145 self.group_name = concat_tokens(&mut self.mail_tokens).into();
146 }
147 }
148 }
149
150 pub fn add_group(&mut self) {
151 let has_name = self.group_name.is_some();
152 let has_comment = self.group_comment.is_some();
153 let has_addresses = !self.addresses.is_empty();
154
155 self.result
156 .push(if has_name && has_addresses && has_comment {
157 Group {
158 name: Some(
159 format!(
160 "{} ({})",
161 self.group_name.take().unwrap(),
162 self.group_comment.take().unwrap()
163 )
164 .into(),
165 ),
166 addresses: std::mem::take(&mut self.addresses),
167 }
168 } else if has_addresses && has_name {
169 Group {
170 name: self.group_name.take(),
171 addresses: std::mem::take(&mut self.addresses),
172 }
173 } else if has_addresses {
174 Group {
175 name: self.group_comment.take(),
176 addresses: std::mem::take(&mut self.addresses),
177 }
178 } else if has_name {
179 Group {
180 name: self.group_name.take(),
181 addresses: Vec::new(),
182 }
183 } else {
184 return;
185 });
186 }
187}
188
189impl<'x> MessageStream<'x> {
190 pub fn parse_address(&mut self) -> HeaderValue<'x> {
191 let mut parser = AddressParser {
192 token_start: 0,
193 token_end: 0,
194
195 is_token_email: false,
196 is_token_start: true,
197 is_escaped: false,
198
199 name_tokens: Vec::with_capacity(3),
200 mail_tokens: Vec::with_capacity(3),
201 comment_tokens: Vec::with_capacity(3),
202
203 state: AddressState::Name,
204 state_stack: Vec::with_capacity(5),
205
206 addresses: Vec::new(),
207 group_name: None,
208 group_comment: None,
209 result: Vec::new(),
210 };
211
212 while let Some(ch) = self.next() {
213 match ch {
214 b'\n' => {
215 parser.add_token(self, false);
216 if self.try_next_is_space() {
217 if !parser.is_token_start {
218 parser.is_token_start = true;
219 }
220 continue;
221 } else {
222 break;
223 }
224 }
225 b'\\' if parser.state != AddressState::Name && !parser.is_escaped => {
226 if parser.token_start > 0 {
227 if parser.state == AddressState::Quote {
228 parser.token_end = self.offset() - 1;
229 }
230 parser.add_token(self, false);
231 }
232 parser.is_escaped = true;
233 continue;
234 }
235 b',' if parser.state == AddressState::Name => {
236 parser.add_token(self, false);
237 parser.add_address();
238 continue;
239 }
240 b'<' if parser.state == AddressState::Name => {
241 parser.is_token_email = false;
242 parser.add_token(self, false);
243 parser.state_stack.push(AddressState::Name);
244 parser.state = AddressState::Address;
245 continue;
246 }
247 b'>' if parser.state == AddressState::Address => {
248 parser.add_token(self, false);
249 parser.state = parser.state_stack.pop().unwrap();
250 continue;
251 }
252 b'"' if !parser.is_escaped => match parser.state {
253 AddressState::Name => {
254 parser.state_stack.push(AddressState::Name);
255 parser.state = AddressState::Quote;
256 parser.add_token(self, false);
257 continue;
258 }
259 AddressState::Quote => {
260 parser.add_token(self, false);
261 parser.state = parser.state_stack.pop().unwrap();
262 continue;
263 }
264 _ => (),
265 },
266 b'@' if parser.state == AddressState::Name => {
267 parser.is_token_email = true;
268 }
269 b'=' if parser.is_token_start && !parser.is_escaped && self.peek_char(b'?') => {
270 self.checkpoint();
271 if let Some(token) = self.decode_rfc2047() {
272 let add_space = parser.state != AddressState::Quote; parser.add_token(self, add_space);
274 (if parser.state != AddressState::Comment {
275 &mut parser.name_tokens
276 } else {
277 &mut parser.comment_tokens
278 })
279 .push(token.into());
280 continue;
281 }
282 self.restore();
283 }
284 b' ' | b'\t' => {
285 if !parser.is_token_start {
286 parser.is_token_start = true;
287 }
288 if parser.is_escaped {
289 parser.is_escaped = false;
290 }
291 if parser.state == AddressState::Quote {
292 if parser.token_start == 0 {
293 parser.token_start = self.offset();
294 parser.token_end = parser.token_start;
295 } else {
296 parser.token_end = self.offset();
297 }
298 }
299 continue;
300 }
301 b'\r' => continue,
302 b'(' if parser.state != AddressState::Quote && !parser.is_escaped => {
303 parser.state_stack.push(parser.state);
304 if parser.state != AddressState::Comment {
305 parser.add_token(self, false);
306 parser.state = AddressState::Comment;
307 continue;
308 }
309 }
310 b')' if parser.state == AddressState::Comment && !parser.is_escaped => {
311 let new_state = parser.state_stack.pop().unwrap();
312 if parser.state != new_state {
313 parser.add_token(self, false);
314 parser.state = new_state;
315 continue;
316 }
317 }
318 b':' if parser.state == AddressState::Name && !parser.is_escaped => {
319 parser.add_group();
320 parser.add_token(self, false);
321 parser.add_group_details();
322 continue;
323 }
324 b';' if parser.state == AddressState::Name => {
325 parser.add_token(self, false);
326 parser.add_address();
327 parser.add_group();
328 continue;
329 }
330 _ => (),
331 }
332
333 if parser.is_escaped {
334 parser.is_escaped = false;
335 }
336
337 if parser.is_token_start {
338 parser.is_token_start = false;
339 }
340
341 if parser.token_start == 0 {
342 parser.token_start = self.offset();
343 parser.token_end = parser.token_start;
344 } else {
345 parser.token_end = self.offset();
346 }
347 }
348
349 parser.add_address();
350
351 if parser.group_name.is_some() || !parser.result.is_empty() {
352 parser.add_group();
353 HeaderValue::Address(Address::Group(parser.result))
354 } else if !parser.addresses.is_empty() {
355 HeaderValue::Address(Address::List(parser.addresses))
356 } else {
357 HeaderValue::Empty
358 }
359 }
360}
361
/// Drains `tokens` and returns their concatenation.
///
/// A single token is handed back as-is (so a borrowed token stays borrowed);
/// any other count is joined into a freshly allocated string. `tokens` is
/// always left empty.
fn concat_tokens<'x>(tokens: &mut Vec<Cow<'x, str>>) -> Cow<'x, str> {
    match tokens.len() {
        1 => tokens.pop().unwrap(),
        _ => {
            let joined: String = tokens.drain(..).collect();
            Cow::Owned(joined)
        }
    }
}
371
/// Returns the part of `addr` before the first `@`.
///
/// Yields `None` when there is no `@`, when the `@` is the first or the last
/// byte, or when a non-ASCII byte appears before the `@`.
pub fn parse_address_local_part(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();
    // Stop at the first byte that decides the outcome: `@` or non-ASCII.
    let stop = bytes.iter().position(|&b| b == b'@' || !b.is_ascii())?;

    if bytes[stop] == b'@' && stop > 0 && stop + 1 < bytes.len() {
        addr.get(..stop)
    } else {
        None
    }
}
389
/// Returns the part of `addr` after the first `@`.
///
/// Yields `None` when there is no `@`, when the `@` is the first or the last
/// byte, or when a non-ASCII byte appears before the `@`. Bytes after the `@`
/// are not inspected.
pub fn parse_address_domain(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();
    // Stop at the first byte that decides the outcome: `@` or non-ASCII.
    let stop = bytes.iter().position(|&b| b == b'@' || !b.is_ascii())?;

    if bytes[stop] == b'@' && stop > 0 && stop + 1 < bytes.len() {
        addr.get(stop + 1..)
    } else {
        None
    }
}
406
/// Returns the "user" sub-part of the local part of `addr`: everything before
/// the first `+`, or the whole local part when it contains no `+`.
///
/// Yields `None` when there is no `@`, when the `@` is the last byte, when the
/// user part would be empty, or when a non-ASCII byte appears anywhere before
/// the `@` (including inside the `+detail` section, matching the other
/// `parse_address_*` helpers).
pub fn parse_address_user_part(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();
    // Position of the first `+`, once seen.
    let mut user_end: Option<usize> = None;

    for (pos, &ch) in bytes.iter().enumerate() {
        match ch {
            b'+' if user_end.is_none() => {
                if pos == 0 {
                    // A leading `+` would leave an empty user part.
                    return None;
                }
                user_end = Some(pos);
            }
            b'@' => {
                let end = user_end.unwrap_or(pos);
                return if end > 0 && pos + 1 < bytes.len() {
                    addr.get(..end)
                } else {
                    None
                };
            }
            _ if !ch.is_ascii() => return None,
            _ => {}
        }
    }

    None
}
434
/// Returns the "detail" sub-part of the local part of `addr`: everything
/// between the first `+` and the first `@`.
///
/// The split happens at the first `+`, the same place
/// `parse_address_user_part` splits, so for `a+b+c@d` the user part is `a`
/// and the detail part is `b+c`.
///
/// Yields `None` when there is no `+` before the `@`, when there is no `@`,
/// when the `@` is the last byte, or when a non-ASCII byte appears before the
/// `@`. An empty detail (`user+@domain`) yields `Some("")`.
pub fn parse_address_detail_part(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();
    // Index just past the first `+`, once seen.
    let mut detail_start: Option<usize> = None;

    for (pos, &ch) in bytes.iter().enumerate() {
        match ch {
            b'+' => {
                // Only the first `+` acts as the separator.
                detail_start.get_or_insert(pos + 1);
            }
            b'@' => {
                return match detail_start {
                    Some(start) if pos + 1 < bytes.len() => addr.get(start..pos),
                    _ => None,
                };
            }
            _ if !ch.is_ascii() => return None,
            _ => {}
        }
    }

    None
}
456
#[cfg(test)]
mod tests {
    use crate::parsers::{fields::load_tests, MessageStream};

    /// Runs every case from the `address.json` fixture through
    /// `parse_address` and compares the result with the expected value.
    #[test]
    fn parse_addresses() {
        for test in load_tests("address.json") {
            let mut stream = MessageStream::new(test.header.as_bytes());
            let parsed = stream.parse_address();

            assert_eq!(
                parsed.unwrap_address(),
                test.expected,
                "failed for {:?}",
                test.header
            );
        }
    }
}