mail_parser/parsers/fields/
address.rs1use std::borrow::Cow;
8
9use crate::{parsers::MessageStream, Addr, Address, Group, HeaderValue};
10
/// Lexical context the address parser is currently in.
///
/// The active state decides which token list a flushed token is appended to
/// (see `AddressParser::add_token`).
#[derive(Debug, Clone, Copy, PartialEq)]
enum AddressState {
    /// Inside an angle-bracketed `<...>` section.
    Address,
    /// Outside of any bracket, quote or comment (the initial state).
    Name,
    /// Inside a double-quoted string.
    Quote,
    /// Inside a parenthesised comment.
    Comment,
}
18
19pub struct AddressParser<'x> {
20 token_start: usize,
21 token_end: usize,
22
23 is_token_email: bool,
24 is_token_start: bool,
25 is_escaped: bool,
26
27 name_tokens: Vec<Cow<'x, str>>,
28 mail_tokens: Vec<Cow<'x, str>>,
29 comment_tokens: Vec<Cow<'x, str>>,
30
31 state: AddressState,
32 state_stack: Vec<AddressState>,
33
34 addresses: Vec<Addr<'x>>,
35 group_name: Option<Cow<'x, str>>,
36 group_comment: Option<Cow<'x, str>>,
37 result: Vec<Group<'x>>,
38}
39
40impl<'x> AddressParser<'x> {
41 pub fn add_token(&mut self, stream: &MessageStream<'x>, add_trail_space: bool) {
42 if self.token_start > 0 {
43 let token = String::from_utf8_lossy(&stream.data[self.token_start - 1..self.token_end]);
44 let mut add_space = false;
45 let list = match self.state {
46 AddressState::Address => &mut self.mail_tokens,
47 AddressState::Name => {
48 if self.is_token_email {
49 &mut self.mail_tokens
50 } else {
51 add_space = true;
52 &mut self.name_tokens
53 }
54 }
55 AddressState::Quote => &mut self.name_tokens,
56 AddressState::Comment => {
57 add_space = true;
58 &mut self.comment_tokens
59 }
60 };
61
62 if add_space && !list.is_empty() {
63 list.push(" ".into());
64 }
65
66 list.push(token);
67
68 if add_trail_space {
69 list.push(" ".into());
70 }
71
72 self.token_start = 0;
73 self.is_token_email = false;
74 self.is_token_start = true;
75 self.is_escaped = false;
76 }
77 }
78
79 pub fn add_address(&mut self) {
80 let has_mail = !self.mail_tokens.is_empty();
81 let has_name = !self.name_tokens.is_empty();
82 let has_comment = !self.comment_tokens.is_empty();
83
84 self.addresses.push(if has_mail && has_name && has_comment {
85 Addr {
86 name: Some(
87 format!(
88 "{} ({})",
89 concat_tokens(&mut self.name_tokens),
90 concat_tokens(&mut self.comment_tokens)
91 )
92 .into(),
93 ),
94 address: concat_tokens(&mut self.mail_tokens).into(),
95 }
96 } else if has_name && has_mail {
97 Addr {
98 name: concat_tokens(&mut self.name_tokens).into(),
99 address: concat_tokens(&mut self.mail_tokens).into(),
100 }
101 } else if has_mail && has_comment {
102 Addr {
103 name: concat_tokens(&mut self.comment_tokens).into(),
104 address: concat_tokens(&mut self.mail_tokens).into(),
105 }
106 } else if has_mail {
107 Addr {
108 name: None,
109 address: concat_tokens(&mut self.mail_tokens).into(),
110 }
111 } else if has_name && has_comment {
112 Addr {
113 name: concat_tokens(&mut self.comment_tokens).into(),
114 address: concat_tokens(&mut self.name_tokens).into(),
115 }
116 } else if has_name {
117 Addr {
118 name: concat_tokens(&mut self.name_tokens).into(),
119 address: None,
120 }
121 } else if has_comment {
122 Addr {
123 name: concat_tokens(&mut self.comment_tokens).into(),
124 address: None,
125 }
126 } else {
127 return;
128 });
129 }
130
131 pub fn add_group_details(&mut self) {
132 if !self.name_tokens.is_empty() {
133 self.group_name = concat_tokens(&mut self.name_tokens).into();
134 }
135
136 if !self.comment_tokens.is_empty() {
137 self.group_comment = concat_tokens(&mut self.comment_tokens).into();
138 }
139
140 if !self.mail_tokens.is_empty() {
141 if self.group_name.is_none() {
142 self.group_name = concat_tokens(&mut self.mail_tokens).into();
143 } else {
144 self.group_name = Some(
145 (self.group_name.as_ref().unwrap().as_ref().to_owned()
146 + " "
147 + concat_tokens(&mut self.mail_tokens).as_ref())
148 .into(),
149 );
150 }
151 }
152 }
153
154 pub fn add_group(&mut self) {
155 let has_name = self.group_name.is_some();
156 let has_comment = self.group_comment.is_some();
157 let has_addresses = !self.addresses.is_empty();
158
159 self.result
160 .push(if has_name && has_addresses && has_comment {
161 Group {
162 name: Some(
163 format!(
164 "{} ({})",
165 self.group_name.take().unwrap(),
166 self.group_comment.take().unwrap()
167 )
168 .into(),
169 ),
170 addresses: std::mem::take(&mut self.addresses),
171 }
172 } else if has_addresses && has_name {
173 Group {
174 name: self.group_name.take(),
175 addresses: std::mem::take(&mut self.addresses),
176 }
177 } else if has_addresses {
178 Group {
179 name: self.group_comment.take(),
180 addresses: std::mem::take(&mut self.addresses),
181 }
182 } else if has_name {
183 Group {
184 name: self.group_name.take(),
185 addresses: Vec::new(),
186 }
187 } else {
188 return;
189 });
190 }
191}
192
193impl<'x> MessageStream<'x> {
194 pub fn parse_address(&mut self) -> HeaderValue<'x> {
195 let mut parser = AddressParser {
196 token_start: 0,
197 token_end: 0,
198
199 is_token_email: false,
200 is_token_start: true,
201 is_escaped: false,
202
203 name_tokens: Vec::with_capacity(3),
204 mail_tokens: Vec::with_capacity(3),
205 comment_tokens: Vec::with_capacity(3),
206
207 state: AddressState::Name,
208 state_stack: Vec::with_capacity(5),
209
210 addresses: Vec::new(),
211 group_name: None,
212 group_comment: None,
213 result: Vec::new(),
214 };
215
216 while let Some(ch) = self.next() {
217 match ch {
218 b'\n' => {
219 parser.add_token(self, false);
220 if self.try_next_is_space() {
221 if !parser.is_token_start {
222 parser.is_token_start = true;
223 }
224 continue;
225 } else {
226 break;
227 }
228 }
229 b'\\' if parser.state != AddressState::Name && !parser.is_escaped => {
230 if parser.token_start > 0 {
231 if parser.state == AddressState::Quote {
232 parser.token_end = self.offset() - 1;
233 }
234 parser.add_token(self, false);
235 }
236 parser.is_escaped = true;
237 continue;
238 }
239 b',' if parser.state == AddressState::Name => {
240 parser.add_token(self, false);
241 parser.add_address();
242 continue;
243 }
244 b'<' if parser.state == AddressState::Name => {
245 parser.is_token_email = false;
246 parser.add_token(self, false);
247 parser.state_stack.push(AddressState::Name);
248 parser.state = AddressState::Address;
249 continue;
250 }
251 b'>' if parser.state == AddressState::Address => {
252 parser.add_token(self, false);
253 parser.state = parser.state_stack.pop().unwrap();
254 continue;
255 }
256 b'"' if !parser.is_escaped => match parser.state {
257 AddressState::Name => {
258 parser.state_stack.push(AddressState::Name);
259 parser.state = AddressState::Quote;
260 parser.add_token(self, false);
261 continue;
262 }
263 AddressState::Quote => {
264 parser.add_token(self, false);
265 parser.state = parser.state_stack.pop().unwrap();
266 continue;
267 }
268 _ => (),
269 },
270 b'@' if parser.state == AddressState::Name => {
271 parser.is_token_email = true;
272 }
273 b'=' if parser.is_token_start && !parser.is_escaped && self.peek_char(b'?') => {
274 self.checkpoint();
275 if let Some(token) = self.decode_rfc2047() {
276 let add_space = parser.state != AddressState::Quote; parser.add_token(self, add_space);
278 (if parser.state != AddressState::Comment {
279 &mut parser.name_tokens
280 } else {
281 &mut parser.comment_tokens
282 })
283 .push(token.into());
284 continue;
285 }
286 self.restore();
287 }
288 b' ' | b'\t' => {
289 if !parser.is_token_start {
290 parser.is_token_start = true;
291 }
292 if parser.is_escaped {
293 parser.is_escaped = false;
294 }
295 if parser.state == AddressState::Quote {
296 if parser.token_start == 0 {
297 parser.token_start = self.offset();
298 parser.token_end = parser.token_start;
299 } else {
300 parser.token_end = self.offset();
301 }
302 }
303 continue;
304 }
305 b'\r' => continue,
306 b'(' if parser.state != AddressState::Quote && !parser.is_escaped => {
307 parser.state_stack.push(parser.state);
308 if parser.state != AddressState::Comment {
309 parser.add_token(self, false);
310 parser.state = AddressState::Comment;
311 continue;
312 }
313 }
314 b')' if parser.state == AddressState::Comment && !parser.is_escaped => {
315 let new_state = parser.state_stack.pop().unwrap();
316 if parser.state != new_state {
317 parser.add_token(self, false);
318 parser.state = new_state;
319 continue;
320 }
321 }
322 b':' if parser.state == AddressState::Name && !parser.is_escaped => {
323 parser.add_group();
324 parser.add_token(self, false);
325 parser.add_group_details();
326 continue;
327 }
328 b';' if parser.state == AddressState::Name => {
329 parser.add_token(self, false);
330 parser.add_address();
331 parser.add_group();
332 continue;
333 }
334 _ => (),
335 }
336
337 if parser.is_escaped {
338 parser.is_escaped = false;
339 }
340
341 if parser.is_token_start {
342 parser.is_token_start = false;
343 }
344
345 if parser.token_start == 0 {
346 parser.token_start = self.offset();
347 parser.token_end = parser.token_start;
348 } else {
349 parser.token_end = self.offset();
350 }
351 }
352
353 parser.add_address();
354
355 if parser.group_name.is_some() || !parser.result.is_empty() {
356 parser.add_group();
357 HeaderValue::Address(Address::Group(parser.result))
358 } else if !parser.addresses.is_empty() {
359 HeaderValue::Address(Address::List(parser.addresses))
360 } else {
361 HeaderValue::Empty
362 }
363 }
364}
365
/// Drains `tokens` and returns their concatenation.
///
/// A single token is returned as is (keeping a borrowed token borrowed);
/// any other number of tokens is joined into a newly allocated string.
/// `tokens` is always left empty.
fn concat_tokens<'x>(tokens: &mut Vec<Cow<'x, str>>) -> Cow<'x, str> {
    match tokens.len() {
        1 => tokens.pop().unwrap(),
        _ => Cow::Owned(tokens.drain(..).collect::<String>()),
    }
}
375
/// Returns the part of `addr` that precedes the first `@`.
///
/// Returns `None` when there is no `@`, when the `@` is the first or the
/// last byte, or when a non-ASCII byte occurs before the `@`.
pub fn parse_address_local_part(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();
    // The first byte that is either '@' or non-ASCII decides the outcome.
    let stop = bytes.iter().position(|&b| b == b'@' || !b.is_ascii())?;

    if bytes[stop] == b'@' && stop > 0 && stop + 1 < bytes.len() {
        addr.get(..stop)
    } else {
        None
    }
}
393
/// Returns the part of `addr` that follows the first `@`.
///
/// Returns `None` when there is no `@`, when the `@` is the first or the
/// last byte, or when a non-ASCII byte occurs before the `@`.
pub fn parse_address_domain(addr: &str) -> Option<&str> {
    let (local, domain) = addr.split_once('@')?;

    if local.is_empty() || !local.is_ascii() || domain.is_empty() {
        None
    } else {
        Some(domain)
    }
}
410
/// Returns the user part of `addr`: everything before the first `+`, or
/// before the `@` when the local part has no `+`.
///
/// Returns `None` when the user part would be empty, when a non-ASCII byte
/// occurs before the first `+` or `@`, or when no `@` followed by at least
/// one more byte can be found.
pub fn parse_address_user_part(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();
    // The first '+', '@' or non-ASCII byte decides how to proceed.
    let stop = bytes
        .iter()
        .position(|&b| b == b'+' || b == b'@' || !b.is_ascii())?;
    if stop == 0 {
        return None;
    }

    // Position of the '@' that terminates the local part.
    let at = match bytes[stop] {
        b'@' => stop,
        // Bytes between '+' and '@' are not inspected any further.
        b'+' => stop + 1 + bytes[stop + 1..].iter().position(|&b| b == b'@')?,
        _ => return None,
    };

    if at + 1 < bytes.len() {
        addr.get(..stop)
    } else {
        None
    }
}
438
/// Returns the detail (sub-address) part of `addr`: the text between the
/// first `+` and the first `@`.
///
/// The split happens at the *first* `+`, matching
/// `parse_address_user_part`, so that `user+a+b@example.com` yields the user
/// `user` and the detail `a+b`.
///
/// Returns `None` when the local part has no `+`, when there is no `@`, when
/// the `@` is the last byte, or when a non-ASCII byte occurs before the `@`.
/// An empty detail (`user+@example.com`) is returned as `Some("")`.
pub fn parse_address_detail_part(addr: &str) -> Option<&str> {
    let bytes = addr.as_bytes();
    let mut detail_start: Option<usize> = None;

    for (pos, &ch) in bytes.iter().enumerate() {
        match ch {
            b'+' => {
                // Only the first '+' separates user and detail; later ones
                // belong to the detail itself.
                detail_start.get_or_insert(pos + 1);
            }
            b'@' => {
                let start = detail_start?;
                // A domain must follow the '@'.
                return if pos + 1 < bytes.len() {
                    addr.get(start..pos)
                } else {
                    None
                };
            }
            _ if !ch.is_ascii() => return None,
            _ => {}
        }
    }

    None
}
460
#[cfg(test)]
mod tests {
    use crate::parsers::{fields::load_tests, MessageStream};

    /// Runs every case from `address.json` through `parse_address` and
    /// compares the parsed value with the expected one.
    #[test]
    fn parse_addresses() {
        for test in load_tests("address.json") {
            let parsed = MessageStream::new(test.header.as_bytes())
                .parse_address()
                .unwrap_address();

            assert_eq!(parsed, test.expected, "failed for {:?}", test.header);
        }
    }
}