1use std::{error::Error, fmt, mem::ManuallyDrop, str::Chars};
2
3use super::unicode;
4use super::AddrSpec;
5
/// Returns `true` for every ASCII control character except horizontal tab.
///
/// The ASCII control characters are U+0000..=U+001F and U+007F; the tab
/// (U+0009) is carved out of that set.
pub const fn is_ascii_control_and_not_htab(chr: char) -> bool {
    matches!(chr, '\0'..='\x08' | '\x0A'..='\x1F' | '\x7F')
}
9
/// Returns `true` for ASCII control characters and for the space character.
///
/// Space (U+0020) directly follows the last low control character (U+001F),
/// so a single range covers both; DEL (U+007F) is listed separately.
pub const fn is_ascii_control_or_space(chr: char) -> bool {
    matches!(chr, '\0'..=' ' | '\x7F')
}
13
14pub const fn is_not_atext(chr: char) -> bool {
15 is_ascii_control_or_space(chr)
16 || matches!(
17 chr,
18 '"' | '(' | ')' | ',' | ':' | '<' | '>' | '@' | '[' | ']' | '\\'
19 )
20}
21
22pub const fn is_not_dtext(chr: char) -> bool {
23 is_ascii_control_or_space(chr) || matches!(chr, '[' | ']' | '\\')
24}
25
/// Error produced when the input cannot be parsed.
///
/// Field `0` is a static description of the problem and field `1` is the
/// byte index into the input at which it was detected; both are exposed
/// through [`ParseError::message`] and [`ParseError::index`].
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub struct ParseError(pub(super) &'static str, pub(super) usize);
29
30impl ParseError {
31 #[inline]
33 pub fn message(&self) -> &'static str {
34 self.0
35 }
36
37 #[inline]
39 pub fn index(&self) -> usize {
40 self.1
41 }
42}
43
44impl fmt::Display for ParseError {
45 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
46 write!(
47 formatter,
48 "parse error at index {}: {}",
49 self.message(),
50 self.index()
51 )
52 }
53}
54
// `ParseError` carries no underlying cause, so the default `Error` methods
// are sufficient.
impl Error for ParseError {}
56
/// Single-pass parser over a borrowed input string.
pub struct Parser<'a> {
    /// The complete input; its length is used to turn the remaining length
    /// of `iterator` into an absolute byte index for error reporting.
    input: &'a str,
    /// Cursor over the not-yet-consumed tail of `input`.
    iterator: Chars<'a>,
}
61
62impl<'a> Parser<'a> {
63 #[inline]
64 pub fn new(input: &'a str) -> Parser<'a> {
65 Parser {
66 input,
67 iterator: input.chars(),
68 }
69 }
70
    /// Parses the whole input as an address specification.
    ///
    /// The pieces are consumed in order: local part, `@`, domain, end of
    /// input. With the `white-spaces` feature enabled, `parse_cfws` runs
    /// before and after each of those pieces.
    ///
    /// # Errors
    /// Returns the first [`ParseError`] raised by any step, or one with the
    /// message `"expected end of address"` if input remains after the domain.
    pub fn parse(mut self) -> Result<AddrSpec, ParseError> {
        #[cfg(feature = "white-spaces")]
        self.parse_cfws()?;
        let local_part = self.parse_local_part()?;
        #[cfg(feature = "white-spaces")]
        self.parse_cfws()?;
        self.skip_at()?;
        #[cfg(feature = "white-spaces")]
        self.parse_cfws()?;
        // `literal` is only read below when the `literals` feature is enabled.
        #[allow(unused_variables)]
        let (domain, literal) = self.parse_domain()?;
        #[cfg(feature = "white-spaces")]
        self.parse_cfws()?;
        // `check_end` takes the parser by value, so nothing may use `self` after it.
        self.check_end("expected end of address")?;
        Ok(AddrSpec {
            local_part,
            domain,
            #[cfg(feature = "literals")]
            literal,
        })
    }
93
94 #[cfg(feature = "white-spaces")]
95 fn parse_cfws(&mut self) -> Result<(), ParseError> {
96 self.skip_fws();
97 #[cfg(feature = "comments")]
98 while self.eat_chr('(') {
99 self.parse_comment()?;
100 self.skip_fws();
101 }
102 Ok(())
103 }
104
105 #[cfg(feature = "white-spaces")]
106 fn skip_fws(&mut self) {
107 self.skip_ws();
108 if !self.eat_str("\r\n") {
109 return;
110 }
111 self.skip_ws();
112 }
113
114 #[cfg(feature = "white-spaces")]
115 fn skip_ws(&mut self) {
116 loop {
117 if !self.eat_slice([' ', '\t']) {
118 break;
119 }
120 }
121 }
122
123 #[cfg(feature = "white-spaces")]
124 fn eat_slice<const N: usize>(&mut self, pattern: [char; N]) -> bool {
125 if self.iterator.as_str().starts_with(pattern) {
126 self.iterator.next();
127 return true;
128 }
129 false
130 }
131
132 #[cfg(feature = "white-spaces")]
133 fn eat_str(&mut self, pattern: &str) -> bool {
134 if let Some(input) = self.iterator.as_str().strip_prefix(pattern) {
135 self.iterator = input.chars();
136 return true;
137 }
138 false
139 }
140
141 fn eat_chr(&mut self, pattern: char) -> bool {
142 if self.iterator.as_str().starts_with(pattern) {
143 self.iterator.next();
144 return true;
145 }
146 false
147 }
148
149 #[cfg(feature = "comments")]
150 fn parse_comment(&mut self) -> Result<(), ParseError> {
151 #[cfg(feature = "white-spaces")]
152 self.skip_fws();
153
154 let mut nest_level = 1usize;
155 while let Some(chr) = self.iterator.next() {
156 match chr {
157 ')' => {
158 if nest_level == 1 {
159 return Ok(());
160 }
161 nest_level -= 1;
162 }
163 '\\' => {
164 self.parse_quoted_pair()?;
165 }
166 '(' => {
167 nest_level += 1;
168 }
169 chr => {
170 if is_ascii_control_or_space(chr) {
171 return Err(self.error("invalid character in comment", -1));
172 }
173 }
174 }
175
176 #[cfg(feature = "white-spaces")]
177 self.skip_fws();
178 }
179
180 Err(self.error("expected ')' for comment", 0))
181 }
182
183 fn parse_quoted_pair(&mut self) -> Result<char, ParseError> {
184 match self.iterator.next() {
185 Some(chr) if !is_ascii_control_and_not_htab(chr) => Ok(chr),
186 Some(_) => Err(self.error("invalid character in quoted pair", -1)),
187 None => Err(self.error("unexpected end of quoted pair", 0)),
188 }
189 }
190
191 fn parse_local_part(&mut self) -> Result<String, ParseError> {
192 if !self.eat_chr('"') {
193 return Ok(unicode::normalize(
194 self.parse_dot_atom("empty label in local part")?,
195 ));
196 }
197 Ok(unicode::normalize(self.parse_quoted_string(
198 "invalid character in quoted local part",
199 "expected '\"' for quoted local part",
200 )?))
201 }
202
203 pub fn parse_dot_atom(
204 &mut self,
205 empty_label_error_text: &'static str,
206 ) -> Result<&str, ParseError> {
207 let input = self.iterator.as_str();
208 let size = input.find(is_not_atext).unwrap_or(input.len());
209
210 let dot_atom = &input[..size];
211 if let Some(offset) = dot_atom
212 .split('.')
213 .find(|label| label.is_empty())
214 .map(|label| label.as_ptr() as usize - dot_atom.as_ptr() as usize)
215 {
216 return Err(self.error(empty_label_error_text, offset as isize));
217 }
218
219 self.iterator = input[size..].chars();
220 Ok(dot_atom)
221 }
222
223 fn parse_quoted_string(
224 &mut self,
225 invalid_character_error_text: &'static str,
226 expected_quote_error_text: &'static str,
227 ) -> Result<String, ParseError> {
228 #[cfg(feature = "white-spaces")]
229 self.skip_fws();
230
231 let mut quoted_string = unsafe { FixedVec::new(self.iterator.as_str().len()) };
232 while let Some(chr) = self.iterator.next() {
233 let chr = match chr {
234 '"' => return Ok(quoted_string.into()),
235 '\\' => self.parse_quoted_pair()?,
236 chr if is_ascii_control_or_space(chr) => {
237 return Err(self.error(invalid_character_error_text, -1))
238 }
239 chr => chr,
240 };
241 unsafe {
242 quoted_string.extend_char_unchecked(chr);
243 }
244
245 #[cfg(feature = "white-spaces")]
246 self.skip_fws();
247 }
248
249 Err(self.error(expected_quote_error_text, 0))
250 }
251
252 fn skip_at(&mut self) -> Result<(), ParseError> {
253 if self.eat_chr('@') {
254 return Ok(());
255 }
256 Err(self.error("expected '@'", 1))
257 }
258
259 fn parse_domain(&mut self) -> Result<(String, bool), ParseError> {
260 #[cfg(feature = "literals")]
261 if self.eat_chr('[') {
262 return Ok((unicode::normalize(self.parse_domain_literal()?), true));
263 }
264 Ok((
265 unicode::normalize(self.parse_dot_atom("empty label in domain")?),
266 false,
267 ))
268 }
269
270 #[cfg(all(feature = "literals", not(feature = "white-spaces")))]
271 fn parse_domain_literal(&mut self) -> Result<&str, ParseError> {
272 let input = self.iterator.as_str();
273 let size = input.find(is_not_dtext).unwrap_or(input.len());
274
275 self.iterator = input[size..].chars();
276 if !self.eat_chr(']') {
277 return Err(self.error("expected ']' for domain literal", 0));
278 }
279
280 Ok(&input[..size])
281 }
282
283 #[cfg(all(feature = "literals", feature = "white-spaces"))]
284 fn parse_domain_literal(&mut self) -> Result<String, ParseError> {
285 #[cfg(feature = "white-spaces")]
286 self.skip_fws();
287
288 let mut domain = unsafe { FixedVec::new(self.iterator.as_str().len()) };
289 while let Some(chr) = self.iterator.next() {
290 let chr = match chr {
291 ']' => return Ok(domain.into()),
292 chr if is_not_dtext(chr) => {
293 return Err(self.error("invalid character in literal domain", -1))
294 }
295 chr => chr,
296 };
297 unsafe {
298 domain.extend_char_unchecked(chr);
299 }
300
301 #[cfg(feature = "white-spaces")]
302 self.skip_fws();
303 }
304
305 Err(self.error("expected ']' for domain literal", 0))
306 }
307
308 #[inline]
309 pub fn check_end(self, message: &'static str) -> Result<(), ParseError> {
310 if self.iterator.as_str().is_empty() {
311 return Ok(());
312 }
313 Err(self.error(message, 0))
314 }
315
316 fn error(&self, message: &'static str, offset: isize) -> ParseError {
317 ParseError(
318 message,
319 (self.input.len() - self.iterator.as_str().len())
320 .checked_add_signed(offset)
321 .unwrap(),
322 )
323 }
324}
325
/// A heap buffer whose capacity is fixed at construction time.
///
/// Elements are appended with unchecked bitwise copies; the caller guarantees
/// that the capacity is never exceeded. Dropping the buffer releases the
/// allocation but does not run destructors of the stored elements.
pub struct FixedVec<T> {
    /// Start of the allocation, or a dangling (aligned, non-null) pointer
    /// when the buffer occupies zero bytes.
    ptr: *mut T,
    /// Number of initialized elements.
    len: usize,
    /// Number of elements the allocation can hold.
    cap: usize,
}

impl<T> FixedVec<T> {
    /// Creates an empty buffer with room for exactly `cap` elements.
    ///
    /// When the buffer would occupy zero bytes (`cap == 0` or `T` is
    /// zero-sized) the allocator is not called, because allocating a
    /// zero-sized layout is undefined behaviour.
    ///
    /// # Panics
    /// Panics if the byte size of `cap` elements is not a valid layout.
    /// Allocation failure is reported through
    /// `std::alloc::handle_alloc_error`.
    ///
    /// # Safety
    /// This constructor has no preconditions of its own; it stays `unsafe`
    /// for backward compatibility. All later appends are unchecked and must
    /// respect `cap`.
    pub unsafe fn new(cap: usize) -> Self {
        let layout = Self::layout(cap);
        let ptr = if layout.size() == 0 {
            std::ptr::NonNull::<T>::dangling().as_ptr()
        } else {
            // SAFETY: `layout` has a non-zero size.
            let raw = unsafe { std::alloc::alloc(layout) };
            if raw.is_null() {
                std::alloc::handle_alloc_error(layout);
            }
            raw.cast()
        };
        Self { ptr, len: 0, cap }
    }

    /// Layout of an allocation holding `cap` elements.
    fn layout(cap: usize) -> std::alloc::Layout {
        std::alloc::Layout::array::<T>(cap).expect("FixedVec capacity is too large")
    }

    /// Appends a bitwise copy of `slice`.
    ///
    /// # Safety
    /// `self.len + slice.len()` must not exceed the capacity, and `T` must be
    /// valid to duplicate bitwise.
    unsafe fn extend_unchecked(&mut self, slice: &[T]) {
        // Check before writing so a violation is caught before memory is
        // touched (debug builds only).
        debug_assert!(slice.len() <= self.cap - self.len);
        // SAFETY: the caller guarantees the destination range lies within the
        // allocation; `slice` is a separate borrow and cannot overlap it.
        unsafe {
            std::ptr::copy_nonoverlapping(slice.as_ptr(), self.ptr.add(self.len), slice.len());
        }
        self.len += slice.len();
    }
}

impl FixedVec<u8> {
    /// Appends the UTF-8 encoding of `chr`.
    ///
    /// # Safety
    /// At least `chr.len_utf8()` bytes of capacity must be left.
    unsafe fn extend_char_unchecked(&mut self, chr: char) {
        let mut utf8 = [0u8; 4];
        let encoded = chr.encode_utf8(&mut utf8).as_bytes();
        // SAFETY: forwarded to the caller.
        unsafe { self.extend_unchecked(encoded) }
    }
}

impl<T> Drop for FixedVec<T> {
    fn drop(&mut self) {
        let layout = Self::layout(self.cap);
        // Nothing was allocated for a zero-sized buffer.
        if layout.size() != 0 {
            // SAFETY: `ptr` was returned by `alloc` with this exact layout in
            // `new` and has not been freed.
            unsafe { std::alloc::dealloc(self.ptr.cast(), layout) }
        }
    }
}

impl From<FixedVec<u8>> for String {
    /// Turns the buffer into a `String`, reusing the allocation.
    fn from(val: FixedVec<u8>) -> Self {
        if val.cap == 0 {
            // No allocation to hand over; dropping `val` is a no-op.
            return String::new();
        }
        // Ownership of the allocation moves into the `String`, so the
        // buffer's own destructor must not run.
        let val = ManuallyDrop::new(val);
        // SAFETY: `ptr` comes from the global allocator with a `u8` array
        // layout of `cap` bytes and `len <= cap` bytes are initialized. In
        // this file bytes are only appended through `extend_char_unchecked`,
        // which writes whole UTF-8 sequences.
        unsafe { String::from_raw_parts(val.ptr, val.len, val.cap) }
    }
}
373
374#[cfg(test)]
375mod tests {
    // Tests for dot-atom parsing of the local part and of the domain.
    // The second field of each expected `ParseError` is the byte index of the
    // empty label within the input.
    mod dot_atoms {
        use super::super::{ParseError, Parser};

        #[test]
        fn test_parse_local_part() {
            assert_eq!(&Parser::new("test").parse_local_part().unwrap(), "test")
        }

        // An empty input counts as a single empty label at index 0.
        #[test]
        fn test_parse_empty_local_part() {
            assert_eq!(
                Parser::new("").parse_local_part().unwrap_err(),
                ParseError("empty label in local part", 0)
            )
        }

        #[test]
        fn test_parse_local_part_with_empty_label_in_front() {
            assert_eq!(
                Parser::new(".test").parse_local_part().unwrap_err(),
                ParseError("empty label in local part", 0)
            )
        }

        // The empty label sits between the two dots, i.e. at index 3.
        #[test]
        fn test_parse_local_part_with_empty_label_in_middle() {
            assert_eq!(
                Parser::new("te..st").parse_local_part().unwrap_err(),
                ParseError("empty label in local part", 3)
            )
        }

        // A trailing dot leaves an empty label at the end of the input.
        #[test]
        fn test_parse_local_part_with_empty_label_in_back() {
            assert_eq!(
                Parser::new("test.").parse_local_part().unwrap_err(),
                ParseError("empty label in local part", 5)
            )
        }

        // A dot-atom domain is reported with the literal flag set to `false`.
        #[test]
        fn test_parse_domain() {
            assert_eq!(
                Parser::new("test").parse_domain().unwrap(),
                ("test".to_string(), false)
            )
        }

        #[test]
        fn test_parse_empty_domain() {
            assert_eq!(
                Parser::new("").parse_domain().unwrap_err(),
                ParseError("empty label in domain", 0)
            )
        }

        #[test]
        fn test_parse_domain_with_empty_label_in_front() {
            assert_eq!(
                Parser::new(".test").parse_domain().unwrap_err(),
                ParseError("empty label in domain", 0)
            )
        }

        #[test]
        fn test_parse_domain_with_empty_label_in_middle() {
            assert_eq!(
                Parser::new("te..st").parse_domain().unwrap_err(),
                ParseError("empty label in domain", 3)
            )
        }

        #[test]
        fn test_parse_domain_with_empty_label_in_back() {
            assert_eq!(
                Parser::new("test.").parse_domain().unwrap_err(),
                ParseError("empty label in domain", 5)
            )
        }
    }
456
    // Tests for bracketed domain literals; only built with the `literals`
    // feature. A literal domain is reported with the flag set to `true`.
    #[cfg(feature = "literals")]
    mod literals {
        use super::super::{ParseError, Parser};

        #[test]
        fn test_parse_literal_domain() {
            assert_eq!(
                Parser::new("[test]").parse_domain().unwrap(),
                ("test".to_string(), true)
            )
        }

        // The error index is the end of the input, where `]` was expected.
        #[test]
        fn test_parse_literal_domain_without_bracket() {
            assert_eq!(
                Parser::new("[test").parse_domain().unwrap_err(),
                ParseError("expected ']' for domain literal", 5)
            )
        }

        #[test]
        fn test_parse_empty_literal_domain() {
            assert_eq!(
                Parser::new("[]").parse_domain().unwrap(),
                ("".to_string(), true)
            )
        }

        #[test]
        fn test_parse_empty_literal_domain_without_bracket() {
            assert_eq!(
                Parser::new("[").parse_domain().unwrap_err(),
                ParseError("expected ']' for domain literal", 1)
            )
        }

        // Without `white-spaces`, a space ends the literal text and the
        // parser then fails because it is not the closing bracket.
        #[cfg(not(feature = "white-spaces"))]
        #[test]
        fn test_parse_literal_domain_with_white_spaces() {
            assert_eq!(
                Parser::new("[te st]").parse_domain().unwrap_err(),
                ParseError("expected ']' for domain literal", 3)
            )
        }

        // With `white-spaces`, white space inside the brackets is skipped and
        // does not appear in the result.
        #[cfg(feature = "white-spaces")]
        #[test]
        fn test_parse_literal_domain_with_white_spaces() {
            assert_eq!(
                Parser::new("[te st]").parse_domain().unwrap(),
                ("test".to_string(), true)
            )
        }

        #[cfg(feature = "white-spaces")]
        #[test]
        fn test_parse_literal_domain_with_fws_in_front() {
            assert_eq!(
                Parser::new("[\r\ntest]").parse_domain().unwrap(),
                ("test".to_string(), true)
            )
        }

        #[cfg(feature = "white-spaces")]
        #[test]
        fn test_parse_literal_domain_with_fws_in_middle() {
            assert_eq!(
                Parser::new("[te\r\nst]").parse_domain().unwrap(),
                ("test".to_string(), true)
            )
        }

        #[cfg(feature = "white-spaces")]
        #[test]
        fn test_parse_literal_domain_with_fws_in_back() {
            assert_eq!(
                Parser::new("[test\r\n]").parse_domain().unwrap(),
                ("test".to_string(), true)
            )
        }
    }
538}