addr_spec/
parser.rs

1use std::{error::Error, fmt, mem::ManuallyDrop, str::Chars};
2
3use super::unicode;
4use super::AddrSpec;
5
/// Returns `true` for every ASCII control character except the horizontal
/// tab (U+0009).
pub const fn is_ascii_control_and_not_htab(chr: char) -> bool {
    // ASCII controls are U+0000..=U+001F and U+007F; the tab is carved out.
    matches!(chr, '\0'..='\x08' | '\n'..='\x1F' | '\x7F')
}
9
/// Returns `true` for ASCII control characters and for the space character.
pub const fn is_ascii_control_or_space(chr: char) -> bool {
    // U+0000..=U+001F are the C0 controls, U+0020 is the space and U+007F is
    // DEL, the only remaining ASCII control.
    matches!(chr, '\0'..=' ' | '\x7F')
}
13
14pub const fn is_not_atext(chr: char) -> bool {
15    is_ascii_control_or_space(chr)
16        || matches!(
17            chr,
18            '"' | '(' | ')' | ',' | ':' | '<' | '>' | '@' | '[' | ']' | '\\'
19        )
20}
21
22pub const fn is_not_dtext(chr: char) -> bool {
23    is_ascii_control_or_space(chr) || matches!(chr, '[' | ']' | '\\')
24}
25
/// An error that can occur when parsing or creating an address specification.
///
/// The first field is the static error message and the second field is the
/// byte index at which the error occurred; both are exposed through the
/// `message` and `index` accessors.
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub struct ParseError(pub(super) &'static str, pub(super) usize);
29
30impl ParseError {
31    /// Returns a static error message.
32    #[inline]
33    pub fn message(&self) -> &'static str {
34        self.0
35    }
36
37    /// Returns the byte index where the error occurred.
38    #[inline]
39    pub fn index(&self) -> usize {
40        self.1
41    }
42}
43
44impl fmt::Display for ParseError {
45    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
46        write!(
47            formatter,
48            "parse error at index {}: {}",
49            self.message(),
50            self.index()
51        )
52    }
53}
54
// Uses the default `Error` methods only; the error text comes from the
// `Display` implementation.
impl Error for ParseError {}
56
/// Parser for a single address specification held in a string slice.
pub struct Parser<'a> {
    // The complete input; `error` uses its length to turn the remaining
    // input into a byte index.
    input: &'a str,
    // Iterator over the characters that have not been consumed yet.
    iterator: Chars<'a>,
}
61
62impl<'a> Parser<'a> {
63    #[inline]
64    pub fn new(input: &'a str) -> Parser<'a> {
65        Parser {
66            input,
67            iterator: input.chars(),
68        }
69    }
70
    /// Parses the whole input as `local-part "@" domain` and consumes the
    /// parser.
    ///
    /// With the `white-spaces` feature, optional white space (and comments,
    /// when that feature is enabled as well) is skipped before and after each
    /// of the three components.
    ///
    /// # Errors
    ///
    /// Returns the first `ParseError` reported by one of the sub-parsers, or
    /// "expected end of address" when input remains after the domain.
    pub fn parse(mut self) -> Result<AddrSpec, ParseError> {
        #[cfg(feature = "white-spaces")]
        self.parse_cfws()?;
        let local_part = self.parse_local_part()?;
        #[cfg(feature = "white-spaces")]
        self.parse_cfws()?;
        self.skip_at()?;
        #[cfg(feature = "white-spaces")]
        self.parse_cfws()?;
        // `literal` only used when feature is enabled
        #[allow(unused_variables)]
        let (domain, literal) = self.parse_domain()?;
        #[cfg(feature = "white-spaces")]
        self.parse_cfws()?;
        // `check_end` takes the parser by value, so it has to be the last
        // parsing step.
        self.check_end("expected end of address")?;
        Ok(AddrSpec {
            local_part,
            domain,
            #[cfg(feature = "literals")]
            literal,
        })
    }
93
94    #[cfg(feature = "white-spaces")]
95    fn parse_cfws(&mut self) -> Result<(), ParseError> {
96        self.skip_fws();
97        #[cfg(feature = "comments")]
98        while self.eat_chr('(') {
99            self.parse_comment()?;
100            self.skip_fws();
101        }
102        Ok(())
103    }
104
105    #[cfg(feature = "white-spaces")]
106    fn skip_fws(&mut self) {
107        self.skip_ws();
108        if !self.eat_str("\r\n") {
109            return;
110        }
111        self.skip_ws();
112    }
113
114    #[cfg(feature = "white-spaces")]
115    fn skip_ws(&mut self) {
116        loop {
117            if !self.eat_slice([' ', '\t']) {
118                break;
119            }
120        }
121    }
122
123    #[cfg(feature = "white-spaces")]
124    fn eat_slice<const N: usize>(&mut self, pattern: [char; N]) -> bool {
125        if self.iterator.as_str().starts_with(pattern) {
126            self.iterator.next();
127            return true;
128        }
129        false
130    }
131
132    #[cfg(feature = "white-spaces")]
133    fn eat_str(&mut self, pattern: &str) -> bool {
134        if let Some(input) = self.iterator.as_str().strip_prefix(pattern) {
135            self.iterator = input.chars();
136            return true;
137        }
138        false
139    }
140
141    fn eat_chr(&mut self, pattern: char) -> bool {
142        if self.iterator.as_str().starts_with(pattern) {
143            self.iterator.next();
144            return true;
145        }
146        false
147    }
148
149    #[cfg(feature = "comments")]
150    fn parse_comment(&mut self) -> Result<(), ParseError> {
151        #[cfg(feature = "white-spaces")]
152        self.skip_fws();
153
154        let mut nest_level = 1usize;
155        while let Some(chr) = self.iterator.next() {
156            match chr {
157                ')' => {
158                    if nest_level == 1 {
159                        return Ok(());
160                    }
161                    nest_level -= 1;
162                }
163                '\\' => {
164                    self.parse_quoted_pair()?;
165                }
166                '(' => {
167                    nest_level += 1;
168                }
169                chr => {
170                    if is_ascii_control_or_space(chr) {
171                        return Err(self.error("invalid character in comment", -1));
172                    }
173                }
174            }
175
176            #[cfg(feature = "white-spaces")]
177            self.skip_fws();
178        }
179
180        Err(self.error("expected ')' for comment", 0))
181    }
182
183    fn parse_quoted_pair(&mut self) -> Result<char, ParseError> {
184        match self.iterator.next() {
185            Some(chr) if !is_ascii_control_and_not_htab(chr) => Ok(chr),
186            Some(_) => Err(self.error("invalid character in quoted pair", -1)),
187            None => Err(self.error("unexpected end of quoted pair", 0)),
188        }
189    }
190
191    fn parse_local_part(&mut self) -> Result<String, ParseError> {
192        if !self.eat_chr('"') {
193            return Ok(unicode::normalize(
194                self.parse_dot_atom("empty label in local part")?,
195            ));
196        }
197        Ok(unicode::normalize(self.parse_quoted_string(
198            "invalid character in quoted local part",
199            "expected '\"' for quoted local part",
200        )?))
201    }
202
203    pub fn parse_dot_atom(
204        &mut self,
205        empty_label_error_text: &'static str,
206    ) -> Result<&str, ParseError> {
207        let input = self.iterator.as_str();
208        let size = input.find(is_not_atext).unwrap_or(input.len());
209
210        let dot_atom = &input[..size];
211        if let Some(offset) = dot_atom
212            .split('.')
213            .find(|label| label.is_empty())
214            .map(|label| label.as_ptr() as usize - dot_atom.as_ptr() as usize)
215        {
216            return Err(self.error(empty_label_error_text, offset as isize));
217        }
218
219        self.iterator = input[size..].chars();
220        Ok(dot_atom)
221    }
222
223    fn parse_quoted_string(
224        &mut self,
225        invalid_character_error_text: &'static str,
226        expected_quote_error_text: &'static str,
227    ) -> Result<String, ParseError> {
228        #[cfg(feature = "white-spaces")]
229        self.skip_fws();
230
231        let mut quoted_string = unsafe { FixedVec::new(self.iterator.as_str().len()) };
232        while let Some(chr) = self.iterator.next() {
233            let chr = match chr {
234                '"' => return Ok(quoted_string.into()),
235                '\\' => self.parse_quoted_pair()?,
236                chr if is_ascii_control_or_space(chr) => {
237                    return Err(self.error(invalid_character_error_text, -1))
238                }
239                chr => chr,
240            };
241            unsafe {
242                quoted_string.extend_char_unchecked(chr);
243            }
244
245            #[cfg(feature = "white-spaces")]
246            self.skip_fws();
247        }
248
249        Err(self.error(expected_quote_error_text, 0))
250    }
251
252    fn skip_at(&mut self) -> Result<(), ParseError> {
253        if self.eat_chr('@') {
254            return Ok(());
255        }
256        Err(self.error("expected '@'", 1))
257    }
258
259    fn parse_domain(&mut self) -> Result<(String, bool), ParseError> {
260        #[cfg(feature = "literals")]
261        if self.eat_chr('[') {
262            return Ok((unicode::normalize(self.parse_domain_literal()?), true));
263        }
264        Ok((
265            unicode::normalize(self.parse_dot_atom("empty label in domain")?),
266            false,
267        ))
268    }
269
270    #[cfg(all(feature = "literals", not(feature = "white-spaces")))]
271    fn parse_domain_literal(&mut self) -> Result<&str, ParseError> {
272        let input = self.iterator.as_str();
273        let size = input.find(is_not_dtext).unwrap_or(input.len());
274
275        self.iterator = input[size..].chars();
276        if !self.eat_chr(']') {
277            return Err(self.error("expected ']' for domain literal", 0));
278        }
279
280        Ok(&input[..size])
281    }
282
283    #[cfg(all(feature = "literals", feature = "white-spaces"))]
284    fn parse_domain_literal(&mut self) -> Result<String, ParseError> {
285        #[cfg(feature = "white-spaces")]
286        self.skip_fws();
287
288        let mut domain = unsafe { FixedVec::new(self.iterator.as_str().len()) };
289        while let Some(chr) = self.iterator.next() {
290            let chr = match chr {
291                ']' => return Ok(domain.into()),
292                chr if is_not_dtext(chr) => {
293                    return Err(self.error("invalid character in literal domain", -1))
294                }
295                chr => chr,
296            };
297            unsafe {
298                domain.extend_char_unchecked(chr);
299            }
300
301            #[cfg(feature = "white-spaces")]
302            self.skip_fws();
303        }
304
305        Err(self.error("expected ']' for domain literal", 0))
306    }
307
308    #[inline]
309    pub fn check_end(self, message: &'static str) -> Result<(), ParseError> {
310        if self.iterator.as_str().is_empty() {
311            return Ok(());
312        }
313        Err(self.error(message, 0))
314    }
315
316    fn error(&self, message: &'static str, offset: isize) -> ParseError {
317        ParseError(
318            message,
319            (self.input.len() - self.iterator.as_str().len())
320                .checked_add_signed(offset)
321                .unwrap(),
322        )
323    }
324}
325
/// A heap buffer with a capacity fixed at creation, filled by unchecked
/// appends.
///
/// The storage is obtained from and returned to `Vec`, so a zero capacity,
/// zero-sized element types and allocation failure are all handled by the
/// standard library instead of by raw allocator calls.
pub struct FixedVec<T> {
    // Start of the storage obtained from `Vec::with_capacity`.
    ptr: *mut T,
    // Number of initialized elements.
    len: usize,
    // Capacity reported by the `Vec` that provided the storage.
    cap: usize,
}

impl<T> FixedVec<T> {
    /// Creates an empty buffer with room for at least `cap` elements.
    ///
    /// # Safety
    ///
    /// The function has no preconditions of its own. It remains `unsafe` so
    /// that existing callers, which wrap the call in an `unsafe` block, keep
    /// compiling unchanged.
    pub unsafe fn new(cap: usize) -> Self {
        // Calling `alloc` directly with a zero-size layout is undefined
        // behaviour and its result would need a null check; `Vec` takes care
        // of both. `ManuallyDrop` hands the storage over to `Self`.
        let mut storage = ManuallyDrop::new(Vec::<T>::with_capacity(cap));
        Self {
            ptr: storage.as_mut_ptr(),
            len: 0,
            cap: storage.capacity(),
        }
    }

    /// Appends a bitwise copy of `slice` without checking the capacity.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `slice.len()` does not exceed the
    /// remaining capacity. The elements are copied bitwise and never dropped
    /// by this type.
    unsafe fn extend_unchecked(&mut self, slice: &[T]) {
        // Checked before writing so that a violation is reported in debug
        // builds before memory is touched.
        debug_assert!(slice.len() <= self.cap - self.len);
        // SAFETY: the caller guarantees that the destination range lies
        // within the allocation; `slice` cannot overlap storage that is
        // exclusively borrowed through `&mut self`.
        unsafe {
            std::ptr::copy_nonoverlapping(slice.as_ptr(), self.ptr.add(self.len), slice.len());
        }
        self.len += slice.len();
    }
}

impl FixedVec<u8> {
    /// Appends the UTF-8 encoding of `chr` without checking the capacity.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that at least `chr.len_utf8()` bytes of
    /// capacity remain.
    unsafe fn extend_char_unchecked(&mut self, chr: char) {
        // SAFETY: forwarded to the caller's guarantee.
        unsafe { self.extend_unchecked(chr.encode_utf8(&mut [0; 4]).as_bytes()) }
    }
}

impl<T> Drop for FixedVec<T> {
    fn drop(&mut self) {
        // SAFETY: `ptr` and `cap` come from the `Vec` created in `new`. A
        // length of zero releases the storage without running element
        // destructors, which matches the previous behaviour of only
        // deallocating.
        drop(unsafe { Vec::from_raw_parts(self.ptr, 0, self.cap) });
    }
}

impl From<FixedVec<u8>> for String {
    /// Turns the buffer into a `String` without copying.
    fn from(val: FixedVec<u8>) -> Self {
        // Ownership of the storage moves into the `String`, so the buffer's
        // own destructor must not run.
        let val = ManuallyDrop::new(val);
        // SAFETY: the storage was allocated by a `Vec<u8>` with capacity
        // `cap`, and the first `len` bytes were written by
        // `extend_char_unchecked`, which only writes complete UTF-8
        // encodings of `char`s.
        unsafe { String::from_raw_parts(val.ptr, val.len, val.cap) }
    }
}
373
374#[cfg(test)]
375mod tests {
    // Dot-atom parsing of local parts and domains. The expected error index
    // is the byte offset of the first empty label.
    mod dot_atoms {
        use super::super::{ParseError, Parser};

        #[test]
        fn test_parse_local_part() {
            assert_eq!(&Parser::new("test").parse_local_part().unwrap(), "test")
        }

        #[test]
        fn test_parse_empty_local_part() {
            assert_eq!(
                Parser::new("").parse_local_part().unwrap_err(),
                ParseError("empty label in local part", 0)
            )
        }

        #[test]
        fn test_parse_local_part_with_empty_label_in_front() {
            assert_eq!(
                Parser::new(".test").parse_local_part().unwrap_err(),
                ParseError("empty label in local part", 0)
            )
        }

        #[test]
        fn test_parse_local_part_with_empty_label_in_middle() {
            assert_eq!(
                Parser::new("te..st").parse_local_part().unwrap_err(),
                ParseError("empty label in local part", 3)
            )
        }

        #[test]
        fn test_parse_local_part_with_empty_label_in_back() {
            assert_eq!(
                Parser::new("test.").parse_local_part().unwrap_err(),
                ParseError("empty label in local part", 5)
            )
        }

        #[test]
        fn test_parse_domain() {
            assert_eq!(
                Parser::new("test").parse_domain().unwrap(),
                ("test".to_string(), false)
            )
        }

        #[test]
        fn test_parse_empty_domain() {
            assert_eq!(
                Parser::new("").parse_domain().unwrap_err(),
                ParseError("empty label in domain", 0)
            )
        }

        #[test]
        fn test_parse_domain_with_empty_label_in_front() {
            assert_eq!(
                Parser::new(".test").parse_domain().unwrap_err(),
                ParseError("empty label in domain", 0)
            )
        }

        #[test]
        fn test_parse_domain_with_empty_label_in_middle() {
            assert_eq!(
                Parser::new("te..st").parse_domain().unwrap_err(),
                ParseError("empty label in domain", 3)
            )
        }

        #[test]
        fn test_parse_domain_with_empty_label_in_back() {
            assert_eq!(
                Parser::new("test.").parse_domain().unwrap_err(),
                ParseError("empty label in domain", 5)
            )
        }
    }
456
    // Domain-literal parsing. The white-space tests come in two variants
    // because the `white-spaces` feature changes whether white space inside
    // the brackets is skipped or rejected.
    #[cfg(feature = "literals")]
    mod literals {
        use super::super::{ParseError, Parser};

        #[test]
        fn test_parse_literal_domain() {
            assert_eq!(
                Parser::new("[test]").parse_domain().unwrap(),
                ("test".to_string(), true)
            )
        }

        #[test]
        fn test_parse_literal_domain_without_bracket() {
            assert_eq!(
                Parser::new("[test").parse_domain().unwrap_err(),
                ParseError("expected ']' for domain literal", 5)
            )
        }

        #[test]
        fn test_parse_empty_literal_domain() {
            assert_eq!(
                Parser::new("[]").parse_domain().unwrap(),
                ("".to_string(), true)
            )
        }

        #[test]
        fn test_parse_empty_literal_domain_without_bracket() {
            assert_eq!(
                Parser::new("[").parse_domain().unwrap_err(),
                ParseError("expected ']' for domain literal", 1)
            )
        }

        // Without the feature, the space ends the literal and the closing
        // bracket is reported missing at the space.
        #[cfg(not(feature = "white-spaces"))]
        #[test]
        fn test_parse_literal_domain_with_white_spaces() {
            assert_eq!(
                Parser::new("[te st]").parse_domain().unwrap_err(),
                ParseError("expected ']' for domain literal", 3)
            )
        }

        // With the feature, white space inside the brackets is dropped.
        #[cfg(feature = "white-spaces")]
        #[test]
        fn test_parse_literal_domain_with_white_spaces() {
            assert_eq!(
                Parser::new("[te st]").parse_domain().unwrap(),
                ("test".to_string(), true)
            )
        }

        #[cfg(feature = "white-spaces")]
        #[test]
        fn test_parse_literal_domain_with_fws_in_front() {
            assert_eq!(
                Parser::new("[\r\ntest]").parse_domain().unwrap(),
                ("test".to_string(), true)
            )
        }

        #[cfg(feature = "white-spaces")]
        #[test]
        fn test_parse_literal_domain_with_fws_in_middle() {
            assert_eq!(
                Parser::new("[te\r\nst]").parse_domain().unwrap(),
                ("test".to_string(), true)
            )
        }

        #[cfg(feature = "white-spaces")]
        #[test]
        fn test_parse_literal_domain_with_fws_in_back() {
            assert_eq!(
                Parser::new("[test\r\n]").parse_domain().unwrap(),
                ("test".to_string(), true)
            )
        }
    }
538}