smtp_message/
misc.rs

1use std::{
2    fmt,
3    io::IoSlice,
4    iter,
5    net::{Ipv4Addr, Ipv6Addr},
6    str,
7};
8
9use auto_enums::auto_enum;
10use lazy_static::lazy_static;
11use nom::{
12    branch::alt,
13    bytes::streaming::tag,
14    character::streaming::one_of,
15    combinator::{map, map_opt, opt, peek},
16    multi::separated_list1,
17    sequence::{pair, preceded, terminated},
18    IResult,
19};
20use regex_automata::{Regex, RegexBuilder, DFA};
21
22use crate::*;
23
24lazy_static! {
25    static ref HOSTNAME_ASCII: Regex = RegexBuilder::new().anchored(true).build(
26        r#"(?x)
27            \[IPv6: [:.[:xdigit:]]+ \] |             # Ipv6
28            \[ [.0-9]+ \] |                          # Ipv4
29            [[:alnum:]] ([-[:alnum:]]* [[:alnum:]])? # Ascii-only domain
30                ( \. [[:alnum:]] ([-[:alnum:]]* [[:alnum:]])? )*
31        "#
32    ).unwrap();
33
34    static ref HOSTNAME_UTF8: Regex = RegexBuilder::new().anchored(true).build(
35        r#"([-.[:alnum:]]|[[:^ascii:]])+"#
36    ).unwrap();
37
38    // Note: we have to disable the x flag here so that the # in the
39    // middle of the character class does not get construed as a
40    // comment
41    static ref LOCALPART_ASCII: Regex = RegexBuilder::new().anchored(true).build(
42        r#"(?x)
43            " ( [[:ascii:]&&[^\\"[:cntrl:]]] |       # Quoted-string localpart
44                \\ [[:ascii:]&&[:^cntrl:]] )+ " |
45            (?-x)[a-zA-Z0-9!#$%&'*+-/=?^_`{|}~]+(?x) # Dot-string localpart
46                ( \. (?-x)[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(?x) )*
47        "#
48    ).unwrap();
49
50    // Note: we have to disable the x flag here so that the # in the
51    // middle of the character class does not get construed as a
52    // comment
53    static ref LOCALPART_UTF8: Regex = RegexBuilder::new().anchored(true).build(
54        r#"(?x)
55            " ( [^\\"[:cntrl:]] | \\ [[:^cntrl:]] )+ " |                # Quoted-string localpart
56            ( (?-x)[a-zA-Z0-9!#$%&'*+-/=?^_`{|}~](?x) | [[:^ascii:]] )+ # Dot-string localpart
57                ( \. ( (?-x)[a-zA-Z0-9!#$%&'*+-/=?^_`{|}~](?x) | [[:^ascii:]] )+ )*
58        "#
59    ).unwrap();
60}
61
62// Implementation is similar to regex_automata's, but also returns the state
63// when a match wasn't found
64fn find_dfa<D: DFA>(dfa: &D, buf: &[u8]) -> Result<usize, D::ID> {
65    let mut state = dfa.start_state();
66    let mut last_match = if dfa.is_dead_state(state) {
67        return Err(state);
68    } else if dfa.is_match_state(state) {
69        Some(0)
70    } else {
71        None
72    };
73
74    for (i, &b) in buf.iter().enumerate() {
75        state = unsafe { dfa.next_state_unchecked(state, b) };
76        if dfa.is_match_or_dead_state(state) {
77            if dfa.is_dead_state(state) {
78                return last_match.ok_or(state);
79            }
80            last_match = Some(i + 1);
81        }
82    }
83
84    last_match.ok_or(state)
85}
86
87pub fn apply_regex(regex: &Regex) -> impl '_ + FnMut(&[u8]) -> IResult<&[u8], &[u8]> {
88    move |buf: &[u8]| {
89        let dfa = regex.forward();
90
91        let dfa_result = match dfa {
92            regex_automata::DenseDFA::Standard(r) => find_dfa(r, buf),
93            regex_automata::DenseDFA::ByteClass(r) => find_dfa(r, buf),
94            regex_automata::DenseDFA::Premultiplied(r) => find_dfa(r, buf),
95            regex_automata::DenseDFA::PremultipliedByteClass(r) => find_dfa(r, buf),
96            other => find_dfa(other, buf),
97        };
98
99        match dfa_result {
100            Ok(end) => Ok((&buf[end..], &buf[..end])),
101            Err(s) if dfa.is_dead_state(s) => Err(nom::Err::Error(nom::error::Error::new(
102                buf,
103                nom::error::ErrorKind::Verify,
104            ))),
105            Err(_) => Err(nom::Err::Incomplete(nom::Needed::Unknown)),
106        }
107    }
108}
109
110pub fn terminate<'a, 'b>(term: &'b [u8]) -> impl 'b + FnMut(&'a [u8]) -> IResult<&'a [u8], char>
111where
112    'a: 'b,
113{
114    peek(one_of(term))
115}
116
/// Carry-over state for [`next_crlf`] between two consecutive buffers.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum NextCrLfState {
    /// The previously scanned buffer did not end with `\r` (also the value to
    /// start with).
    Start,
    /// The previously scanned buffer ended with `\r`.
    CrPassed,
}

/// Looks for the first `\r\n` of `buf` and returns the index of its `\n`, or
/// `None` when there is none yet.
///
/// `state` lets a `\r\n` that straddles two buffers be detected: pass
/// `NextCrLfState::Start` on the first call, and keep handing in the same
/// reference on following calls until a non-`None` value comes back. The
/// state is only rewritten when `None` is returned for a non-empty buffer.
pub fn next_crlf(buf: &[u8], state: &mut NextCrLfState) -> Option<usize> {
    let (first, last) = match (buf.first(), buf.last()) {
        (Some(&first), Some(&last)) => (first, last),
        // Nothing to look at: leave `state` as it is.
        _ => return None,
    };

    // The `\r` was the last byte of the previous buffer.
    if first == b'\n' && *state == NextCrLfState::CrPassed {
        return Some(0);
    }

    let found = buf.windows(2).position(|pair| pair == b"\r\n");
    if found.is_none() {
        *state = if last == b'\r' {
            NextCrLfState::CrPassed
        } else {
            NextCrLfState::Start
        };
    }
    found.map(|cr| cr + 1)
}
145
// TODO: find out an AsciiString type, and use it here (and below)
/// A string tagged with whether it is plain ASCII or needs UTF-8.
///
/// The tag is chosen by the `From<&str>` conversion; building a variant by
/// hand does not check the content.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub enum MaybeUtf8<S = String> {
    /// Content for which `str::is_ascii` held when converted.
    Ascii(S),
    /// Content with at least one non-ASCII character when converted.
    Utf8(S),
}

impl MaybeUtf8<&str> {
    /// Copies the borrowed text into an owned `MaybeUtf8<String>`, keeping
    /// the variant.
    pub fn to_owned(&self) -> MaybeUtf8<String> {
        match *self {
            MaybeUtf8::Ascii(text) => MaybeUtf8::Ascii(String::from(text)),
            MaybeUtf8::Utf8(text) => MaybeUtf8::Utf8(String::from(text)),
        }
    }
}

// TODO: make this a trait once returning existentials from trait methods is a
// thing
impl<S> MaybeUtf8<S>
where
    S: AsRef<str>,
{
    /// Yields the bytes of the string as a single `IoSlice`.
    #[inline]
    pub fn as_io_slices(&self) -> impl Iterator<Item = IoSlice> {
        iter::once(IoSlice::new(self.as_str().as_bytes()))
    }

    /// Borrows the content as a `&str`, whatever the variant.
    #[inline]
    pub fn as_str(&self) -> &str {
        match self {
            MaybeUtf8::Ascii(inner) | MaybeUtf8::Utf8(inner) => inner.as_ref(),
        }
    }
}

impl<'a, S> From<&'a str> for MaybeUtf8<S>
where
    S: From<&'a str>,
{
    /// Picks `Ascii` when `s` is ASCII-only, `Utf8` otherwise.
    #[inline]
    fn from(s: &'a str) -> MaybeUtf8<S> {
        let ascii_only = s.is_ascii();
        let stored: S = s.into();
        if ascii_only {
            MaybeUtf8::Ascii(stored)
        } else {
            MaybeUtf8::Utf8(stored)
        }
    }
}

impl<T> MaybeUtf8<T> {
    /// Converts the stored string type through `U: From<T>`, keeping the
    /// variant.
    pub fn convert<U>(self) -> MaybeUtf8<U>
    where
        U: From<T>,
    {
        match self {
            MaybeUtf8::Ascii(inner) => MaybeUtf8::Ascii(U::from(inner)),
            MaybeUtf8::Utf8(inner) => MaybeUtf8::Utf8(U::from(inner)),
        }
    }
}
211
212// TODO: Ideally the ipv6 and ipv4 variants would be parsed in the single regex
213// pass. However, that's hard to do, so let's just not do it for now and keep it
214// as an optimization. So for now, it's just as well to return the parsed IPs,
215// but some day they will probably be removed
216/// Note: comparison happens only on the `raw` field, meaning that if you modify
217/// or create a `Hostname` yourself it could have surprising results. But such a
218/// `Hostname` would then not actually represent a real hostname, so you
219/// probably would have had surprising results anyway.
220#[derive(Clone, Debug, Eq)]
221#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
222pub enum Hostname<S = String> {
223    Utf8Domain { raw: S, punycode: String },
224    AsciiDomain { raw: S },
225    Ipv6 { raw: S, ip: Ipv6Addr },
226    Ipv4 { raw: S, ip: Ipv4Addr },
227}
228
// Builds the nom parser behind `Hostname::parse` and `Hostname::parse_until`.
//
// - `expected_length: <expr>`: the match must be exactly that many bytes long.
// - `terminator: <ident>`: the match must be followed by one of the bytes of
//   the given slice (which is not consumed).
// - `@unsafe_impl, ...`: internal rule shared by the two above.
macro_rules! parse_hostname {
    (expected_length: $len:expr) => {
        parse_hostname!(
            @unsafe_impl,
            apply_regex(&HOSTNAME_ASCII),
            apply_regex(&HOSTNAME_UTF8),
            |matched: &[u8]| matched.len() == $len,
        )
    };

    (terminator: $until_term:ident) => {
        parse_hostname!(
            @unsafe_impl,
            terminated(apply_regex(&HOSTNAME_ASCII), terminate($until_term)),
            terminated(apply_regex(&HOSTNAME_UTF8), terminate($until_term)),
            |_| true,
        )
    };

    (@unsafe_impl, $ascii_regex:expr, $utf8_regex:expr, $check:expr,) => {
        alt((
            // First try: ASCII domain or address literal.
            map_opt(
                $ascii_regex,
                |bytes: &[u8]| {
                    if !$check(bytes) {
                        return None;
                    }

                    // The unsafe below is OK, thanks to our regex
                    // validating that `bytes` is proper ascii (and
                    // thus utf-8)
                    let text = unsafe { str::from_utf8_unchecked(bytes) };

                    if bytes[0] != b'[' {
                        return Some(Hostname::AsciiDomain { raw: text.into() });
                    }

                    // Address literal: the regex guarantees the closing
                    // bracket is the last byte.
                    let closing = text.len() - 1;
                    if bytes[1] == b'I' {
                        // Skip the leading `[IPv6:`
                        text[6..closing]
                            .parse::<Ipv6Addr>()
                            .ok()
                            .map(|ip| Hostname::Ipv6 { raw: text.into(), ip })
                    } else {
                        // Skip the leading `[`
                        text[1..closing]
                            .parse::<Ipv4Addr>()
                            .ok()
                            .map(|ip| Hostname::Ipv4 { raw: text.into(), ip })
                    }
                },
            ),
            // Second try: domain with non-ASCII characters.
            map_opt(
                $utf8_regex,
                |bytes: &[u8]| {
                    if !$check(bytes) {
                        return None;
                    }

                    // The below unsafe is OK, thanks to our regex
                    // never disabling the `u` flag and thus
                    // validating that the match is proper utf-8
                    let raw = unsafe { str::from_utf8_unchecked(bytes) };

                    // TODO: looks like idna exposes only an
                    // allocating method for validating an IDNA domain
                    // name. Maybe it'd be possible to get them to
                    // expose a validation-only function? Or maybe
                    // not.
                    let idna_config = idna::Config::default()
                        .use_std3_ascii_rules(true)
                        .verify_dns_length(true)
                        .check_hyphens(true);

                    idna_config.to_ascii(raw).ok().map(|punycode| Hostname::Utf8Domain {
                        raw: raw.into(),
                        punycode,
                    })
                },
            ),
        ))
    };
}
310
311impl<S> Hostname<S> {
312    pub fn parse<'a, 'b>(data: &'a [u8]) -> IResult<&'a [u8], Hostname<S>>
313    where
314        'a: 'b,
315        S: 'b + From<&'a str>,
316    {
317        parse_hostname!(expected_length: data.len())(data)
318    }
319
320    pub fn parse_until<'a, 'b>(
321        term: &'b [u8],
322    ) -> impl 'b + FnMut(&'a [u8]) -> IResult<&'a [u8], Hostname<S>>
323    where
324        'a: 'b,
325        S: 'b + From<&'a str>,
326    {
327        parse_hostname!(terminator: term)
328    }
329}
330
331impl<S> Hostname<S> {
332    #[inline]
333    pub fn raw(&self) -> &S {
334        match self {
335            Hostname::Utf8Domain { raw, .. } => raw,
336            Hostname::AsciiDomain { raw, .. } => raw,
337            Hostname::Ipv4 { raw, .. } => raw,
338            Hostname::Ipv6 { raw, .. } => raw,
339        }
340    }
341}
342
343impl<S> Hostname<S>
344where
345    S: AsRef<str>,
346{
347    #[inline]
348    pub fn as_io_slices(&self) -> impl Iterator<Item = IoSlice> {
349        iter::once(IoSlice::new(self.raw().as_ref().as_ref()))
350    }
351
352    #[inline]
353    pub fn to_ref(&self) -> Hostname<&str> {
354        match self {
355            Hostname::Utf8Domain { raw, punycode } => Hostname::Utf8Domain {
356                raw: raw.as_ref(),
357                punycode: punycode.clone(),
358            },
359            Hostname::AsciiDomain { raw } => Hostname::AsciiDomain { raw: raw.as_ref() },
360            Hostname::Ipv4 { raw, ip } => Hostname::Ipv4 {
361                raw: raw.as_ref(),
362                ip: *ip,
363            },
364            Hostname::Ipv6 { raw, ip } => Hostname::Ipv6 {
365                raw: raw.as_ref(),
366                ip: *ip,
367            },
368        }
369    }
370}
371
372impl<S: AsRef<str>> fmt::Display for Hostname<S> {
373    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
374        write!(f, "{}", self.raw().as_ref())
375    }
376}
377
378impl<S: PartialEq> std::cmp::PartialEq for Hostname<S> {
379    fn eq(&self, o: &Hostname<S>) -> bool {
380        self.raw() == o.raw()
381    }
382}
383
384impl<S: std::hash::Hash> std::hash::Hash for Hostname<S> {
385    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
386        self.raw().hash(state)
387    }
388}
389
#[cfg(test)]
impl<S: Eq + PartialEq> Hostname<S> {
    /// Test helper: unlike `==`, also requires the same variant and equal
    /// values in every field.
    fn deep_equal(&self, o: &Hostname<S>) -> bool {
        match (self, o) {
            (
                Hostname::Utf8Domain { raw, punycode },
                Hostname::Utf8Domain {
                    raw: raw2,
                    punycode: punycode2,
                },
            ) => raw == raw2 && punycode == punycode2,
            (Hostname::AsciiDomain { raw }, Hostname::AsciiDomain { raw: raw2 }) => raw == raw2,
            (Hostname::Ipv4 { raw, ip }, Hostname::Ipv4 { raw: raw2, ip: ip2 }) => {
                raw == raw2 && ip == ip2
            }
            (Hostname::Ipv6 { raw, ip }, Hostname::Ipv6 { raw: raw2, ip: ip2 }) => {
                raw == raw2 && ip == ip2
            }
            // Different variants
            _ => false,
        }
    }
}
416
417impl Hostname<&str> {
418    pub fn into_owned(self) -> Hostname<String> {
419        match self {
420            Hostname::Utf8Domain { raw, punycode } => Hostname::Utf8Domain {
421                raw: (*raw).to_owned(),
422                punycode,
423            },
424            Hostname::AsciiDomain { raw } => Hostname::AsciiDomain {
425                raw: (*raw).to_owned(),
426            },
427            Hostname::Ipv4 { raw, ip } => Hostname::Ipv4 {
428                raw: (*raw).to_owned(),
429                ip,
430            },
431            Hostname::Ipv6 { raw, ip } => Hostname::Ipv6 {
432                raw: (*raw).to_owned(),
433                ip,
434            },
435        }
436    }
437}
438
439// TODO: consider adding `Sane` variant like OpenSMTPD does, that would not be
440// matched by weird characters
441#[derive(Clone, Copy, Debug, Eq)]
442#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
443pub enum Localpart<S = String> {
444    Ascii { raw: S },
445    QuotedAscii { raw: S },
446    Utf8 { raw: S },
447    QuotedUtf8 { raw: S },
448}
449
450impl<S> Localpart<S> {
451    pub fn parse_until<'a, 'b>(
452        term: &'b [u8],
453    ) -> impl 'b + FnMut(&'a [u8]) -> IResult<&'a [u8], Localpart<S>>
454    where
455        'a: 'b,
456        S: 'b + From<&'a str>,
457    {
458        alt((
459            map(
460                terminated(apply_regex(&LOCALPART_ASCII), terminate(term)),
461                |b: &[u8]| {
462                    // The below unsafe is OK, thanks to our regex
463                    // validating that `b` is proper ascii (and thus
464                    // utf-8)
465                    let s = unsafe { str::from_utf8_unchecked(b) };
466
467                    if b[0] != b'"' {
468                        Localpart::Ascii { raw: s.into() }
469                    } else {
470                        Localpart::QuotedAscii { raw: s.into() }
471                    }
472                },
473            ),
474            map(
475                terminated(apply_regex(&LOCALPART_UTF8), terminate(term)),
476                |b: &[u8]| {
477                    // The below unsafe is OK, thanks to our regex
478                    // validating that `b` is proper utf-8 by never disabling the `u` flag
479                    let s = unsafe { str::from_utf8_unchecked(b) };
480
481                    if b[0] != b'"' {
482                        Localpart::Utf8 { raw: s.into() }
483                    } else {
484                        Localpart::QuotedUtf8 { raw: s.into() }
485                    }
486                },
487            ),
488        ))
489    }
490}
491
492impl<S> Localpart<S> {
493    #[inline]
494    pub fn raw(&self) -> &S {
495        match self {
496            Localpart::Ascii { raw } => raw,
497            Localpart::QuotedAscii { raw } => raw,
498            Localpart::Utf8 { raw } => raw,
499            Localpart::QuotedUtf8 { raw } => raw,
500        }
501    }
502}
503
504impl<S> Localpart<S>
505where
506    S: AsRef<str>,
507{
508    #[inline]
509    pub fn as_io_slices(&self) -> impl Iterator<Item = IoSlice> {
510        iter::once(IoSlice::new(self.raw().as_ref().as_ref()))
511    }
512}
513
514impl<S: AsRef<str>> fmt::Display for Localpart<S> {
515    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
516        write!(f, "{}", self.raw().as_ref())
517    }
518}
519
520impl<S: PartialEq> std::cmp::PartialEq for Localpart<S> {
521    fn eq(&self, o: &Localpart<S>) -> bool {
522        self.raw() == o.raw()
523    }
524}
525
// Strips the quoting from a quoted-string localpart: the first character (the
// opening quote) is dropped, a backslash makes the following character
// literal, and any double quote that is not escaped is left out.
fn unquoted<S>(s: &S) -> String
where
    S: AsRef<str>,
{
    let mut out = String::new();
    let mut escaped = false;

    for ch in s.as_ref().chars().skip(1) {
        if escaped {
            // Whatever follows a backslash is kept verbatim.
            out.push(ch);
            escaped = false;
        } else if ch == '\\' {
            escaped = true;
        } else if ch != '"' {
            out.push(ch);
        }
    }

    out
}
557
558impl<S> Localpart<S>
559where
560    S: AsRef<str>,
561{
562    #[inline]
563    pub fn unquote(&self) -> MaybeUtf8<String> {
564        match self {
565            Localpart::Ascii { raw } => MaybeUtf8::Ascii(raw.as_ref().to_owned()),
566            Localpart::Utf8 { raw } => MaybeUtf8::Utf8(raw.as_ref().to_owned()),
567            Localpart::QuotedAscii { raw } => MaybeUtf8::Ascii(unquoted(raw)),
568            Localpart::QuotedUtf8 { raw } => MaybeUtf8::Utf8(unquoted(raw)),
569        }
570    }
571
572    #[inline]
573    pub fn to_ref(&self) -> Localpart<&str> {
574        match self {
575            Localpart::Ascii { raw } => Localpart::Ascii { raw: raw.as_ref() },
576            Localpart::Utf8 { raw } => Localpart::Utf8 { raw: raw.as_ref() },
577            Localpart::QuotedAscii { raw } => Localpart::QuotedAscii { raw: raw.as_ref() },
578            Localpart::QuotedUtf8 { raw } => Localpart::QuotedUtf8 { raw: raw.as_ref() },
579        }
580    }
581}
582
583impl Localpart<&str> {
584    pub fn to_owned(&self) -> Localpart<String> {
585        match self {
586            Localpart::Ascii { raw } => Localpart::Ascii {
587                raw: (*raw).to_owned(),
588            },
589            Localpart::Utf8 { raw } => Localpart::Utf8 {
590                raw: (*raw).to_owned(),
591            },
592            Localpart::QuotedAscii { raw } => Localpart::QuotedAscii {
593                raw: (*raw).to_owned(),
594            },
595            Localpart::QuotedUtf8 { raw } => Localpart::QuotedUtf8 {
596                raw: (*raw).to_owned(),
597            },
598        }
599    }
600}
601
602#[derive(Clone, Debug, Eq, PartialEq)]
603#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
604// TODO: this can be serialized *much* better than the default serde serializers
605pub struct Email<S = String> {
606    pub localpart: Localpart<S>,
607    pub hostname: Option<Hostname<S>>,
608}
609
610impl<S> Email<S> {
611    /// term_with_atsign must be term + b"@"
612    #[inline]
613    pub fn parse_until<'a, 'b>(
614        term: &'b [u8],
615        term_with_atsign: &'b [u8],
616    ) -> impl 'b + FnMut(&'a [u8]) -> IResult<&'a [u8], Email<S>>
617    where
618        'a: 'b,
619        S: 'b + From<&'a str>,
620    {
621        map(
622            pair(
623                Localpart::parse_until(term_with_atsign),
624                opt(preceded(tag(b"@"), Hostname::parse_until(term))),
625            ),
626            |(localpart, hostname)| Email {
627                localpart,
628                hostname,
629            },
630        )
631    }
632
633    // TODO: test parse_bracketed?
634    #[inline]
635    pub fn parse_bracketed<'a>(
636        buf: &'a [u8],
637    ) -> Result<Email<S>, nom::Err<nom::error::Error<&'a [u8]>>>
638    where
639        S: From<&'a str>,
640    {
641        match preceded(
642            tag(b"<"),
643            terminated(Email::parse_until(b">", b"@>"), tag(b">")),
644        )(buf)
645        {
646            Err(e) => Err(e),
647            Ok((&[], r)) => Ok(r),
648            Ok((rem, _)) => Err(nom::Err::Failure(nom::error::Error::new(
649                rem,
650                nom::error::ErrorKind::TooLarge,
651            ))),
652        }
653    }
654}
655
656impl<S> Email<S>
657where
658    S: AsRef<str>,
659{
660    #[inline]
661    #[auto_enum]
662    pub fn as_io_slices(&self) -> impl Iterator<Item = IoSlice> {
663        #[auto_enum(Iterator)]
664        let hostname = match self.hostname {
665            Some(ref hostname) => iter::once(IoSlice::new(b"@")).chain(hostname.as_io_slices()),
666            None => iter::empty(),
667        };
668        self.localpart.as_io_slices().chain(hostname)
669    }
670
671    #[inline]
672    pub fn to_ref(&self) -> Email<&str> {
673        // TODO: figure out why self.hostname.map(|h| h.to_ref()) doesn't compile
674        match self.hostname {
675            None => Email {
676                localpart: self.localpart.to_ref(),
677                hostname: None,
678            },
679            Some(ref h) => Email {
680                localpart: self.localpart.to_ref(),
681                hostname: Some(h.to_ref()),
682            },
683        }
684    }
685}
686
687impl<S: AsRef<str>> fmt::Display for Email<S> {
688    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
689        if let Some(ref hostname) = self.hostname {
690            write!(f, "<{}@{}>", self.localpart, hostname)
691        } else {
692            write!(f, "<{}>", self.localpart)
693        }
694    }
695}
696
697impl Email<&str> {
698    pub fn into_owned(self) -> Email<String> {
699        Email {
700            localpart: self.localpart.to_owned(),
701            hostname: self.hostname.map(|h| h.into_owned()),
702        }
703    }
704}
705
706/// Note: for convenience this is not exactly like what is described by RFC5321,
707/// and it does not contain the Email. Indeed, paths are *very* rare nowadays.
708///
709/// `Path` as defined here is what is specified in RFC5321 as `A-d-l`
710#[derive(Clone, Debug, Eq, PartialEq)]
711pub struct Path<S = String> {
712    pub domains: Vec<Hostname<S>>,
713}
714
715impl<S> Path<S> {
716    /// term_with_comma must be the wanted terminator, with b"," added
717    #[inline]
718    pub fn parse_until<'a, 'b>(
719        term_with_comma: &'b [u8],
720    ) -> impl 'b + FnMut(&'a [u8]) -> IResult<&'a [u8], Path<S>>
721    where
722        'a: 'b,
723        S: 'b + From<&'a str>,
724    {
725        map(
726            separated_list1(
727                tag(b","),
728                preceded(tag(b"@"), Hostname::parse_until(term_with_comma)),
729            ),
730            |domains| Path { domains },
731        )
732    }
733}
734
735impl<S> Path<S>
736where
737    S: AsRef<str>,
738{
739    #[inline]
740    pub fn as_io_slices(&self) -> impl Iterator<Item = IoSlice> {
741        self.domains.iter().enumerate().flat_map(|(i, d)| {
742            iter::once(match i {
743                0 => IoSlice::new(b"@"),
744                _ => IoSlice::new(b",@"),
745            })
746            .chain(d.as_io_slices())
747        })
748    }
749}
750
751// TODO: add valid/incomplete/invalid tests for Path
752
753#[inline]
754fn unbracketed_email_with_path<'a, 'b, S>(
755    term: &'b [u8],
756    term_with_atsign: &'b [u8],
757) -> impl 'b + FnMut(&'a [u8]) -> IResult<&'a [u8], (Option<Path<S>>, Email<S>)>
758where
759    'a: 'b,
760    S: 'b + From<&'a str>,
761{
762    pair(
763        opt(terminated(Path::parse_until(b":,"), tag(b":"))),
764        Email::parse_until(term, term_with_atsign),
765    )
766}
767
768/// term
769/// term_with_atsign = term + b"@"
770/// term_with_bracket = term + b">"
771/// term_with_bracket_atsign = term + b"@>"
772#[inline]
773pub fn email_with_path<'a, 'b, S>(
774    term: &'b [u8],
775    term_with_atsign: &'b [u8],
776    term_with_bracket: &'b [u8],
777    term_with_bracket_atsign: &'b [u8],
778) -> impl 'b + FnMut(&'a [u8]) -> IResult<&'a [u8], (Option<Path<S>>, Email<S>)>
779where
780    'a: 'b,
781    S: 'b + From<&'a str>,
782{
783    alt((
784        preceded(
785            tag(b"<"),
786            terminated(
787                unbracketed_email_with_path(term_with_bracket, term_with_bracket_atsign),
788                tag(b">"),
789            ),
790        ),
791        unbracketed_email_with_path(term, term_with_atsign),
792    ))
793}
794
795#[cfg(test)]
796mod tests {
797    use super::*;
798
799    #[test]
800    fn next_crlf_works() {
801        let tests: &[(&[u8], NextCrLfState, Option<usize>, NextCrLfState)] = &[
802            (
803                b"hello world",
804                NextCrLfState::Start,
805                None,
806                NextCrLfState::Start,
807            ),
808            (
809                b"hello world\r",
810                NextCrLfState::Start,
811                None,
812                NextCrLfState::CrPassed,
813            ),
814            (
815                b"hello world\r\n",
816                NextCrLfState::Start,
817                Some(12),
818                NextCrLfState::Start,
819            ),
820            (
821                b"\nhello world",
822                NextCrLfState::CrPassed,
823                Some(0),
824                NextCrLfState::CrPassed,
825            ),
826            (
827                b"\r\nhello world",
828                NextCrLfState::CrPassed,
829                Some(1),
830                NextCrLfState::CrPassed,
831            ),
832        ];
833        for (inp, mut st, out, endst) in tests {
834            println!();
835            println!("Start: {:?}, input: {:?}", st, show_bytes(inp));
836            println!("---");
837            let res = next_crlf(inp, &mut st);
838            println!("Expected: {:?} / {:?}", out, endst);
839            println!("Got     : {:?} / {:?}", res, st);
840            assert_eq!(res, *out);
841            assert_eq!(st, *endst);
842        }
843    }
844
845    #[test]
846    fn hostname_valid() {
847        let tests: &[(&[u8], &[u8], Hostname<&str>)] = &[
848            (b"foo--bar>", b"", Hostname::AsciiDomain { raw: "foo--bar" }),
849            (b"foo.bar.baz>", b"", Hostname::AsciiDomain {
850                raw: "foo.bar.baz",
851            }),
852            (b"1.2.3.4>", b"", Hostname::AsciiDomain { raw: "1.2.3.4" }),
853            (b"[123.255.37.2]>", b"", Hostname::Ipv4 {
854                raw: "[123.255.37.2]",
855                ip: "123.255.37.2".parse().unwrap(),
856            }),
857            (b"[IPv6:0::ffff:8.7.6.5]>", b"", Hostname::Ipv6 {
858                raw: "[IPv6:0::ffff:8.7.6.5]",
859                ip: "0::ffff:8.7.6.5".parse().unwrap(),
860            }),
861            ("élégance.fr>".as_bytes(), b"", Hostname::Utf8Domain {
862                raw: "élégance.fr",
863                punycode: "xn--lgance-9uab.fr".into(),
864            }),
865            ("papier-maché.fr>".as_bytes(), b"", Hostname::Utf8Domain {
866                raw: "papier-maché.fr",
867                punycode: "xn--papier-mach-lbb.fr".into(),
868            }),
869        ];
870        for (inp, rem, out) in tests {
871            // Test parse_until
872            let parsed = terminated(Hostname::parse_until(b">"), tag(b">"))(inp);
873            println!(
874                "\nTest: {:?}\nParse_until result: {:?}\nExpected: {:?}",
875                show_bytes(inp),
876                parsed,
877                out
878            );
879            match parsed {
880                Ok((rest, host)) => assert!(rest == *rem && host.deep_equal(out)),
881                x => panic!("Unexpected result: {:?}", x),
882            }
883
884            // Test parse
885            let parsed = Hostname::parse(&inp[..inp.len() - 1]);
886            println!(
887                "\nTest: {:?}\nParse result: {:?}\nExpected: {:?}",
888                show_bytes(inp),
889                parsed,
890                out
891            );
892            match parsed {
893                Ok((rest, host)) => assert!(rest == *rem && host.deep_equal(out)),
894                x => panic!("Unexpected result: {:?}", x),
895            }
896        }
897    }
898
899    #[test]
900    fn hostname_incomplete() {
901        let tests: &[&[u8]] = &[b"[1.2", b"[IPv6:0::"];
902        for inp in tests {
903            // Test parse_until
904            let r = Hostname::<&str>::parse_until(b">")(inp);
905            println!("{:?}:  {:?}", show_bytes(inp), r);
906            assert!(r.unwrap_err().is_incomplete());
907
908            // Test parse
909            let r = Hostname::<&str>::parse(inp);
910            println!("{:?}:  {:?}", show_bytes(inp), r);
911            assert!(r.unwrap_err().is_incomplete());
912        }
913    }
914
915    #[test]
916    fn hostname_invalid() {
917        let tests: &[&[u8]] = &[
918            b"-foo.bar>",                 // No sub-domain starting with a dash
919            b"\xFF>",                     // No invalid utf-8
920            "élégance.-fr>".as_bytes(), // No dashes in utf-8 either
921            b"foo.bar!>",                 // For parse: reject when there is trailing data
922        ];
923        for inp in tests {
924            // Test parse_until
925            let r = Hostname::<String>::parse_until(b">")(inp);
926            println!("{:?}: {:?}", show_bytes(inp), r);
927            assert!(!r.unwrap_err().is_incomplete());
928
929            // Test parse
930            let r = Hostname::<String>::parse(&inp[..inp.len() - 1]);
931            println!("{:?}: {:?}", show_bytes(inp), r);
932            assert!(!r.unwrap_err().is_incomplete());
933        }
934    }
935
936    // TODO: test hostname_build
937
938    #[test]
939    fn localpart_valid() {
940        let tests: &[(&[u8], &[u8], Localpart<&str>)] = &[
941            (b"helloooo@", b"", Localpart::Ascii { raw: "helloooo" }),
942            (b"test.ing>", b"", Localpart::Ascii { raw: "test.ing" }),
943            (br#""hello"@"#, b"", Localpart::QuotedAscii {
944                raw: r#""hello""#,
945            }),
946            (
947                br#""hello world. This |$ a g#eat place to experiment !">"#,
948                b"",
949                Localpart::QuotedAscii {
950                    raw: r#""hello world. This |$ a g#eat place to experiment !""#,
951                },
952            ),
953            (
954                br#""\"escapes\", useless like h\ere, except for quotes and backslashes\\"@"#,
955                b"",
956                Localpart::QuotedAscii {
957                    raw: r#""\"escapes\", useless like h\ere, except for quotes and backslashes\\""#,
958                },
959            ),
960            // TODO: add Utf8 tests
961        ];
962        for (inp, rem, out) in tests {
963            println!("Test: {:?}", show_bytes(inp));
964            let r = terminated(Localpart::parse_until(b"@>"), alt((tag(b"@"), tag(b">"))))(inp);
965            println!("Result: {:?}", r);
966            match r {
967                Ok((rest, res)) if rest == *rem && res == *out => (),
968                x => panic!("Unexpected result: {:?}", x),
969            }
970        }
971    }
972
973    // TODO: add incomplete localpart tests
974
975    #[test]
976    fn localpart_invalid() {
977        let tests: &[&[u8]] = &[br#"""@"#, br#""""@"#, b"\r@"];
978        for inp in tests {
979            let r = Localpart::<&str>::parse_until(b"@>")(inp);
980            assert!(!r.unwrap_err().is_incomplete());
981        }
982    }
983
984    // TODO: add build localpart tests
985
986    #[test]
987    fn localpart_unquoting() {
988        let tests: &[(&[u8], MaybeUtf8<&str>)] = &[
989            (
990                b"t+e-s.t_i+n-g@foo.bar.baz ",
991                MaybeUtf8::Ascii("t+e-s.t_i+n-g"),
992            ),
993            (
994                br#""quoted\"example"@example.org "#,
995                MaybeUtf8::Ascii(r#"quoted"example"#),
996            ),
997            (
998                br#""escaped\\exa\mple"@example.org "#,
999                MaybeUtf8::Ascii(r#"escaped\example"#),
1000            ),
1001        ];
1002        for (inp, out) in tests {
1003            println!("Test: {:?}", show_bytes(inp));
1004            let res = Email::<&str>::parse_until(b" ", b" @")(inp).unwrap().1;
1005            println!("Result: {:?}", res);
1006            assert_eq!(res.localpart.unquote(), out.to_owned());
1007        }
1008    }
1009
1010    #[test]
1011    fn email_valid() {
1012        let tests: &[(&[u8], &[u8], Email<&str>)] = &[
1013            (b"t+e-s.t_i+n-g@foo.bar.baz>", b"", Email {
1014                localpart: Localpart::Ascii {
1015                    raw: "t+e-s.t_i+n-g",
1016                },
1017                hostname: Some(Hostname::AsciiDomain { raw: "foo.bar.baz" }),
1018            }),
1019            (br#""quoted\"example"@example.org>"#, b"", Email {
1020                localpart: Localpart::QuotedAscii {
1021                    raw: r#""quoted\"example""#,
1022                },
1023                hostname: Some(Hostname::AsciiDomain { raw: "example.org" }),
1024            }),
1025            (b"postmaster>", b"", Email {
1026                localpart: Localpart::Ascii { raw: "postmaster" },
1027                hostname: None,
1028            }),
1029            (b"test>", b"", Email {
1030                localpart: Localpart::Ascii { raw: "test" },
1031                hostname: None,
1032            }),
1033            (
1034                r#""quoted\"example"@exámple.org>"#.as_bytes(),
1035                b"",
1036                Email {
1037                    localpart: Localpart::QuotedAscii {
1038                        raw: r#""quoted\"example""#,
1039                    },
1040                    hostname: Some(Hostname::Utf8Domain {
1041                        raw: "exámple.org",
1042                        punycode: "foo".into(),
1043                    }),
1044                },
1045            ),
1046            ("tést>".as_bytes(), b"", Email {
1047                localpart: Localpart::Utf8 { raw: "tést" },
1048                hostname: None,
1049            }),
1050        ];
1051        for (inp, rem, out) in tests {
1052            println!("Test: {:?}", show_bytes(inp));
1053            let r = terminated(Email::parse_until(b">", b">@"), tag(b">"))(inp);
1054            println!("Result: {:?}", r);
1055            match r {
1056                Ok((rest, res)) if rest == *rem && res == *out => (),
1057                x => panic!("Unexpected result: {:?}", x),
1058            }
1059        }
1060    }
1061
1062    // TODO: add incomplete email tests
1063
1064    #[test]
1065    fn email_invalid() {
1066        let tests: &[&[u8]] = &[b"@foo.bar"];
1067        for inp in tests {
1068            let r = Email::<&str>::parse_until(b">", b">@")(inp);
1069            assert!(!r.unwrap_err().is_incomplete());
1070        }
1071    }
1072
1073    // TODO: add build email tests
1074
1075    #[test]
1076    fn unbracketed_email_with_path_valid() {
1077        let tests: &[(&[u8], &[u8], (Option<Path<&str>>, Email<&str>))] = &[
1078            (
1079                b"@foo.bar,@baz.quux:test@example.org>",
1080                b">",
1081                (
1082                    Some(Path {
1083                        domains: vec![
1084                            Hostname::AsciiDomain { raw: "foo.bar" },
1085                            Hostname::AsciiDomain { raw: "baz.quux" },
1086                        ],
1087                    }),
1088                    Email {
1089                        localpart: Localpart::Ascii { raw: "test" },
1090                        hostname: Some(Hostname::AsciiDomain { raw: "example.org" }),
1091                    },
1092                ),
1093            ),
1094            (
1095                b"foo.bar@baz.quux>",
1096                b">",
1097                (None, Email {
1098                    localpart: Localpart::Ascii { raw: "foo.bar" },
1099                    hostname: Some(Hostname::AsciiDomain { raw: "baz.quux" }),
1100                }),
1101            ),
1102        ];
1103        for (inp, rem, out) in tests {
1104            println!("Test: {:?}", show_bytes(inp));
1105            match unbracketed_email_with_path(b">", b">@")(inp) {
1106                Ok((rest, res)) if rest == *rem && res == *out => (),
1107                x => panic!("Unexpected result: {:?}", x),
1108            }
1109        }
1110    }
1111
1112    // TODO: test unbracketed_email_with_path with incomplete, invalid and build
1113
1114    #[test]
1115    fn email_with_path_valid() {
1116        let tests: &[(&[u8], (Option<Path<&str>>, Email<&str>))] = &[
1117            (
1118                b"@foo.bar,@baz.quux:test@example.org ",
1119                (
1120                    Some(Path {
1121                        domains: vec![
1122                            Hostname::AsciiDomain { raw: "foo.bar" },
1123                            Hostname::AsciiDomain { raw: "baz.quux" },
1124                        ],
1125                    }),
1126                    Email {
1127                        localpart: Localpart::Ascii { raw: "test" },
1128                        hostname: Some(Hostname::AsciiDomain { raw: "example.org" }),
1129                    },
1130                ),
1131            ),
1132            (
1133                b"<@foo.bar,@baz.quux:test@example.org> ",
1134                (
1135                    Some(Path {
1136                        domains: vec![
1137                            Hostname::AsciiDomain { raw: "foo.bar" },
1138                            Hostname::AsciiDomain { raw: "baz.quux" },
1139                        ],
1140                    }),
1141                    Email {
1142                        localpart: Localpart::Ascii { raw: "test" },
1143                        hostname: Some(Hostname::AsciiDomain { raw: "example.org" }),
1144                    },
1145                ),
1146            ),
1147            (
1148                b"<foo@bar.baz> ",
1149                (None, Email {
1150                    localpart: Localpart::Ascii { raw: "foo" },
1151                    hostname: Some(Hostname::AsciiDomain { raw: "bar.baz" }),
1152                }),
1153            ),
1154            (
1155                b"foo@bar.baz ",
1156                (None, Email {
1157                    localpart: Localpart::Ascii { raw: "foo" },
1158                    hostname: Some(Hostname::AsciiDomain { raw: "bar.baz" }),
1159                }),
1160            ),
1161            (
1162                b"foobar ",
1163                (None, Email {
1164                    localpart: Localpart::Ascii { raw: "foobar" },
1165                    hostname: None,
1166                }),
1167            ),
1168        ];
1169        for (inp, out) in tests {
1170            println!("Test: {:?}", show_bytes(inp));
1171            let r = email_with_path(b" ", b" @", b" >", b" @>")(inp);
1172            println!("Result: {:?}", r);
1173            match r {
1174                Ok((rest, res)) if rest == b" " && res == *out => (),
1175                x => panic!("Unexpected result: {:?}", x),
1176            }
1177        }
1178    }
1179
    // TODO: test email_with_path with incomplete and invalid
1181}