Skip to main content

eml_codec/imf/
mailbox.rs

1#[cfg(feature = "arbitrary")]
2use arbitrary::Arbitrary;
3use bounded_static::ToStatic;
4use nom::{
5    branch::alt,
6    bytes::complete::tag,
7    combinator::{all_consuming, into, map, map_opt, opt},
8    multi::{many0, many1, separated_list1},
9    sequence::{delimited, pair, preceded, terminated, tuple},
10    IResult,
11};
12use std::borrow::Cow;
13#[cfg(feature = "tracing")]
14use tracing::warn;
15
16use crate::i18n::ContainsUtf8;
17use crate::print::{print_seq, Formatter, Print, ToStringFromPrint};
18use crate::text::ascii;
19use crate::text::misc_token::{phrase, word, Phrase, Word, WordChars};
20use crate::text::quoted::print_quoted;
21use crate::text::utf8::{is_ascii_and, is_nonascii_or, take_utf8_while1};
22use crate::text::whitespace::{cfws, fws, is_obs_no_ws_ctl};
23use crate::text::words::{atom, dot_atom_text, Atom};
24#[cfg(feature = "arbitrary")]
25use crate::{
26    arbitrary_utils::{arbitrary_string_nonempty_where, arbitrary_vec_nonempty},
27    fuzz_eq::FuzzEq,
28};
29use eml_codec_derives::instrument_input;
30
31#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
32#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
33pub struct AddrSpec<'a> {
34    pub local_part: LocalPart<'a>,
35    pub domain: Domain<'a>,
36}
37impl<'a> Print for AddrSpec<'a> {
38    fn print(&self, fmt: &mut impl Formatter) {
39        self.local_part.print(fmt);
40        fmt.write_bytes(b"@");
41        self.domain.print(fmt)
42    }
43}
44
45#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
46#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
47pub struct MailboxRef<'a> {
48    // The actual "email address" like hello@example.com
49    pub addrspec: AddrSpec<'a>,
50    // The optional name
51    pub name: Option<Phrase<'a>>,
52}
53impl MailboxRef<'static> {
54    // Used as placeholder value for a missing or invalid address.
55    // Represents "unknown@unknown".
56    pub fn placeholder() -> Self {
57        MailboxRef {
58            addrspec: AddrSpec {
59                local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
60                    "unknown".into(),
61                )))]),
62                domain: Domain::Atoms(vec![Atom("unknown".into())]),
63            },
64            name: None,
65        }
66    }
67}
68impl<'a> From<AddrSpec<'a>> for MailboxRef<'a> {
69    fn from(addr: AddrSpec<'a>) -> Self {
70        MailboxRef {
71            name: None,
72            addrspec: addr,
73        }
74    }
75}
76impl<'a> Print for MailboxRef<'a> {
77    fn print(&self, fmt: &mut impl Formatter) {
78        match &self.name {
79            Some(name) => {
80                name.print(fmt);
81                fmt.write_fws();
82                fmt.write_bytes(b"<");
83                self.addrspec.print(fmt);
84                fmt.write_bytes(b">")
85            }
86            None => self.addrspec.print(fmt),
87        }
88    }
89}
90
91/// A non-empty list of mailboxes.
92#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
93#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
94pub struct MailboxList<'a>(pub Vec<MailboxRef<'a>>);
95
96impl<'a> Print for MailboxList<'a> {
97    fn print(&self, fmt: &mut impl Formatter) {
98        print_seq(fmt, &self.0, |fmt| {
99            fmt.write_bytes(b",");
100            fmt.write_fws()
101        })
102    }
103}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for MailboxList<'a> {
    /// Builds the list from `arbitrary_vec_nonempty`, in line with the
    /// "non-empty list" contract of `MailboxList`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let mailboxes = arbitrary_vec_nonempty(u)?;
        Ok(MailboxList(mailboxes))
    }
}
110
111/// Mailbox
112///
113/// ```abnf
114///    mailbox         =   name-addr / addr-spec
115/// ```
116#[instrument_input("tracing")]
117pub fn mailbox(input: &[u8]) -> IResult<&[u8], MailboxRef<'_>> {
118    alt((name_addr, into(addr_spec)))(input)
119}
120
121/// Mailbox list
122///
123/// ```abnf
124///    mailbox-list    =   (mailbox *("," mailbox)) / obs-mbox-list
125///    obs-mbox-list   =   *([CFWS] ",") mailbox *("," [mailbox / CFWS])
126/// ```
127#[instrument_input("tracing")]
128pub fn mailbox_list(input: &[u8]) -> IResult<&[u8], MailboxList<'_>> {
129    map_opt(mailbox_list_nullable, |mlist| mlist)(input)
130}
131
132// mailbox-list but allows the list to only contain "null" elements
133#[instrument_input("tracing")]
134pub(crate) fn mailbox_list_nullable(input: &[u8]) -> IResult<&[u8], Option<MailboxList<'_>>> {
135    map(
136        separated_list1(
137            tag(","),
138            alt((map(mailbox, Some), map(opt(cfws), |_| None))),
139        ),
140        |v: Vec<Option<_>>| {
141            let v: Vec<_> = v.into_iter().flatten().collect();
142            if v.is_empty() {
143                None
144            } else {
145                Some(MailboxList(v))
146            }
147        },
148    )(input)
149}
150
151/// Name of the email address
152///
153/// ```abnf
154///    name-addr       =   [display-name] angle-addr
155/// ```
156#[instrument_input("tracing")]
157fn name_addr(input: &[u8]) -> IResult<&[u8], MailboxRef<'_>> {
158    let (input, name) = opt(phrase)(input)?;
159    let (input, addrspec) = angle_addr(input)?;
160    Ok((input, MailboxRef { name, addrspec }))
161}
162
163/// Enclosed addr-spec with < and >
164///
165/// ```abnf
166/// angle-addr      =   [CFWS] "<" addr-spec ">" [CFWS] /
167///                     obs-angle-addr
168/// ```
169#[instrument_input("tracing")]
170pub fn angle_addr(input: &[u8]) -> IResult<&[u8], AddrSpec<'_>> {
171    delimited(
172        tuple((opt(cfws), tag(&[ascii::LT]), opt(obs_route))),
173        addr_spec,
174        pair(tag(&[ascii::GT]), opt(cfws)),
175    )(input)
176}
177
178///    obs-route       =   obs-domain-list ":"
179#[instrument_input("tracing")]
180fn obs_route(input: &[u8]) -> IResult<&[u8], Vec<Option<Domain<'_>>>> {
181    terminated(domain_list, tag(&[ascii::COL]))(input)
182}
183
184/// Domain list.
185///
186/// This implement a relaxed version of the obsolete syntax:
187/// ```abnf
188///    obs-domain-list =   *(CFWS / ",") "@" domain
189///                        *("," [CFWS] ["@" domain])
190/// ```
191/// The parser below is slightly more lenient as it allows domains list that
192/// contain no real domains (e.g. only commas).
193#[instrument_input("tracing")]
194fn domain_list(input: &[u8]) -> IResult<&[u8], Vec<Option<Domain<'_>>>> {
195    preceded(
196        opt(cfws),
197        separated_list1(
198            tag(&[ascii::COMMA]),
199            alt((
200                map(preceded(pair(opt(cfws), tag(&[ascii::AT])), domain), |d| {
201                    Some(d)
202                }),
203                map(opt(cfws), |_| None),
204            )),
205        ),
206    )(input)
207}
208
209/// AddrSpec
210///
211/// ```abnf
212///    addr-spec       =   local-part "@" domain
213/// ```
214#[instrument_input("tracing")]
215pub fn addr_spec(input: &[u8]) -> IResult<&[u8], AddrSpec<'_>> {
216    map(
217        tuple((
218            local_part,
219            tag(&[ascii::AT]),
220            domain,
221            opt(map(
222                many1(pair(tag(&[ascii::AT]), domain)), // for compatibility reasons with ENRON
223                |_| {
224                    #[cfg(feature = "tracing-recover")]
225                    warn!("addr_spec with multiple @ parts")
226                },
227            )),
228        )),
229        |(local_part, _, domain, _)| AddrSpec { local_part, domain },
230    )(input)
231}
232
233#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
234pub struct LocalPart<'a>(pub Vec<LocalPartToken<'a>>); // non-empty vec
235
236#[derive(Clone, Debug, PartialEq, ToStatic)]
237#[cfg_attr(feature = "arbitrary", derive(Arbitrary))]
238pub enum LocalPartToken<'a> {
239    Dot,
240    Word(Word<'a>),
241}
242impl<'a> ContainsUtf8 for LocalPartToken<'a> {
243    fn contains_utf8(&self) -> bool {
244        match self {
245            Self::Dot => false,
246            Self::Word(w) => w.contains_utf8(),
247        }
248    }
249}
250
251impl<'a> LocalPart<'a> {
252    fn chars<'b>(&'b self) -> LocalPartChars<'a, 'b> {
253        LocalPartChars {
254            l: self,
255            inner: LocalPartCharsInner::NextToken(0),
256        }
257    }
258}
259#[derive(Clone)]
260struct LocalPartChars<'a, 'b> {
261    l: &'b LocalPart<'a>,
262    inner: LocalPartCharsInner<'a, 'b>,
263}
264#[derive(Clone)]
265enum LocalPartCharsInner<'a, 'b> {
266    NextToken(usize),
267    Word(usize, WordChars<'a, 'b>),
268}
269impl<'a, 'b> Iterator for LocalPartChars<'a, 'b> {
270    type Item = char;
271    fn next(&mut self) -> Option<Self::Item> {
272        match &mut self.inner {
273            LocalPartCharsInner::NextToken(idx) => match self.l.0.get(*idx) {
274                Some(LocalPartToken::Dot) => {
275                    self.inner = LocalPartCharsInner::NextToken(*idx + 1);
276                    Some('.')
277                }
278                Some(LocalPartToken::Word(w)) => {
279                    self.inner = LocalPartCharsInner::Word(*idx, w.chars());
280                    self.next()
281                }
282                None => None,
283            },
284            LocalPartCharsInner::Word(idx, it) => match it.next() {
285                Some(c) => Some(c),
286                None => {
287                    self.inner = LocalPartCharsInner::NextToken(*idx + 1);
288                    self.next()
289                }
290            },
291        }
292    }
293}
294
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for LocalPart<'a> {
    /// Builds the token vector with `arbitrary_vec_nonempty`, in line with
    /// the non-empty invariant of `LocalPart`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let tokens = arbitrary_vec_nonempty(u)?;
        Ok(LocalPart(tokens))
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> FuzzEq for LocalPart<'a> {
    /// Two local parts are fuzz-equal when they yield the same sequence of
    /// characters, regardless of how they are split into tokens.
    fn fuzz_eq(&self, other: &Self) -> bool {
        self.chars().eq(other.chars())
    }
}
307
308impl<'a> Print for LocalPart<'a> {
309    // Assumption: `self.bytes()` only contains ASCII bytes.
310    fn print(&self, fmt: &mut impl Formatter) {
311        // Parsing of local parts is more lenient than printing (both wrt
312        // the spec and because of obsolete syntax). Thus, for printing, we
313        // only assume that `self` only contains ASCII and recompute how it
314        // should be printed.
315
316        // get the local part as a string
317        let as_str: String = self.chars().collect();
318
319        // If `as_str` is a dot-atom we print it as-is, otherwise
320        // we quote it. This ensures that our output is compliant with RFC5322.
321        if all_consuming(dot_atom_text)(as_str.as_bytes()).is_ok() {
322            fmt.write_bytes(as_str.as_bytes())
323        } else {
324            print_quoted(fmt, self.chars())
325        }
326    }
327}
328
329/// Local part
330///
331/// Compared to the RFC, we allow multiple dots.
332/// This is found in Enron emails and supported by Gmail.
333/// We also allow dots at the beginning and end.
334///
335/// This "local part" syntax is a superset of both the RFC's
336/// local-part and obs-local-part.
337///
338/// ```abnf
339/// local-part          = dot-atom / quoted-string / obs-local-part
340/// obs-local-part      = word *("." word)
341/// our-local-part      =  *"." word *(1*"." word) *"."
342/// ```
343#[instrument_input("tracing")]
344pub fn local_part(input: &[u8]) -> IResult<&[u8], LocalPart<'_>> {
345    let (input, _) = opt(cfws)(input)?;
346    let (input, prefix) = many0(local_part_dot)(input)?;
347    let (input, w) = local_part_word(input)?;
348    let (input, ws) = many0(pair(many1(local_part_dot), local_part_word))(input)?;
349    let (input, suffix) = many0(local_part_dot)(input)?;
350
351    if !prefix.is_empty() {
352        #[cfg(feature = "tracing-recover")]
353        warn!("best-effort local-part (leading dots)");
354    }
355    if !suffix.is_empty() {
356        #[cfg(feature = "tracing-recover")]
357        warn!("best-effort local part (trailing dots)");
358    }
359
360    let mut v: Vec<LocalPartToken> = vec![];
361    v.extend(prefix);
362    v.push(w);
363    for (dots, w) in ws.into_iter() {
364        if dots.len() > 1 {
365            #[cfg(feature = "tracing-recover")]
366            warn!("best-effort local part (consecutive dots)");
367        }
368        v.extend(dots);
369        v.push(w);
370    }
371    v.extend(suffix);
372    Ok((input, LocalPart(v)))
373}
374fn local_part_dot(input: &[u8]) -> IResult<&[u8], LocalPartToken<'_>> {
375    map(tag(&[ascii::PERIOD]), |_| LocalPartToken::Dot)(input)
376}
377fn local_part_word(input: &[u8]) -> IResult<&[u8], LocalPartToken<'_>> {
378    map(word, LocalPartToken::Word)(input)
379}
380
381#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
382#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
383pub enum Domain<'a> {
384    Atoms(Vec<Atom<'a>>), // non-empty vec
385    Literal(Vec<Dtext<'a>>),
386}
387
388impl<'a> Print for Domain<'a> {
389    fn print(&self, fmt: &mut impl Formatter) {
390        match self {
391            Domain::Atoms(atoms) => print_seq(fmt, atoms, |fmt| fmt.write_bytes(b".")),
392            Domain::Literal(parts) => {
393                fmt.write_bytes(b"[");
394                print_seq(fmt, parts, Formatter::write_fws);
395                fmt.write_bytes(b"]")
396            }
397        }
398    }
399}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Domain<'a> {
    /// Draws a boolean to pick the variant, then fills it: `Atoms` gets a
    /// non-empty vector, `Literal` gets an arbitrary one.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let pick_atoms: bool = u.arbitrary()?;
        let domain = if pick_atoms {
            Domain::Atoms(arbitrary_vec_nonempty(u)?)
        } else {
            Domain::Literal(u.arbitrary()?)
        };
        Ok(domain)
    }
}
410
411/// Domain
412///
413/// Rewritten so that domain is a superset of RFC-strict domain and obs_domain.
414///
415/// We also allow a final dot, which is not part of the RFC but occurs in
416/// old emails.
417///
418/// RFC5322:
419/// ```abnf
420///  domain          =   dot-atom / domain-literal / obs-domain
421///  obs-domain      =   atom *("." atom)
422///
423/// which is equivalent to:
424///
425/// domain           =   atom *("." atom) / domain-literal
426/// ```
427/// We implement:
428/// ```abnf
429///  our-domain      =   atom *("." atom) [.] / domain-literal
430/// ```
431#[instrument_input("tracing")]
432pub fn domain(input: &[u8]) -> IResult<&[u8], Domain<'_>> {
433    alt((
434        map(
435            terminated(
436                separated_list1(tag("."), atom),
437                opt(map(tag("."), |i| {
438                    #[cfg(feature = "tracing-recover")]
439                    warn!("trailing dot in domain");
440                    i
441                })),
442            ),
443            Domain::Atoms,
444        ),
445        domain_literal,
446    ))(input)
447}
448
449/// Domain literal
450///
451/// ```abnf
452///    domain-literal  =   [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
453/// ```
454#[instrument_input("tracing")]
455fn domain_literal(input: &[u8]) -> IResult<&[u8], Domain<'_>> {
456    delimited(
457        pair(opt(cfws), tag(&[ascii::LEFT_BRACKET])),
458        inner_domain_literal,
459        pair(tag(&[ascii::RIGHT_BRACKET]), opt(cfws)),
460    )(input)
461}
462
463#[instrument_input("tracing")]
464fn inner_domain_literal(input: &[u8]) -> IResult<&[u8], Domain<'_>> {
465    map(
466        terminated(many0(preceded(opt(fws), dtext)), opt(fws)),
467        Domain::Literal,
468    )(input)
469}
470
471// Invariant: must be non-empty
472#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
473pub struct Dtext<'a>(Cow<'a, str>);
474
475impl<'a> Dtext<'a> {
476    // Best-effort conversion of any `Dtext` contents into chars that all
477    // satisfy `is_strict_dtext`.
478    //
479    // - We drop characters which are not part of the strict syntax.
480    // Unfortunately this can drop printable characters, if they were part
481    // of a quote (\X), which is accepted by the obsolete syntax. However,
482    // we have no better option than to drop those since there is no way
483    // to represent them in the strict syntax.
484    // - Dropping obsolete characters may result in an empty string; however
485    // a `Dtext` must always be nonempty; in this case, we return "?", as a
486    // placeholder text.
487    // XXX it would be more consistent with the rest of the codebase if this
488    // sanitization was done at parsing time, resulting in an AST which is
489    // always "clean" as an invariant and can be printed directly.
490    fn to_strict_best_effort(&self) -> Self {
491        let mut strict_dtext: String = self.0.chars().filter(|c| is_strict_dtext(*c)).collect();
492        if strict_dtext.is_empty() {
493            strict_dtext.push('?')
494        }
495        Dtext(strict_dtext.into())
496    }
497}
498
499impl<'a> Print for Dtext<'a> {
500    fn print(&self, fmt: &mut impl Formatter) {
501        fmt.write_bytes(self.to_strict_best_effort().0.as_bytes())
502    }
503}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Dtext<'a> {
    /// Builds an owned `Dtext` from `arbitrary_string_nonempty_where`, called
    /// with the `is_dtext` predicate and 'X'.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text: String = arbitrary_string_nonempty_where(u, is_dtext, 'X')?;
        Ok(Dtext(text.into()))
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> FuzzEq for Dtext<'a> {
    /// Two `Dtext`s are fuzz-equal when their strict, best-effort forms are
    /// equal, which is also the form that gets printed.
    fn fuzz_eq(&self, other: &Self) -> bool {
        let lhs = self.to_strict_best_effort();
        let rhs = other.to_strict_best_effort();
        lhs == rhs
    }
}
517
518/// Is domain text
519///
520/// ```abnf
521///   dtext           =   %d33-90 /          ; Printable US-ASCII
522///                       %d94-126 /         ;  characters not including
523///                       obs-dtext          ;  "[", "]", or "\"
524///   obs-dtext       =   obs-NO-WS-CTL / quoted-pair
525/// ```
526/// following RFC6532, also allows non-ascii UTF-8 text
527fn is_dtext(c: char) -> bool {
528    is_strict_dtext(c) || is_obs_dtext(c)
529}
530fn is_strict_dtext(c: char) -> bool {
531    is_nonascii_or(|c| (0x21..=0x5A).contains(&c) || (0x5E..=0x7E).contains(&c))(c)
532}
533fn is_obs_dtext(c: char) -> bool {
534    is_ascii_and(is_obs_no_ws_ctl)(c)
535    //@FIXME does not support quoted pair yet while RFC requires it
536}
537
538#[instrument_input("tracing")]
539pub fn dtext<'a>(input: &'a [u8]) -> IResult<&'a [u8], Dtext<'a>> {
540    map(take_utf8_while1(is_dtext), Dtext)(input)
541}
542
543#[cfg(test)]
544mod tests {
545    use super::*;
546    use crate::print::tests::print_to_vec;
547    use crate::text::misc_token::PhraseToken;
548    use crate::text::quoted::QuotedString;
549
550    // NOTE: this roundtrip property does not hold in general for all valid
551    // 'addr-spec's, in particular because of the obsolete syntax (which gets
552    // dropped when printed back) but also because of quoting ('\a' gets printed
553    // back as 'a').
554    fn addr_roundtrip_as(addr: &[u8], parsed: AddrSpec<'_>) {
555        assert_eq!(addr_spec(addr), Ok((&b""[..], parsed.clone())));
556        let printed = print_to_vec(parsed);
557        assert_eq!(
558            String::from_utf8_lossy(addr),
559            String::from_utf8_lossy(&printed)
560        );
561    }
562    fn addr_roundtrip(addr: &[u8]) {
563        let (input, parsed) = addr_spec(addr).unwrap();
564        assert!(input.is_empty());
565        let printed = print_to_vec(parsed);
566        assert_eq!(
567            String::from_utf8_lossy(addr),
568            String::from_utf8_lossy(&printed)
569        );
570    }
571    fn addr_parsed_printed(addr: &[u8], parsed: AddrSpec<'_>, printed: &[u8]) {
572        assert_eq!(addr_spec(addr), Ok((&b""[..], parsed.clone())));
573        let reprinted = print_to_vec(parsed);
574        assert_eq!(
575            String::from_utf8_lossy(printed),
576            String::from_utf8_lossy(&reprinted)
577        );
578    }
579
580    // NOTE: like for addr-spec, this roundtrip property is not expected to hold
581    // in general.
582    fn mailbox_roundtrip_as(mbox: &[u8], parsed: MailboxRef<'_>) {
583        assert_eq!(mailbox(mbox), Ok((&b""[..], parsed.clone())));
584        let printed = print_to_vec(parsed);
585        assert_eq!(
586            String::from_utf8_lossy(mbox),
587            String::from_utf8_lossy(&printed)
588        );
589    }
590    fn mailbox_parsed_printed(mbox: &[u8], parsed: MailboxRef<'_>, printed: &[u8]) {
591        assert_eq!(mailbox(mbox), Ok((&b""[..], parsed.clone())));
592        let reprinted = print_to_vec(parsed);
593        assert_eq!(
594            String::from_utf8_lossy(printed),
595            String::from_utf8_lossy(&reprinted)
596        );
597    }
598
599    fn mailbox_list_reprint(mboxlist: &[u8], printed: &[u8]) {
600        let (input, parsed) = mailbox_list(mboxlist).unwrap();
601        assert!(input.is_empty());
602        let reprinted = print_to_vec(parsed);
603        assert_eq!(
604            String::from_utf8_lossy(&reprinted),
605            String::from_utf8_lossy(printed)
606        );
607    }
608
609    #[test]
610    fn test_addr_spec() {
611        addr_roundtrip_as(
612            b"alice@example.com",
613            AddrSpec {
614                local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
615                    "alice"[..].into(),
616                )))]),
617                domain: Domain::Atoms(vec![Atom("example"[..].into()), Atom("com"[..].into())]),
618            },
619        );
620
621        addr_roundtrip_as(
622            b"alice@smtp.example.com",
623            AddrSpec {
624                local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
625                    "alice"[..].into(),
626                )))]),
627                domain: Domain::Atoms(vec![
628                    Atom("smtp"[..].into()),
629                    Atom("example"[..].into()),
630                    Atom("com"[..].into()),
631                ]),
632            },
633        );
634
635        addr_roundtrip_as(
636            b"jsmith@[192.168.2.1]",
637            AddrSpec {
638                local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
639                    "jsmith"[..].into(),
640                )))]),
641                domain: Domain::Literal(vec![Dtext("192.168.2.1".into())]),
642            },
643        );
644
645        addr_roundtrip_as(
646            b"jsmith@[IPv6:2001:db8::1]",
647            AddrSpec {
648                local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
649                    "jsmith"[..].into(),
650                )))]),
651                domain: Domain::Literal(vec![Dtext("IPv6:2001:db8::1".into())]),
652            },
653        );
654
655        // UTF-8
656        addr_roundtrip_as(
657            "用户@例子.广告".as_bytes(),
658            AddrSpec {
659                local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom("用户".into())))]),
660                domain: Domain::Atoms(vec![Atom("例子".into()), Atom("广告".into())]),
661            },
662        );
663
664        // ASCII Edge cases
665        addr_roundtrip(b"user+mailbox/department=shipping@example.com");
666        addr_roundtrip(b"!#$%&'*+-/=?^_`.{|}~@example.com");
667
668        addr_roundtrip_as(
669            r#""Abc@def"@example.com"#.as_bytes(),
670            AddrSpec {
671                local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(
672                    vec!["Abc@def".into()],
673                )))]),
674                domain: Domain::Atoms(vec![Atom("example"[..].into()), Atom("com"[..].into())]),
675            },
676        );
677        addr_parsed_printed(
678            r#""Fred\ Bloggs"@example.com"#.as_bytes(),
679            AddrSpec {
680                local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(
681                    vec!["Fred".into(), " ".into(), "Bloggs".into()],
682                )))]),
683                domain: Domain::Atoms(vec![Atom("example"[..].into()), Atom("com"[..].into())]),
684            },
685            r#""Fred Bloggs"@example.com"#.as_bytes(), // escaping the space is unnecessary
686        );
687        addr_roundtrip_as(
688            r#""Joe.\\Blow"@example.com"#.as_bytes(),
689            AddrSpec {
690                local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(
691                    vec!["Joe.".into(), "\\".into(), "Blow".into()],
692                )))]),
693                domain: Domain::Atoms(vec![Atom("example"[..].into()), Atom("com"[..].into())]),
694            },
695        );
696
697        // edge-case: domain literal part that contains only obsolete bytes -> gets reprinted as '?'
698        let mut addr = b"foobar@[X ".to_vec();
699        addr.extend(&[1, 0x1c, b']']);
700        addr_parsed_printed(
701            &addr,
702            AddrSpec {
703                local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
704                    "foobar".into(),
705                )))]),
706                domain: Domain::Literal(vec![Dtext("X"[..].into()), Dtext("\x01\x1c".into())]),
707            },
708            b"foobar@[X ?]",
709        );
710    }
711
712    #[test]
713    fn test_gmail_noncompliant() {
714        addr_parsed_printed(
715            b"foo..bar@gmail.com",
716            AddrSpec {
717                local_part: LocalPart(vec![
718                    LocalPartToken::Word(Word::Atom(Atom("foo".into()))),
719                    LocalPartToken::Dot,
720                    LocalPartToken::Dot,
721                    LocalPartToken::Word(Word::Atom(Atom("bar".into()))),
722                ]),
723                domain: Domain::Atoms(vec![Atom("gmail"[..].into()), Atom("com"[..].into())]),
724            },
725            b"\"foo..bar\"@gmail.com",
726        )
727    }
728
729    #[test]
730    fn test_mailbox() {
731        mailbox_roundtrip_as(
732            r#""Joe Q. Public" <john.q.public@example.com>"#.as_bytes(),
733            MailboxRef {
734                name: Some(Phrase(vec![PhraseToken::Word(Word::Quoted(QuotedString(
735                    vec![
736                        "Joe"[..].into(),
737                        " ".into(),
738                        "Q."[..].into(),
739                        " ".into(),
740                        "Public"[..].into(),
741                    ],
742                )))])),
743                addrspec: AddrSpec {
744                    local_part: LocalPart(vec![
745                        LocalPartToken::Word(Word::Atom(Atom("john"[..].into()))),
746                        LocalPartToken::Dot,
747                        LocalPartToken::Word(Word::Atom(Atom("q"[..].into()))),
748                        LocalPartToken::Dot,
749                        LocalPartToken::Word(Word::Atom(Atom("public"[..].into()))),
750                    ]),
751                    domain: Domain::Atoms(vec![Atom("example"[..].into()), Atom("com"[..].into())]),
752                },
753            },
754        );
755
756        // UTF-8 with invalid bytes
757        assert_eq!(
758            mailbox(b"a\xD4\xC6z\xE7 <tigermeeting@mail.net>"),
759            Ok((
760                &b""[..],
761                MailboxRef {
762                    name: Some(Phrase(vec![PhraseToken::Word(Word::Atom(Atom(
763                        "a\u{FFFD}\u{FFFD}z\u{FFFD}".into()
764                    ))),])),
765                    addrspec: AddrSpec {
766                        local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
767                            "tigermeeting".into()
768                        )))]),
769                        domain: Domain::Atoms(vec![Atom("mail".into()), Atom("net".into())]),
770                    },
771                }
772            ))
773        );
774
775        mailbox_roundtrip_as(
776            r#"Mary Smith <mary@x.test>"#.as_bytes(),
777            MailboxRef {
778                name: Some(Phrase(vec![
779                    PhraseToken::Word(Word::Atom(Atom("Mary"[..].into()))),
780                    PhraseToken::Word(Word::Atom(Atom("Smith"[..].into()))),
781                ])),
782                addrspec: AddrSpec {
783                    local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
784                        "mary"[..].into(),
785                    )))]),
786                    domain: Domain::Atoms(vec![Atom("x"[..].into()), Atom("test"[..].into())]),
787                },
788            },
789        );
790
791        mailbox_roundtrip_as(
792            r#"jdoe@example.org"#.as_bytes(),
793            MailboxRef {
794                name: None,
795                addrspec: AddrSpec {
796                    local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
797                        "jdoe"[..].into(),
798                    )))]),
799                    domain: Domain::Atoms(vec![Atom("example"[..].into()), Atom("org"[..].into())]),
800                },
801            },
802        );
803
804        mailbox_roundtrip_as(
805            r#"Who? <one@y.test>"#.as_bytes(),
806            MailboxRef {
807                name: Some(Phrase(vec![PhraseToken::Word(Word::Atom(Atom(
808                    "Who?"[..].into(),
809                )))])),
810                addrspec: AddrSpec {
811                    local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
812                        "one"[..].into(),
813                    )))]),
814                    domain: Domain::Atoms(vec![Atom("y"[..].into()), Atom("test"[..].into())]),
815                },
816            },
817        );
818
819        mailbox_parsed_printed(
820            r#"<boss@nil.test>"#.as_bytes(),
821            MailboxRef {
822                name: None,
823                addrspec: AddrSpec {
824                    local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
825                        "boss"[..].into(),
826                    )))]),
827                    domain: Domain::Atoms(vec![Atom("nil"[..].into()), Atom("test"[..].into())]),
828                },
829            },
830            r#"boss@nil.test"#.as_bytes(),
831        );
832
833        mailbox_roundtrip_as(
834            r#""Giant; \"Big\" Box" <sysservices@example.net>"#.as_bytes(),
835            MailboxRef {
836                name: Some(Phrase(vec![PhraseToken::Word(Word::Quoted(QuotedString(
837                    vec![
838                        "Giant;"[..].into(),
839                        " ".into(),
840                        "\"".into(),
841                        "Big"[..].into(),
842                        "\"".into(),
843                        " ".into(),
844                        "Box"[..].into(),
845                    ],
846                )))])),
847                addrspec: AddrSpec {
848                    local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
849                        "sysservices"[..].into(),
850                    )))]),
851                    domain: Domain::Atoms(vec![Atom("example"[..].into()), Atom("net"[..].into())]),
852                },
853            },
854        );
855
856        // Tricky example illustrating a subtility of parsing encoded words.
857        // A mailbox can start with a phrase, which allows encoded words.
858        // However, "=?X?q?@[?=" *IS NOT* a valid encoded word in a phrase (because of '@' and '['),
859        // even though it is a valid encoded word in other contexts.
860        // This means that the correct way to parse this input is as an addr-spec...
861        mailbox_roundtrip_as(
862            r#"=?X?q?@[?= <?@?>]"#.as_bytes(),
863            MailboxRef {
864                name: None,
865                addrspec: AddrSpec {
866                    local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
867                        "=?X?q?"[..].into(),
868                    )))]),
869                    domain: Domain::Literal(vec![
870                        Dtext("?="[..].into()),
871                        Dtext("<?@?>"[..].into()),
872                    ]),
873                },
874            },
875        );
876    }
877
878    #[test]
879    fn test_domain_list() {
880        assert_eq!(
881            domain_list(
882                r#"(shhh it's coming)
883 ,
884 (not yet)
885 @33+4.com,,,,
886 ,,,,
887 (again)
888 @example.com,@yep.com,@a,@b,,,@c"#
889                    .as_bytes()
890            ),
891            Ok((
892                &b""[..],
893                vec![
894                    None,
895                    Some(Domain::Atoms(vec![
896                        Atom("33+4"[..].into()),
897                        Atom("com"[..].into())
898                    ])),
899                    None,
900                    None,
901                    None,
902                    None,
903                    None,
904                    None,
905                    None,
906                    Some(Domain::Atoms(vec![
907                        Atom("example"[..].into()),
908                        Atom("com"[..].into())
909                    ])),
910                    Some(Domain::Atoms(vec![
911                        Atom("yep"[..].into()),
912                        Atom("com"[..].into())
913                    ])),
914                    Some(Domain::Atoms(vec![Atom("a"[..].into())])),
915                    Some(Domain::Atoms(vec![Atom("b"[..].into())])),
916                    None,
917                    None,
918                    Some(Domain::Atoms(vec![Atom("c"[..].into())])),
919                ]
920            ))
921        );
922
923        assert_eq!(
924            domain_list(b",, ,@foo,"),
925            Ok((
926                &b""[..],
927                vec![
928                    None,
929                    None,
930                    None,
931                    Some(Domain::Atoms(vec![Atom("foo"[..].into())])),
932                    None,
933                ]
934            ))
935        );
936    }
937
938    #[test]
939    fn test_enron1() {
940        addr_parsed_printed(
941            "a..howard@enron.com".as_bytes(),
942            AddrSpec {
943                local_part: LocalPart(vec![
944                    LocalPartToken::Word(Word::Atom(Atom("a"[..].into()))),
945                    LocalPartToken::Dot,
946                    LocalPartToken::Dot,
947                    LocalPartToken::Word(Word::Atom(Atom("howard"[..].into()))),
948                ]),
949                domain: Domain::Atoms(vec![Atom("enron"[..].into()), Atom("com"[..].into())]),
950            },
951            r#""a..howard"@enron.com"#.as_bytes(),
952        );
953    }
954
955    #[test]
956    fn test_enron2() {
957        addr_parsed_printed(
958            ".nelson@enron.com".as_bytes(),
959            AddrSpec {
960                local_part: LocalPart(vec![
961                    LocalPartToken::Dot,
962                    LocalPartToken::Word(Word::Atom(Atom("nelson"[..].into()))),
963                ]),
964                domain: Domain::Atoms(vec![Atom("enron"[..].into()), Atom("com"[..].into())]),
965            },
966            r#"".nelson"@enron.com"#.as_bytes(),
967        );
968
969        // variant with leading whitespace
970        addr_parsed_printed(
971            "  .nelson@enron.com".as_bytes(),
972            AddrSpec {
973                local_part: LocalPart(vec![
974                    LocalPartToken::Dot,
975                    LocalPartToken::Word(Word::Atom(Atom("nelson"[..].into()))),
976                ]),
977                domain: Domain::Atoms(vec![Atom("enron"[..].into()), Atom("com"[..].into())]),
978            },
979            r#"".nelson"@enron.com"#.as_bytes(),
980        );
981    }
982
983    #[test]
984    fn test_enron3() {
985        addr_parsed_printed(
986            "ecn2760.conf.@enron.com".as_bytes(),
987            AddrSpec {
988                local_part: LocalPart(vec![
989                    LocalPartToken::Word(Word::Atom(Atom("ecn2760"[..].into()))),
990                    LocalPartToken::Dot,
991                    LocalPartToken::Word(Word::Atom(Atom("conf"[..].into()))),
992                    LocalPartToken::Dot,
993                ]),
994                domain: Domain::Atoms(vec![Atom("enron"[..].into()), Atom("com"[..].into())]),
995            },
996            r#""ecn2760.conf."@enron.com"#.as_bytes(),
997        );
998    }
999
1000    #[test]
1001    fn test_enron4() {
1002        mailbox_parsed_printed(
1003            r#"<"mark_kopinski/intl/acim/americancentury"@americancentury.com@enron.com>"#
1004                .as_bytes(),
1005            MailboxRef {
1006                name: None,
1007                addrspec: AddrSpec {
1008                    local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(
1009                        vec!["mark_kopinski/intl/acim/americancentury"[..].into()],
1010                    )))]),
1011                    domain: Domain::Atoms(vec![
1012                        Atom("americancentury"[..].into()),
1013                        Atom("com"[..].into()),
1014                    ]),
1015                },
1016            },
1017            b"mark_kopinski/intl/acim/americancentury@americancentury.com",
1018        );
1019    }
1020
1021    #[test]
1022    fn test_final_dot() {
1023        addr_parsed_printed(
1024            "201102080055@viruhosting.eu.".as_bytes(),
1025            AddrSpec {
1026                local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(Atom(
1027                    "201102080055"[..].into(),
1028                )))]),
1029                domain: Domain::Atoms(vec![Atom("viruhosting"[..].into()), Atom("eu"[..].into())]),
1030            },
1031            r#"201102080055@viruhosting.eu"#.as_bytes(),
1032        );
1033    }
1034
1035    #[test]
1036    fn test_mailbox_list() {
1037        mailbox_list_reprint(
1038            r#"Pete(A nice \) chap) <pete(his account)@silly.test(his host)>"#.as_bytes(),
1039            b"Pete <pete@silly.test>",
1040        );
1041
1042        mailbox_list_reprint(
1043            r#"Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>, <boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>"#.as_bytes(),
1044            r#"Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>, boss@nil.test, "Giant; \"Big\" Box" <sysservices@example.net>"#.as_bytes(),
1045        );
1046    }
1047
1048    #[test]
1049    fn test_mailbox_list_obs() {
1050        mailbox_list_reprint(
1051            b",foo@bar.com,,boss@nil.test,jdoe@example.org, \r\n ,,",
1052            br#"foo@bar.com, boss@nil.test, jdoe@example.org"#,
1053        );
1054    }
1055
1056    #[test]
1057    fn test_dtext_strictify() {
1058        let s: &str = &Dtext("\x03".into()).to_strict_best_effort().0;
1059        assert_eq!(s, "?")
1060    }
1061}