eml_codec/imf/
mailbox.rs

1use nom::{
2    branch::alt,
3    bytes::complete::{tag, take_while1},
4    combinator::{into, map, opt},
5    multi::{many0, separated_list1},
6    sequence::{delimited, pair, preceded, terminated, tuple},
7    IResult,
8};
9use std::fmt;
10
11use crate::text::ascii;
12use crate::text::misc_token::{phrase, word, Phrase, Word};
13use crate::text::whitespace::{cfws, fws, is_obs_no_ws_ctl};
14use crate::text::words::atom;
15
16#[derive(PartialEq)]
17pub struct AddrSpec<'a> {
18    pub local_part: LocalPart<'a>,
19    pub domain: Domain<'a>,
20}
21impl<'a> ToString for AddrSpec<'a> {
22    fn to_string(&self) -> String {
23        format!(
24            "{}@{}",
25            self.local_part.to_string(),
26            self.domain.to_string()
27        )
28    }
29}
30impl<'a> fmt::Debug for AddrSpec<'a> {
31    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
32        fmt.debug_tuple("AddrSpec")
33            .field(&format_args!("\"{}\"", self.to_string()))
34            .finish()
35    }
36}
37
38#[derive(Debug, PartialEq)]
39pub struct MailboxRef<'a> {
40    // The actual "email address" like hello@example.com
41    pub addrspec: AddrSpec<'a>,
42    pub name: Option<Phrase<'a>>,
43}
44impl<'a> ToString for MailboxRef<'a> {
45    fn to_string(&self) -> String {
46        match &self.name {
47            Some(n) => format!("{} <{}>", n.to_string(), self.addrspec.to_string()),
48            None => self.addrspec.to_string(),
49        }
50    }
51}
52impl<'a> From<AddrSpec<'a>> for MailboxRef<'a> {
53    fn from(addr: AddrSpec<'a>) -> Self {
54        MailboxRef {
55            name: None,
56            addrspec: addr,
57        }
58    }
59}
60pub type MailboxList<'a> = Vec<MailboxRef<'a>>;
61
62/// Mailbox
63///
64/// ```abnf
65///    mailbox         =   name-addr / addr-spec
66/// ```
67pub fn mailbox(input: &[u8]) -> IResult<&[u8], MailboxRef> {
68    alt((name_addr, into(addr_spec)))(input)
69}
70
71/// Name of the email address
72///
73/// ```abnf
74///    name-addr       =   [display-name] angle-addr
75/// ```
76fn name_addr(input: &[u8]) -> IResult<&[u8], MailboxRef> {
77    let (input, name) = opt(phrase)(input)?;
78    let (input, addrspec) = angle_addr(input)?;
79    Ok((input, MailboxRef { name, addrspec }))
80}
81
82/// Enclosed addr-spec with < and >
83///
84/// ```abnf
85/// angle-addr      =   [CFWS] "<" addr-spec ">" [CFWS] /
86///                     obs-angle-addr
87/// ```
88pub fn angle_addr(input: &[u8]) -> IResult<&[u8], AddrSpec> {
89    delimited(
90        tuple((opt(cfws), tag(&[ascii::LT]), opt(obs_route))),
91        addr_spec,
92        pair(tag(&[ascii::GT]), opt(cfws)),
93    )(input)
94}
95
96///    obs-route       =   obs-domain-list ":"
97fn obs_route(input: &[u8]) -> IResult<&[u8], Vec<Option<Domain>>> {
98    terminated(obs_domain_list, tag(&[ascii::COL]))(input)
99}
100
101/// ```abnf
102///    obs-domain-list =   *(CFWS / ",") "@" domain
103///                        *("," [CFWS] ["@" domain])
104/// ```
105fn obs_domain_list(input: &[u8]) -> IResult<&[u8], Vec<Option<Domain>>> {
106    preceded(
107        many0(cfws),
108        separated_list1(
109            tag(&[ascii::COMMA]),
110            preceded(many0(cfws), opt(preceded(tag(&[ascii::AT]), obs_domain))),
111        ),
112    )(input)
113}
114
115/// AddrSpec
116///
117/// ```abnf
118///    addr-spec       =   local-part "@" domain
119/// ```
120/// @FIXME: this system does not work to alternate between strict and obsolete
121/// so I force obsolete for now...
122pub fn addr_spec(input: &[u8]) -> IResult<&[u8], AddrSpec> {
123    map(
124        tuple((
125            obs_local_part,
126            tag(&[ascii::AT]),
127            obs_domain,
128            many0(pair(tag(&[ascii::AT]), obs_domain)), // for compatibility reasons with ENRON
129        )),
130        |(local_part, _, domain, _)| AddrSpec { local_part, domain },
131    )(input)
132}
133
134#[derive(Debug, PartialEq)]
135pub enum LocalPartToken<'a> {
136    Dot,
137    Word(Word<'a>),
138}
139
140#[derive(Debug, PartialEq)]
141pub struct LocalPart<'a>(pub Vec<LocalPartToken<'a>>);
142
143impl<'a> LocalPart<'a> {
144    pub fn to_string(&self) -> String {
145        self.0.iter().fold(String::new(), |mut acc, token| {
146            match token {
147                LocalPartToken::Dot => acc.push('.'),
148                LocalPartToken::Word(v) => acc.push_str(v.to_string().as_ref()),
149            }
150            acc
151        })
152    }
153}
154
155/// Obsolete local part
156///
157/// Compared to the RFC, we allow multiple dots.
158/// This is found in Enron emails and supported by Gmail.
159///
160/// Obsolete local part is a superset of strict_local_part:
161/// anything that is parsed by strict_local_part will be parsed by
162/// obs_local_part.
163///
164/// ```abnf
165/// obs-local-part  =  *("." / word)
166/// ```
167fn obs_local_part(input: &[u8]) -> IResult<&[u8], LocalPart> {
168    map(
169        many0(alt((
170            map(tag(&[ascii::PERIOD]), |_| LocalPartToken::Dot),
171            map(word, LocalPartToken::Word),
172        ))),
173        LocalPart,
174    )(input)
175}
176
177#[derive(PartialEq)]
178pub enum Domain<'a> {
179    Atoms(Vec<&'a [u8]>),
180    Litteral(Vec<&'a [u8]>),
181}
182
183impl<'a> ToString for Domain<'a> {
184    fn to_string(&self) -> String {
185        match self {
186            Domain::Atoms(v) => v
187                .iter()
188                .map(|v| {
189                    encoding_rs::UTF_8
190                        .decode_without_bom_handling(v)
191                        .0
192                        .to_string()
193                })
194                .collect::<Vec<String>>()
195                .join("."),
196            Domain::Litteral(v) => {
197                let inner = v
198                    .iter()
199                    .map(|v| {
200                        encoding_rs::UTF_8
201                            .decode_without_bom_handling(v)
202                            .0
203                            .to_string()
204                    })
205                    .collect::<Vec<String>>()
206                    .join(" ");
207                format!("[{}]", inner)
208            }
209        }
210    }
211}
212impl<'a> fmt::Debug for Domain<'a> {
213    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
214        fmt.debug_tuple("Domain")
215            .field(&format_args!("\"{}\"", self.to_string()))
216            .finish()
217    }
218}
219
220/// Obsolete domain
221///
222/// Rewritten so that obs_domain is a superset
223/// of strict_domain.
224///
225/// ```abnf
226///  obs-domain      =   atom *("." atom) / domain-literal
227/// ```
228pub fn obs_domain(input: &[u8]) -> IResult<&[u8], Domain> {
229    alt((
230        map(separated_list1(tag("."), atom), Domain::Atoms),
231        domain_litteral,
232    ))(input)
233}
234
235/// Domain litteral
236///
237/// ```abnf
238///    domain-literal  =   [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
239/// ```
240fn domain_litteral(input: &[u8]) -> IResult<&[u8], Domain> {
241    delimited(
242        pair(opt(cfws), tag(&[ascii::LEFT_BRACKET])),
243        inner_domain_litteral,
244        pair(tag(&[ascii::RIGHT_BRACKET]), opt(cfws)),
245    )(input)
246}
247
248fn inner_domain_litteral(input: &[u8]) -> IResult<&[u8], Domain> {
249    map(
250        terminated(many0(preceded(opt(fws), take_while1(is_dtext))), opt(fws)),
251        Domain::Litteral,
252    )(input)
253}
254
/// Returns `true` for bytes in the ranges 0x21..=0x5A and 0x5E..=0x7E, i.e.
/// printable US-ASCII except `[`, `\` and `]`.
fn is_strict_dtext(c: u8) -> bool {
    matches!(c, 0x21..=0x5A | 0x5E..=0x7E)
}
258
259/// Is domain text
260///
261/// ```abnf
262///   dtext           =   %d33-90 /          ; Printable US-ASCII
263///                       %d94-126 /         ;  characters not including
264///                       obs-dtext          ;  "[", "]", or "\"
265///   obs-dtext       =   obs-NO-WS-CTL / quoted-pair
266/// ```
267pub fn is_dtext(c: u8) -> bool {
268    is_strict_dtext(c) || is_obs_no_ws_ctl(c)
269    //@FIXME does not support quoted pair yet while RFC requires it
270}
271
272#[cfg(test)]
273mod tests {
274    use super::*;
275    use crate::text::quoted::QuotedString;
276
277    #[test]
278    fn test_addr_spec() {
279        assert_eq!(
280            addr_spec(b"alice@example.com"),
281            Ok((
282                &b""[..],
283                AddrSpec {
284                    local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"alice"[..]))]),
285                    domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
286                }
287            ))
288        );
289
290        assert_eq!(
291            addr_spec(b"jsmith@[192.168.2.1]").unwrap().1.to_string(),
292            "jsmith@[192.168.2.1]".to_string(),
293        );
294        assert_eq!(
295            addr_spec(b"jsmith@[IPv6:2001:db8::1]")
296                .unwrap()
297                .1
298                .to_string(),
299            "jsmith@[IPv6:2001:db8::1]".to_string(),
300        );
301
302        // UTF-8
303        // @FIXME ASCII SUPPORT IS BROKEN
304        /*assert_eq!(
305            addr_spec("用户@例子.广告"),
306            Ok((
307                "",
308                AddrSpec {
309                    local_part: "用户".into(),
310                    domain: "例子.广告".into()
311                }
312            ))
313        );*/
314
315        // ASCII Edge cases
316        assert_eq!(
317            addr_spec(b"user+mailbox/department=shipping@example.com")
318                .unwrap()
319                .1
320                .to_string(),
321            "user+mailbox/department=shipping@example.com".to_string(),
322        );
323
324        assert_eq!(
325            addr_spec(b"!#$%&'*+-/=?^_`.{|}~@example.com")
326                .unwrap()
327                .1
328                .to_string(),
329            "!#$%&'*+-/=?^_`.{|}~@example.com".to_string(),
330        );
331
332        assert_eq!(
333            addr_spec(r#""Abc@def"@example.com"#.as_bytes()),
334            Ok((
335                &b""[..],
336                AddrSpec {
337                    local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(
338                        vec![b"Abc@def"]
339                    )))]),
340                    domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
341                }
342            ))
343        );
344        assert_eq!(
345            addr_spec(r#""Fred\ Bloggs"@example.com"#.as_bytes()),
346            Ok((
347                &b""[..],
348                AddrSpec {
349                    local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(
350                        vec![b"Fred", b" ", b"Bloggs"]
351                    )))]),
352                    domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
353                }
354            ))
355        );
356        assert_eq!(
357            addr_spec(r#""Joe.\\Blow"@example.com"#.as_bytes()),
358            Ok((
359                &b""[..],
360                AddrSpec {
361                    local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(
362                        vec![b"Joe.", &[ascii::BACKSLASH], b"Blow"]
363                    )))]),
364                    domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
365                }
366            ))
367        );
368    }
369
370    #[test]
371    fn test_mailbox() {
372        assert_eq!(
373            mailbox(r#""Joe Q. Public" <john.q.public@example.com>"#.as_bytes()),
374            Ok((
375                &b""[..],
376                MailboxRef {
377                    name: Some(Phrase(vec![Word::Quoted(QuotedString(vec![
378                        &b"Joe"[..],
379                        &[ascii::SP],
380                        &b"Q."[..],
381                        &[ascii::SP],
382                        &b"Public"[..]
383                    ]))])),
384                    addrspec: AddrSpec {
385                        local_part: LocalPart(vec![
386                            LocalPartToken::Word(Word::Atom(&b"john"[..])),
387                            LocalPartToken::Dot,
388                            LocalPartToken::Word(Word::Atom(&b"q"[..])),
389                            LocalPartToken::Dot,
390                            LocalPartToken::Word(Word::Atom(&b"public"[..])),
391                        ]),
392                        domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
393                    }
394                }
395            ))
396        );
397
398        assert_eq!(
399            mailbox(r#"Mary Smith <mary@x.test>"#.as_bytes()),
400            Ok((
401                &b""[..],
402                MailboxRef {
403                    name: Some(Phrase(vec![
404                        Word::Atom(&b"Mary"[..]),
405                        Word::Atom(&b"Smith"[..])
406                    ])),
407                    addrspec: AddrSpec {
408                        local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"mary"[..]))]),
409                        domain: Domain::Atoms(vec![&b"x"[..], &b"test"[..]]),
410                    }
411                }
412            ))
413        );
414
415        assert_eq!(
416            mailbox(r#"jdoe@example.org"#.as_bytes()),
417            Ok((
418                &b""[..],
419                MailboxRef {
420                    name: None,
421                    addrspec: AddrSpec {
422                        local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"jdoe"[..]))]),
423                        domain: Domain::Atoms(vec![&b"example"[..], &b"org"[..]]),
424                    }
425                }
426            ))
427        );
428
429        assert_eq!(
430            mailbox(r#"Who? <one@y.test>"#.as_bytes()),
431            Ok((
432                &b""[..],
433                MailboxRef {
434                    name: Some(Phrase(vec![Word::Atom(&b"Who?"[..])])),
435                    addrspec: AddrSpec {
436                        local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"one"[..]))]),
437                        domain: Domain::Atoms(vec![&b"y"[..], &b"test"[..]]),
438                    }
439                }
440            ))
441        );
442
443        assert_eq!(
444            mailbox(r#"<boss@nil.test>"#.as_bytes()),
445            Ok((
446                &b""[..],
447                MailboxRef {
448                    name: None,
449                    addrspec: AddrSpec {
450                        local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"boss"[..]))]),
451                        domain: Domain::Atoms(vec![&b"nil"[..], &b"test"[..]]),
452                    }
453                }
454            ))
455        );
456
457        assert_eq!(
458            mailbox(r#""Giant; \"Big\" Box" <sysservices@example.net>"#.as_bytes()),
459            Ok((
460                &b""[..],
461                MailboxRef {
462                    name: Some(Phrase(vec![Word::Quoted(QuotedString(vec![
463                        &b"Giant;"[..],
464                        &[ascii::SP],
465                        &[ascii::DQUOTE],
466                        &b"Big"[..],
467                        &[ascii::DQUOTE],
468                        &[ascii::SP],
469                        &b"Box"[..]
470                    ]))])),
471                    addrspec: AddrSpec {
472                        local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(
473                            &b"sysservices"[..]
474                        ))]),
475                        domain: Domain::Atoms(vec![&b"example"[..], &b"net"[..]]),
476                    }
477                }
478            ))
479        );
480    }
481
482    #[test]
483    fn test_obs_domain_list() {
484        assert_eq!(
485            obs_domain_list(
486                r#"(shhh it's coming)
487 ,
488 (not yet)
489 @33+4.com,,,,
490 ,,,,
491 (again)
492 @example.com,@yep.com,@a,@b,,,@c"#
493                    .as_bytes()
494            ),
495            Ok((
496                &b""[..],
497                vec![
498                    None,
499                    Some(Domain::Atoms(vec![&b"33+4"[..], &b"com"[..]])),
500                    None,
501                    None,
502                    None,
503                    None,
504                    None,
505                    None,
506                    None,
507                    Some(Domain::Atoms(vec![&b"example"[..], &b"com"[..]])),
508                    Some(Domain::Atoms(vec![&b"yep"[..], &b"com"[..]])),
509                    Some(Domain::Atoms(vec![&b"a"[..]])),
510                    Some(Domain::Atoms(vec![&b"b"[..]])),
511                    None,
512                    None,
513                    Some(Domain::Atoms(vec![&b"c"[..]])),
514                ]
515            ))
516        );
517    }
518
519    #[test]
520    fn test_enron1() {
521        assert_eq!(
522            addr_spec("a..howard@enron.com".as_bytes()),
523            Ok((
524                &b""[..],
525                AddrSpec {
526                    local_part: LocalPart(vec![
527                        LocalPartToken::Word(Word::Atom(&b"a"[..])),
528                        LocalPartToken::Dot,
529                        LocalPartToken::Dot,
530                        LocalPartToken::Word(Word::Atom(&b"howard"[..])),
531                    ]),
532                    domain: Domain::Atoms(vec![&b"enron"[..], &b"com"[..]]),
533                }
534            ))
535        );
536    }
537
538    #[test]
539    fn test_enron2() {
540        assert_eq!(
541            addr_spec(".nelson@enron.com".as_bytes()),
542            Ok((
543                &b""[..],
544                AddrSpec {
545                    local_part: LocalPart(vec![
546                        LocalPartToken::Dot,
547                        LocalPartToken::Word(Word::Atom(&b"nelson"[..])),
548                    ]),
549                    domain: Domain::Atoms(vec![&b"enron"[..], &b"com"[..]]),
550                }
551            ))
552        );
553    }
554
555    #[test]
556    fn test_enron3() {
557        assert_eq!(
558            addr_spec("ecn2760.conf.@enron.com".as_bytes()),
559            Ok((
560                &b""[..],
561                AddrSpec {
562                    local_part: LocalPart(vec![
563                        LocalPartToken::Word(Word::Atom(&b"ecn2760"[..])),
564                        LocalPartToken::Dot,
565                        LocalPartToken::Word(Word::Atom(&b"conf"[..])),
566                        LocalPartToken::Dot,
567                    ]),
568                    domain: Domain::Atoms(vec![&b"enron"[..], &b"com"[..]]),
569                }
570            ))
571        );
572    }
573
574    #[test]
575    fn test_enron4() {
576        assert_eq!(
577            mailbox(
578                r#"<"mark_kopinski/intl/acim/americancentury"@americancentury.com@enron.com>"#
579                    .as_bytes()
580            ),
581            Ok((
582                &b""[..],
583                MailboxRef {
584                    name: None,
585                    addrspec: AddrSpec {
586                        local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(
587                            QuotedString(vec![&b"mark_kopinski/intl/acim/americancentury"[..],])
588                        ))]),
589                        domain: Domain::Atoms(vec![&b"americancentury"[..], &b"com"[..]]),
590                    }
591                }
592            ))
593        );
594    }
595}