Skip to main content

eml_codec/text/
misc_token.rs

1#[cfg(feature = "arbitrary")]
2use arbitrary::Arbitrary;
3use bounded_static::ToStatic;
4use nom::{
5    branch::alt,
6    bytes::complete::{tag, take_while1},
7    combinator::{map, opt},
8    multi::{many0, many1, separated_list0},
9    sequence::delimited,
10    IResult, Parser,
11};
12use std::borrow::Cow;
13#[cfg(feature = "tracing")]
14use tracing::warn;
15
16use crate::i18n::ContainsUtf8;
17use crate::print::{print_seq, Formatter, Print, ToStringFromPrint};
18use crate::text::{
19    ascii,
20    encoding::{self, encoded_word, encoded_word_plain},
21    quoted::{quoted_string, QuotedString, QuotedStringChars},
22    utf8::take_utf8_while1,
23    whitespace::{cfws, fws, is_obs_no_ws_ctl},
24    words::{atom, is_vchar, mime_atom, Atom, MIMEAtom, MIMEAtomChars},
25};
26#[cfg(feature = "arbitrary")]
27use crate::{
28    arbitrary_utils::{
29        arbitrary_string_nonempty_where, arbitrary_vec_nonempty, arbitrary_whitespace_nonempty,
30    },
31    fuzz_eq::FuzzEq,
32};
33use eml_codec_derives::instrument_input;
34
35#[derive(Clone, ContainsUtf8, Debug, PartialEq, Default, ToStatic, ToStringFromPrint)]
36#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
37pub struct PhraseList<'a>(pub Vec<Phrase<'a>>); // must be nonempty
38
39#[cfg(feature = "arbitrary")]
40impl<'a> Arbitrary<'a> for PhraseList<'a> {
41    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
42        Ok(Self(arbitrary_vec_nonempty(u)?))
43    }
44}
45
46/// A comma-separated list of phrases. Handles the obsolete syntax:
47///
48/// obs-phrase-list =   [phrase / CFWS] *("," [phrase / CFWS])
49///
50/// where phrase-list (not defined explicitly in the RFC, but used in keywords):
51///
52/// phrase-list     = phrase *("," phrase)
53///
54/// We return an option to represent the empty-list case of the obsolete syntax;
55/// and in turn, a `PhraseList` is always non-empty.
56#[instrument_input("tracing")]
57pub fn phrase_list(input: &[u8]) -> IResult<&[u8], Option<PhraseList<'_>>> {
58    let (input, phrases_opt) =
59        separated_list0(tag(","), alt((map(phrase, Some), map(opt(cfws), |_| None))))(input)?;
60    let phrases: Vec<Phrase> = phrases_opt.into_iter().flatten().collect();
61    if phrases.is_empty() {
62        Ok((input, None))
63    } else {
64        Ok((input, Some(PhraseList(phrases))))
65    }
66}
67impl<'a> Print for PhraseList<'a> {
68    fn print(&self, fmt: &mut impl Formatter) {
69        print_seq(fmt, &self.0, |fmt| {
70            fmt.write_bytes(b",");
71            fmt.write_fws()
72        })
73    }
74}
75
76#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
77#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
78pub enum MIMEWord<'a> {
79    Quoted(QuotedString<'a>),
80    Atom(MIMEAtom<'a>),
81}
82impl Default for MIMEWord<'static> {
83    fn default() -> Self {
84        Self::Atom(MIMEAtom::default())
85    }
86}
87#[instrument_input("tracing")]
88pub fn mime_word(input: &[u8]) -> IResult<&[u8], MIMEWord<'_>> {
89    alt((
90        map(quoted_string, MIMEWord::Quoted),
91        map(mime_atom, MIMEWord::Atom),
92    ))(input)
93}
94
95impl<'a> MIMEWord<'a> {
96    pub fn chars<'b>(&'b self) -> MIMEWordChars<'a, 'b> {
97        match self {
98            MIMEWord::Quoted(q) => MIMEWordChars::Quoted(q.chars()),
99            MIMEWord::Atom(a) => MIMEWordChars::Atom(a.chars()),
100        }
101    }
102}
103impl<'a> Print for MIMEWord<'a> {
104    fn print(&self, fmt: &mut impl Formatter) {
105        match self {
106            MIMEWord::Quoted(q) => q.print(fmt),
107            MIMEWord::Atom(a) => a.print(fmt),
108        }
109    }
110}
111
112#[derive(Clone)]
113pub enum MIMEWordChars<'a, 'b> {
114    Quoted(QuotedStringChars<'a, 'b>),
115    Atom(MIMEAtomChars<'a, 'b>),
116}
117
118impl<'a, 'b> Iterator for MIMEWordChars<'a, 'b> {
119    type Item = char;
120    fn next(&mut self) -> Option<Self::Item> {
121        match self {
122            MIMEWordChars::Quoted(q) => q.next(),
123            MIMEWordChars::Atom(a) => a.next(),
124        }
125    }
126}
127
128#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
129#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
130pub enum Word<'a> {
131    Quoted(QuotedString<'a>),
132    Atom(Atom<'a>),
133}
134
135impl<'a> Print for Word<'a> {
136    fn print(&self, fmt: &mut impl Formatter) {
137        match self {
138            Word::Quoted(q) => q.print(fmt),
139            Word::Atom(a) => a.print(fmt),
140        }
141    }
142}
143
144impl<'a> Word<'a> {
145    pub fn chars<'b>(&'b self) -> WordChars<'a, 'b> {
146        match self {
147            Word::Quoted(q) => WordChars::Quoted(q.chars()),
148            Word::Atom(a) => WordChars::Atom(a.0.chars()),
149        }
150    }
151}
152
153#[derive(Clone)]
154pub enum WordChars<'a, 'b> {
155    Quoted(QuotedStringChars<'a, 'b>),
156    Atom(std::str::Chars<'b>),
157}
158
159impl<'a, 'b> Iterator for WordChars<'a, 'b> {
160    type Item = char;
161    fn next(&mut self) -> Option<Self::Item> {
162        match self {
163            WordChars::Quoted(q) => q.next(),
164            WordChars::Atom(a) => a.next(),
165        }
166    }
167}
168
169/// Word
170///
171/// ```abnf
172///    word            =   atom / quoted-string
173/// ```
174#[instrument_input("tracing")]
175pub fn word(input: &[u8]) -> IResult<&[u8], Word<'_>> {
176    alt((map(quoted_string, Word::Quoted), map(atom, Word::Atom)))(input)
177}
178
179#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
180#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
181pub enum PhraseToken<'a> {
182    // Word MUST NOT be a Word::Atom that represents an encoded
183    // word, ie of the form =?..?..?..?=
184    Word(Word<'a>),
185    Encoded(encoding::EncodedWord<'a>),
186}
187impl<'a> Print for PhraseToken<'a> {
188    fn print(&self, fmt: &mut impl Formatter) {
189        match self {
190            PhraseToken::Word(w) => w.print(fmt),
191            PhraseToken::Encoded(e) => e.print(fmt),
192        }
193    }
194}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for PhraseToken<'a> {
    /// Generates either a `Word` or an `Encoded` token, chosen by an
    /// arbitrary boolean.
    ///
    /// Returns `Error::IncorrectFormat` when the generated word is an atom
    /// whose text contains `=?`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let want_word: bool = u.arbitrary()?;
        if !want_word {
            return Ok(PhraseToken::Encoded(u.arbitrary()?));
        }

        let w: Word<'_> = u.arbitrary()?;
        // As a coarse-grained measure, reject any atom that contains '=?'
        // to avoid confusion with Encoded tokens.
        if matches!(&w, Word::Atom(a) if a.0.contains("=?")) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(PhraseToken::Word(w))
    }
}
213
214/// A part of a phrase or obs-phrase
215#[instrument_input("tracing")]
216pub fn phrase_token(input: &[u8]) -> IResult<&[u8], PhraseToken<'_>> {
217    alt((
218        // NOTE: try encoded words first because they can also be parsed as words.
219        map(
220            encoded_word(encoding::Context::Phrase),
221            PhraseToken::Encoded,
222        ),
223        map(word, PhraseToken::Word),
224        // "obs-phrase" allows periods while "phrase" does not.
225        // We could have a dedicated `Dot` constructor to `PhraseToken`
226        // to represent them, and later decide how `Dot` should be printed
227        // (note: printing must not use the obs- syntax!)
228        // Here, we use a different approach and directly parse naked dots
229        // as the AST for `"."` (note the quotes), which is allowed in the
230        // non-obs- syntax, thus ensuring that this AST can be safely
231        // printed as-is.
232        map(
233            delimited(opt(cfws), tag(&[ascii::PERIOD][..]), opt(cfws)),
234            |_| {
235                PhraseToken::Word(Word::Quoted(QuotedString(vec![Cow::Owned(
236                    ".".to_string(),
237                )])))
238            },
239        ),
240    ))(input)
241}
242
243// Must be a non-empty list.
244#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
245pub struct Phrase<'a>(pub Vec<PhraseToken<'a>>);
246
247impl<'a> Print for Phrase<'a> {
248    fn print(&self, fmt: &mut impl Formatter) {
249        print_seq(fmt, &self.0, Formatter::write_fws)
250    }
251}
252
253impl<'a> Phrase<'a> {
254    // Merges consecutive Encoded tokens
255    #[cfg(feature = "arbitrary")]
256    fn normalize(&self) -> Self {
257        let mut v = Vec::new();
258        for tok in &self.0 {
259            match (v.last_mut(), tok) {
260                (Some(PhraseToken::Encoded(ref mut e1)), PhraseToken::Encoded(e2)) => {
261                    e1.0.extend(e2.0.clone())
262                }
263                (_, tok) => v.push(tok.clone()),
264            }
265        }
266        Self(v)
267    }
268}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Phrase<'a> {
    /// Builds a phrase from the vector returned by `arbitrary_vec_nonempty`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let tokens: Vec<PhraseToken<'a>> = arbitrary_vec_nonempty(u)?;
        Ok(Self(tokens))
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> FuzzEq for Phrase<'a> {
    /// Compares the token lists of both phrases after normalizing each one
    /// (consecutive `Encoded` tokens merged).
    fn fuzz_eq(&self, other: &Self) -> bool {
        let lhs = self.normalize();
        let rhs = other.normalize();
        lhs.0.fuzz_eq(&rhs.0)
    }
}
281
282/// Phrase (including obsolete syntax)
283///
284/// ```abnf
285///    phrase          =   1*(encoded-word / word) / obs-phrase
286///    obs-phrase      =   word *(word / "." / CFWS)
287/// ```
288///
289/// (encoded-word comes from RFC2047)
290///
291/// The grammar above is equivalent to the following, which is
292/// what we parse:
293///
294/// ```abnf
295///   phrase       =  1*phrase_token
296///   phrase_token =  encoded-word / word / ([CFWS] "." [CFWS])
297/// ```
298#[instrument_input("tracing")]
299pub fn phrase(input: &[u8]) -> IResult<&[u8], Phrase<'_>> {
300    let (input, phrase) = map(many1(phrase_token), Phrase)(input)?;
301    Ok((input, phrase))
302}
303
304#[derive(Debug, PartialEq, Clone, ToStatic)]
305pub struct UtextToken<'a> {
306    txt: Cow<'a, str>,
307    obs: bool,
308}
309
310/// Compatible unstructured input
311///
312/// ```abnf
313/// obs-utext       =   %d0 / obs-NO-WS-CTL / VCHAR
314/// ```
315/// and also non-ascii UTF-8 text, following RFC6532.
316///
317/// The parser result records which parts of the input
318/// were using the obsolete syntax (i.e. not VCHAR).
319///
320/// Parses a single run of either obsolete or non-obsolete
321/// characters.
322fn obs_utext_token<'a>(input: &'a [u8]) -> IResult<&'a [u8], UtextToken<'a>> {
323    alt((
324        take_utf8_while1(is_vchar).map(|s| UtextToken { txt: s, obs: false }),
325        take_while1(|c| is_obs_no_ws_ctl(c) || c == ascii::NULL)
326            // SAFETY: from the line above we know that `s` contains ASCII bytes
327            // (they satisfy either is_obs_no_ws_ctl or are NULL).
328            .map(|s| unsafe { str::from_utf8_unchecked(s) })
329            .map(|s| UtextToken {
330                txt: Cow::Borrowed(s),
331                obs: true,
332            }),
333    ))(input)
334}
335
336#[derive(Debug, PartialEq, Copy, Clone, ToStatic)]
337pub enum UnstrTxtKind {
338    Txt, // non-space text
339    Obs, // non-space text using obsolete characters
340    Fws, // whitespace
341}
342
343#[derive(PartialEq, Clone, Debug, ToStatic)]
344#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
345pub enum UnstrToken<'a> {
346    Encoded(encoding::EncodedWord<'a>),
347    // `Plain` MUST NOT contain text that represents an encoded word,
348    // ie. of the form =?..?..?..?=
349    #[cfg_attr(feature = "arbitrary", fuzz_eq(use_eq))]
350    Plain(Cow<'a, str>, UnstrTxtKind),
351}
352
353impl<'a> UnstrToken<'a> {
354    pub(crate) fn from_plain(s: &'a str, kind: UnstrTxtKind) -> Self {
355        Self::Plain(Cow::Borrowed(s), kind)
356    }
357
358    fn from_utext(tok: UtextToken<'a>) -> Self {
359        if tok.obs {
360            Self::Plain(tok.txt, UnstrTxtKind::Obs)
361        } else {
362            Self::Plain(tok.txt, UnstrTxtKind::Txt)
363        }
364    }
365}
366impl<'a> ContainsUtf8 for UnstrToken<'a> {
367    fn contains_utf8(&self) -> bool {
368        match self {
369            UnstrToken::Encoded(_) => false,
370            UnstrToken::Plain(s, _) => s.contains_utf8(),
371        }
372    }
373}
374impl<'a> Print for UnstrToken<'a> {
375    fn print(&self, fmt: &mut impl Formatter) {
376        match self {
377            UnstrToken::Encoded(e) => e.print(fmt),
378            UnstrToken::Plain(txt, UnstrTxtKind::Txt) => fmt.write_bytes(txt.as_bytes()),
379            UnstrToken::Plain(_, UnstrTxtKind::Obs) =>
380                // skip obsolete parts
381                {}
382            UnstrToken::Plain(txt, UnstrTxtKind::Fws) => fmt.write_fws_bytes(txt.as_bytes()),
383        }
384    }
385}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for UnstrToken<'a> {
    /// Generates an `Encoded` token, a `Plain` text token, or a `Plain`
    /// whitespace token, chosen by an arbitrary integer in `0..=2`.
    ///
    /// Returns `Error::IncorrectFormat` when the generated text contains `=?`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        // XXX we do not generate `Obs` tokens because those are dropped at printing time.
        // this is somewhat of a hack.
        match u.int_in_range(0..=2)? {
            0 => Ok(UnstrToken::Encoded(u.arbitrary()?)),
            1 => {
                let txt = arbitrary_string_nonempty_where(u, is_vchar, 'X')?;
                // As a coarse-grained measure, reject text that contains '=?'
                // to avoid confusion with Encoded tokens.
                if txt.contains("=?") {
                    return Err(arbitrary::Error::IncorrectFormat);
                }
                Ok(UnstrToken::Plain(txt.into(), UnstrTxtKind::Txt))
            }
            2 => {
                let txt = arbitrary_whitespace_nonempty(u)?;
                Ok(UnstrToken::Plain(txt.into(), UnstrTxtKind::Fws))
            }
            // `int_in_range(0..=2)` was asked for a value in 0..=2 only.
            _ => unreachable!(),
        }
    }
}
410
411// Invariant:
412// - Encoded and Plain(_, Txt) tokens are always separated by whitespace
413//   (encoded words must be separated from other words by whitespace)
414// - There must not be whitespace token in between two Encoded tokens
415//   (whitespace in between encoded words is meaningless and is ignored during parsing)
416#[derive(Debug, PartialEq, Clone, ToStatic, ToStringFromPrint)]
417pub struct Unstructured<'a>(pub Vec<UnstrToken<'a>>);
418
419impl<'a> Print for Unstructured<'a> {
420    fn print(&self, fmt: &mut impl Formatter) {
421        for i in 0..self.0.len() {
422            let tok = &self.0[i];
423
424            // consecutive encoded tokens must be separated by whitespace
425            if i > 0 {
426                if let (UnstrToken::Encoded(_), UnstrToken::Encoded(_)) = (&self.0[i - 1], tok) {
427                    fmt.write_fws()
428                }
429            }
430
431            tok.print(fmt)
432        }
433    }
434}
435
436impl<'a> Unstructured<'a> {
437    pub fn to_string_keep_obs(&self) -> String {
438        let mut s = String::new();
439        for tok in &self.0 {
440            match tok {
441                UnstrToken::Encoded(e) => s.push_str(&e.to_string()),
442                UnstrToken::Plain(txt, _) => s.push_str(txt),
443            }
444        }
445        s
446    }
447
448    // Merges consecutive tokens of the same kind.
449    // Used to define fuzz_eq.
450    #[cfg(feature = "arbitrary")]
451    fn fuzz_eq_normalize(&self) -> Unstructured<'static> {
452        use bounded_static::ToBoundedStatic;
453        let mut v: Vec<UnstrToken<'static>> = Vec::new();
454        for tok in &self.0 {
455            match (v.last_mut(), tok) {
456                (Some(UnstrToken::Plain(s1, k1)), UnstrToken::Plain(s2, k2)) if k1 == k2 => {
457                    s1.to_mut().push_str(s2)
458                }
459                (Some(UnstrToken::Encoded(e1)), UnstrToken::Encoded(e2)) => {
460                    e1.0.extend(e2.to_static().0)
461                }
462                _ => v.push(tok.to_static()),
463            }
464        }
465        Unstructured(v)
466    }
467}
468impl<'a> ContainsUtf8 for Unstructured<'a> {
469    fn contains_utf8(&self) -> bool {
470        self.0.contains_utf8()
471    }
472}
473
#[cfg(feature = "arbitrary")]
impl<'a> FuzzEq for Unstructured<'a> {
    /// Compares the token lists of both values after passing each one
    /// through `fuzz_eq_normalize`.
    fn fuzz_eq(&self, other: &Self) -> bool {
        let lhs = self.fuzz_eq_normalize();
        let rhs = other.fuzz_eq_normalize();
        lhs.0.fuzz_eq(&rhs.0)
    }
}
482
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Unstructured<'a> {
    /// Generates a token list, returning `Error::IncorrectFormat` as soon as
    /// a generated token would break one of the `Unstructured` invariants.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        /// Coarse classification of a token, used to check the invariants.
        enum Kind {
            Encoded,
            Wsp,
            Txt,
        }
        fn kind_of(tok: &UnstrToken<'_>) -> Kind {
            match tok {
                UnstrToken::Encoded(_) => Kind::Encoded,
                UnstrToken::Plain(_, UnstrTxtKind::Fws) => Kind::Wsp,
                UnstrToken::Plain(_, _) => Kind::Txt,
            }
        }

        let mut tokens: Vec<UnstrToken> = Vec::new();
        // Kinds of the two most recent token groups; a run of consecutive
        // Wsp (or Txt) tokens counts as a single group.
        let mut prev_prev = None;
        let mut prev = None;
        let count = u.arbitrary_len::<UnstrToken>()?;
        for _ in 0..count {
            let tok: UnstrToken = u.arbitrary()?;
            match (&prev_prev, &prev, kind_of(&tok)) {
                // invariant: no whitespace between encoded tokens
                (Some(Kind::Encoded), Some(Kind::Wsp), Kind::Encoded)
                // invariant: encoded and text must be separated by whitespace
                | (_, Some(Kind::Encoded), Kind::Txt)
                | (_, Some(Kind::Txt), Kind::Encoded) => {
                    return Err(arbitrary::Error::IncorrectFormat)
                }

                // consecutive Txt or Wsp nodes should be treated as one when
                // tracking "the" previous token kind
                (_, Some(Kind::Wsp), Kind::Wsp) | (_, Some(Kind::Txt), Kind::Txt) => (),

                // a new group starts: shift the history by one
                (_, _, current) => {
                    prev_prev = prev;
                    prev = Some(current);
                }
            };
            tokens.push(tok)
        }
        Ok(Unstructured(tokens))
    }
}
526
527/// Unstructured header field body
528///
529/// Grammar from the RFC:
530/// ```abnf
531/// unstructured    =   (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
532/// obs-unstruct    =   *((*CR 1*(obs_utext / FWS)) / 1*LF) *CR   (cf errata)
533/// ```
534/// + RFC 2047 (MIME pt3) encoded words
535///
536/// However, in our relaxed parsing of line endings in whitespace::obs_crlf,
537/// bare CR or LF are treated as line endings and thus cannot also be part of
538/// the obs-unstruct syntax.
539///
540/// We thus choose to parse obs-unstruct minus the bare CR and LF, which
541/// corresponds to (also adding RFC2047 encoded words):
542///
543/// our-obs-unstruct    =   *(encoded-words / obs_utext / FWS)
544///
545/// This does not match the RFC but seems to better match real-world practices.
546#[instrument_input("tracing")]
547pub fn unstructured(input: &[u8]) -> IResult<&[u8], Unstructured<'_>> {
548    let (input, r) = many0(alt((
549        map(encoded_word_plain(encoding::Context::Unstructured), |w| {
550            vec![UnstrToken::Encoded(w)]
551        }),
552        map(obs_utext_token, |tok| vec![UnstrToken::from_utext(tok)]),
553        map(fws, |v| {
554            v.into_iter()
555                .map(|s| UnstrToken::from_plain(s, UnstrTxtKind::Fws))
556                .collect()
557        }),
558    )))(input)?;
559
560    Ok((input, Unstructured(r.into_iter().flatten().collect())))
561}
562
#[cfg(test)]
mod tests {
    use super::*;
    use crate::print::tests::print_to_vec;
    use crate::text::charset::EmailCharset;
    use crate::text::encoding::{EncodedWord, EncodedWordToken, QuotedChunk, QuotedWord};

    /// Parses several phrases and checks how each one is printed back.
    #[test]
    fn test_phrase() {
        // Plain words are reprinted unchanged.
        let (_, plain) = phrase(b"hello world").unwrap();
        assert_eq!(print_to_vec(plain), b"hello world".to_vec());

        // XXX: would we rather want this to be reprinted as "salut le monde"?
        // (since the quotes are unnecessary in this case)
        let (_, quoted) = phrase(b"salut \"le\" monde").unwrap();
        assert_eq!(print_to_vec(quoted), b"salut \"le\" monde".to_vec());

        // The phrase stops before the line break that is not followed by
        // whitespace.
        let (rest, folded) = phrase(b"fin\r\n du\r\nmonde").unwrap();
        assert_eq!(rest, &b"\r\nmonde"[..]);
        assert_eq!(&print_to_vec(folded), b"fin du");

        // A naked dot is reprinted as a quoted string.
        let (rest, dotted) = phrase(b"foo.bar").unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(&print_to_vec(dotted), b"foo \".\" bar");
    }

    /// Empty and whitespace-only list entries are dropped.
    #[test]
    fn test_phrase_list() {
        let (rest, list) = phrase_list(b",abc def,,   ,ghi").unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(&print_to_vec(list.as_ref().unwrap()), b"abc def, ghi");
    }

    /// Checks the token lists produced for a few unstructured inputs.
    #[test]
    fn test_unstructured() {
        // Empty input gives an empty token list.
        let (rest, parsed) = unstructured(b"").unwrap();
        let expected = Unstructured(vec![]);
        assert_eq!(rest, &b""[..]);
        assert_eq!(parsed, expected);

        // Whitespace only.
        let (rest, parsed) = unstructured(b" \t").unwrap();
        let expected = Unstructured(vec![UnstrToken::Plain(
            " \t"[..].into(),
            UnstrTxtKind::Fws,
        )]);
        assert_eq!(rest, &b""[..]);
        assert_eq!(parsed, expected);

        // Text, whitespace, then an encoded word.
        let (rest, parsed) = unstructured(b"foo =?UTF-8?q?foo?=").unwrap();
        let encoded_foo = EncodedWord(vec![EncodedWordToken::Quoted(QuotedWord {
            enc: EmailCharset::utf8(),
            chunks: vec![QuotedChunk::Safe(b"foo"[..].into())],
        })]);
        let expected = Unstructured(vec![
            UnstrToken::Plain("foo"[..].into(), UnstrTxtKind::Txt),
            UnstrToken::Plain(" "[..].into(), UnstrTxtKind::Fws),
            UnstrToken::Encoded(encoded_foo),
        ]);
        assert_eq!(rest, &b""[..]);
        assert_eq!(parsed, expected);

        // RFC 2047 specifies that encoded words MUST be separated from other
        // words by whitespace. otherwise, we parse them as normal text...
        let (rest, parsed) = unstructured(b"foo=?UTF-8?q?foo?=").unwrap();
        let expected = Unstructured(vec![UnstrToken::Plain(
            "foo=?UTF-8?q?foo?="[..].into(),
            UnstrTxtKind::Txt,
        )]);
        assert_eq!(rest, &b""[..]);
        assert_eq!(parsed, expected);

        // trailing FWS is allowed
        let (rest, parsed) = unstructured(b"foo\r\n\t").unwrap();
        let expected = Unstructured(vec![
            UnstrToken::Plain("foo"[..].into(), UnstrTxtKind::Txt),
            UnstrToken::Plain("\t"[..].into(), UnstrTxtKind::Fws),
        ]);
        assert_eq!(rest, &b""[..]);
        assert_eq!(parsed, expected);
    }
}