1#[cfg(feature = "arbitrary")]
2use arbitrary::Arbitrary;
3use bounded_static::ToStatic;
4use nom::{
5 branch::alt,
6 bytes::complete::{tag, take_while1},
7 combinator::{map, opt},
8 multi::{many0, many1, separated_list0},
9 sequence::delimited,
10 IResult, Parser,
11};
12use std::borrow::Cow;
13#[cfg(feature = "tracing")]
14use tracing::warn;
15
16use crate::i18n::ContainsUtf8;
17use crate::print::{print_seq, Formatter, Print, ToStringFromPrint};
18use crate::text::{
19 ascii,
20 encoding::{self, encoded_word, encoded_word_plain},
21 quoted::{quoted_string, QuotedString, QuotedStringChars},
22 utf8::take_utf8_while1,
23 whitespace::{cfws, fws, is_obs_no_ws_ctl},
24 words::{atom, is_vchar, mime_atom, Atom, MIMEAtom, MIMEAtomChars},
25};
26#[cfg(feature = "arbitrary")]
27use crate::{
28 arbitrary_utils::{
29 arbitrary_string_nonempty_where, arbitrary_vec_nonempty, arbitrary_whitespace_nonempty,
30 },
31 fuzz_eq::FuzzEq,
32};
33use eml_codec_derives::instrument_input;
34
35#[derive(Clone, ContainsUtf8, Debug, PartialEq, Default, ToStatic, ToStringFromPrint)]
36#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
37pub struct PhraseList<'a>(pub Vec<Phrase<'a>>); #[cfg(feature = "arbitrary")]
40impl<'a> Arbitrary<'a> for PhraseList<'a> {
41 fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
42 Ok(Self(arbitrary_vec_nonempty(u)?))
43 }
44}
45
46#[instrument_input("tracing")]
57pub fn phrase_list(input: &[u8]) -> IResult<&[u8], Option<PhraseList<'_>>> {
58 let (input, phrases_opt) =
59 separated_list0(tag(","), alt((map(phrase, Some), map(opt(cfws), |_| None))))(input)?;
60 let phrases: Vec<Phrase> = phrases_opt.into_iter().flatten().collect();
61 if phrases.is_empty() {
62 Ok((input, None))
63 } else {
64 Ok((input, Some(PhraseList(phrases))))
65 }
66}
67impl<'a> Print for PhraseList<'a> {
68 fn print(&self, fmt: &mut impl Formatter) {
69 print_seq(fmt, &self.0, |fmt| {
70 fmt.write_bytes(b",");
71 fmt.write_fws()
72 })
73 }
74}
75
/// A MIME word: either a quoted string or a MIME atom.
///
/// Parsed by [`mime_word`].
#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
pub enum MIMEWord<'a> {
    /// The word was written as a quoted string.
    Quoted(QuotedString<'a>),
    /// The word was written as a MIME atom.
    Atom(MIMEAtom<'a>),
}
82impl Default for MIMEWord<'static> {
83 fn default() -> Self {
84 Self::Atom(MIMEAtom::default())
85 }
86}
87#[instrument_input("tracing")]
88pub fn mime_word(input: &[u8]) -> IResult<&[u8], MIMEWord<'_>> {
89 alt((
90 map(quoted_string, MIMEWord::Quoted),
91 map(mime_atom, MIMEWord::Atom),
92 ))(input)
93}
94
95impl<'a> MIMEWord<'a> {
96 pub fn chars<'b>(&'b self) -> MIMEWordChars<'a, 'b> {
97 match self {
98 MIMEWord::Quoted(q) => MIMEWordChars::Quoted(q.chars()),
99 MIMEWord::Atom(a) => MIMEWordChars::Atom(a.chars()),
100 }
101 }
102}
103impl<'a> Print for MIMEWord<'a> {
104 fn print(&self, fmt: &mut impl Formatter) {
105 match self {
106 MIMEWord::Quoted(q) => q.print(fmt),
107 MIMEWord::Atom(a) => a.print(fmt),
108 }
109 }
110}
111
/// Character iterator returned by [`MIMEWord::chars`].
///
/// Each variant wraps the character iterator of the matching [`MIMEWord`] variant.
#[derive(Clone)]
pub enum MIMEWordChars<'a, 'b> {
    /// Characters of a quoted string.
    Quoted(QuotedStringChars<'a, 'b>),
    /// Characters of a MIME atom.
    Atom(MIMEAtomChars<'a, 'b>),
}
117
118impl<'a, 'b> Iterator for MIMEWordChars<'a, 'b> {
119 type Item = char;
120 fn next(&mut self) -> Option<Self::Item> {
121 match self {
122 MIMEWordChars::Quoted(q) => q.next(),
123 MIMEWordChars::Atom(a) => a.next(),
124 }
125 }
126}
127
/// A word: either a quoted string or an atom.
///
/// Parsed by [`word`].
#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
pub enum Word<'a> {
    /// The word was written as a quoted string.
    Quoted(QuotedString<'a>),
    /// The word was written as an atom.
    Atom(Atom<'a>),
}
134
135impl<'a> Print for Word<'a> {
136 fn print(&self, fmt: &mut impl Formatter) {
137 match self {
138 Word::Quoted(q) => q.print(fmt),
139 Word::Atom(a) => a.print(fmt),
140 }
141 }
142}
143
144impl<'a> Word<'a> {
145 pub fn chars<'b>(&'b self) -> WordChars<'a, 'b> {
146 match self {
147 Word::Quoted(q) => WordChars::Quoted(q.chars()),
148 Word::Atom(a) => WordChars::Atom(a.0.chars()),
149 }
150 }
151}
152
/// Character iterator returned by [`Word::chars`].
#[derive(Clone)]
pub enum WordChars<'a, 'b> {
    /// Characters of a quoted string.
    Quoted(QuotedStringChars<'a, 'b>),
    /// Characters of an atom's inner string.
    Atom(std::str::Chars<'b>),
}
158
159impl<'a, 'b> Iterator for WordChars<'a, 'b> {
160 type Item = char;
161 fn next(&mut self) -> Option<Self::Item> {
162 match self {
163 WordChars::Quoted(q) => q.next(),
164 WordChars::Atom(a) => a.next(),
165 }
166 }
167}
168
169#[instrument_input("tracing")]
175pub fn word(input: &[u8]) -> IResult<&[u8], Word<'_>> {
176 alt((map(quoted_string, Word::Quoted), map(atom, Word::Atom)))(input)
177}
178
/// One token of a [`Phrase`]: a plain word or an encoded word.
///
/// Parsed by [`phrase_token`].
#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
pub enum PhraseToken<'a> {
    /// A plain word (quoted string or atom).
    Word(Word<'a>),
    /// An encoded word.
    Encoded(encoding::EncodedWord<'a>),
}
187impl<'a> Print for PhraseToken<'a> {
188 fn print(&self, fmt: &mut impl Formatter) {
189 match self {
190 PhraseToken::Word(w) => w.print(fmt),
191 PhraseToken::Encoded(e) => e.print(fmt),
192 }
193 }
194}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for PhraseToken<'a> {
    /// Generates either a plain word or an encoded word, chosen by an
    /// arbitrary boolean.
    ///
    /// # Errors
    ///
    /// Returns `arbitrary::Error::IncorrectFormat` when the generated word is
    /// an atom containing `=?`. Errors from the underlying generators are
    /// propagated.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        if u.arbitrary()? {
            let w: Word<'_> = u.arbitrary()?;
            // NOTE(review): presumably rejected because `=?` starts an encoded
            // word, so such an atom would not round-trip as a plain word — confirm.
            if matches!(&w, Word::Atom(a) if a.0.contains("=?")) {
                return Err(arbitrary::Error::IncorrectFormat);
            }
            Ok(PhraseToken::Word(w))
        } else {
            Ok(PhraseToken::Encoded(u.arbitrary()?))
        }
    }
}
213
214#[instrument_input("tracing")]
216pub fn phrase_token(input: &[u8]) -> IResult<&[u8], PhraseToken<'_>> {
217 alt((
218 map(
220 encoded_word(encoding::Context::Phrase),
221 PhraseToken::Encoded,
222 ),
223 map(word, PhraseToken::Word),
224 map(
233 delimited(opt(cfws), tag(&[ascii::PERIOD][..]), opt(cfws)),
234 |_| {
235 PhraseToken::Word(Word::Quoted(QuotedString(vec![Cow::Owned(
236 ".".to_string(),
237 )])))
238 },
239 ),
240 ))(input)
241}
242
/// A phrase: a sequence of [`PhraseToken`]s.
///
/// Parsed by [`phrase`], which requires at least one token. Printed as the
/// tokens separated by folding white space.
#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
pub struct Phrase<'a>(pub Vec<PhraseToken<'a>>);
246
247impl<'a> Print for Phrase<'a> {
248 fn print(&self, fmt: &mut impl Formatter) {
249 print_seq(fmt, &self.0, Formatter::write_fws)
250 }
251}
252
253impl<'a> Phrase<'a> {
254 #[cfg(feature = "arbitrary")]
256 fn normalize(&self) -> Self {
257 let mut v = Vec::new();
258 for tok in &self.0 {
259 match (v.last_mut(), tok) {
260 (Some(PhraseToken::Encoded(ref mut e1)), PhraseToken::Encoded(e2)) => {
261 e1.0.extend(e2.0.clone())
262 }
263 (_, tok) => v.push(tok.clone()),
264 }
265 }
266 Self(v)
267 }
268}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Phrase<'a> {
    /// Wraps the vector returned by `arbitrary_vec_nonempty` in a `Phrase`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let tokens = arbitrary_vec_nonempty(u)?;
        Ok(Self(tokens))
    }
}
#[cfg(feature = "arbitrary")]
impl FuzzEq for Phrase<'_> {
    /// Compares two phrases after normalizing both with `normalize`.
    fn fuzz_eq(&self, other: &Self) -> bool {
        let Phrase(lhs) = self.normalize();
        let Phrase(rhs) = other.normalize();
        lhs.fuzz_eq(&rhs)
    }
}
281
282#[instrument_input("tracing")]
299pub fn phrase(input: &[u8]) -> IResult<&[u8], Phrase<'_>> {
300 let (input, phrase) = map(many1(phrase_token), Phrase)(input)?;
301 Ok((input, phrase))
302}
303
/// A run of text recognized by `obs_utext_token`.
#[derive(Debug, PartialEq, Clone, ToStatic)]
pub struct UtextToken<'a> {
    /// The matched text.
    txt: Cow<'a, str>,
    /// `true` when the text was matched by the obsolete control-character
    /// branch of `obs_utext_token`, `false` when it was matched as visible
    /// characters.
    obs: bool,
}
309
310fn obs_utext_token<'a>(input: &'a [u8]) -> IResult<&'a [u8], UtextToken<'a>> {
323 alt((
324 take_utf8_while1(is_vchar).map(|s| UtextToken { txt: s, obs: false }),
325 take_while1(|c| is_obs_no_ws_ctl(c) || c == ascii::NULL)
326 .map(|s| unsafe { str::from_utf8_unchecked(s) })
329 .map(|s| UtextToken {
330 txt: Cow::Borrowed(s),
331 obs: true,
332 }),
333 ))(input)
334}
335
/// The kind of text held by an `UnstrToken::Plain` token.
#[derive(Debug, PartialEq, Copy, Clone, ToStatic)]
pub enum UnstrTxtKind {
    /// Regular text (a `UtextToken` with `obs == false`).
    Txt,
    /// Obsolete text (a `UtextToken` with `obs == true`); not emitted when printing.
    Obs,
    /// Folding white space.
    Fws,
}
342
/// One token of an [`Unstructured`] value.
#[derive(PartialEq, Clone, Debug, ToStatic)]
#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
pub enum UnstrToken<'a> {
    /// An encoded word.
    Encoded(encoding::EncodedWord<'a>),
    /// Plain text together with its kind (regular, obsolete, or folding white space).
    #[cfg_attr(feature = "arbitrary", fuzz_eq(use_eq))]
    Plain(Cow<'a, str>, UnstrTxtKind),
}
352
353impl<'a> UnstrToken<'a> {
354 pub(crate) fn from_plain(s: &'a str, kind: UnstrTxtKind) -> Self {
355 Self::Plain(Cow::Borrowed(s), kind)
356 }
357
358 fn from_utext(tok: UtextToken<'a>) -> Self {
359 if tok.obs {
360 Self::Plain(tok.txt, UnstrTxtKind::Obs)
361 } else {
362 Self::Plain(tok.txt, UnstrTxtKind::Txt)
363 }
364 }
365}
366impl<'a> ContainsUtf8 for UnstrToken<'a> {
367 fn contains_utf8(&self) -> bool {
368 match self {
369 UnstrToken::Encoded(_) => false,
370 UnstrToken::Plain(s, _) => s.contains_utf8(),
371 }
372 }
373}
374impl<'a> Print for UnstrToken<'a> {
375 fn print(&self, fmt: &mut impl Formatter) {
376 match self {
377 UnstrToken::Encoded(e) => e.print(fmt),
378 UnstrToken::Plain(txt, UnstrTxtKind::Txt) => fmt.write_bytes(txt.as_bytes()),
379 UnstrToken::Plain(_, UnstrTxtKind::Obs) =>
380 {}
382 UnstrToken::Plain(txt, UnstrTxtKind::Fws) => fmt.write_fws_bytes(txt.as_bytes()),
383 }
384 }
385}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for UnstrToken<'a> {
    /// Generates an encoded word, a regular text token, or a folding white
    /// space token, picked by an arbitrary integer in `0..=2`.
    ///
    /// `UnstrTxtKind::Obs` tokens are never generated.
    ///
    /// # Errors
    ///
    /// Returns `arbitrary::Error::IncorrectFormat` when generated regular text
    /// contains `=?`. Errors from the underlying generators are propagated.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        match u.int_in_range(0..=2)? {
            0 => Ok(UnstrToken::Encoded(u.arbitrary()?)),
            1 => {
                let txt = arbitrary_string_nonempty_where(u, is_vchar, 'X')?;
                // NOTE(review): presumably rejected because `=?` starts an
                // encoded word, so the text could be read back differently — confirm.
                if txt.contains("=?") {
                    return Err(arbitrary::Error::IncorrectFormat);
                }
                Ok(UnstrToken::Plain(txt.into(), UnstrTxtKind::Txt))
            }
            2 => {
                let txt = arbitrary_whitespace_nonempty(u)?;
                Ok(UnstrToken::Plain(txt.into(), UnstrTxtKind::Fws))
            }
            // `int_in_range(0..=2)` was asked for a value in 0..=2 only.
            _ => unreachable!(),
        }
    }
}
410
/// Unstructured text: a sequence of [`UnstrToken`]s (encoded words, plain
/// text, and folding white space).
///
/// Parsed by [`unstructured`].
#[derive(Debug, PartialEq, Clone, ToStatic, ToStringFromPrint)]
pub struct Unstructured<'a>(pub Vec<UnstrToken<'a>>);
418
419impl<'a> Print for Unstructured<'a> {
420 fn print(&self, fmt: &mut impl Formatter) {
421 for i in 0..self.0.len() {
422 let tok = &self.0[i];
423
424 if i > 0 {
426 if let (UnstrToken::Encoded(_), UnstrToken::Encoded(_)) = (&self.0[i - 1], tok) {
427 fmt.write_fws()
428 }
429 }
430
431 tok.print(fmt)
432 }
433 }
434}
435
436impl<'a> Unstructured<'a> {
437 pub fn to_string_keep_obs(&self) -> String {
438 let mut s = String::new();
439 for tok in &self.0 {
440 match tok {
441 UnstrToken::Encoded(e) => s.push_str(&e.to_string()),
442 UnstrToken::Plain(txt, _) => s.push_str(txt),
443 }
444 }
445 s
446 }
447
448 #[cfg(feature = "arbitrary")]
451 fn fuzz_eq_normalize(&self) -> Unstructured<'static> {
452 use bounded_static::ToBoundedStatic;
453 let mut v: Vec<UnstrToken<'static>> = Vec::new();
454 for tok in &self.0 {
455 match (v.last_mut(), tok) {
456 (Some(UnstrToken::Plain(s1, k1)), UnstrToken::Plain(s2, k2)) if k1 == k2 => {
457 s1.to_mut().push_str(s2)
458 }
459 (Some(UnstrToken::Encoded(e1)), UnstrToken::Encoded(e2)) => {
460 e1.0.extend(e2.to_static().0)
461 }
462 _ => v.push(tok.to_static()),
463 }
464 }
465 Unstructured(v)
466 }
467}
468impl<'a> ContainsUtf8 for Unstructured<'a> {
469 fn contains_utf8(&self) -> bool {
470 self.0.contains_utf8()
471 }
472}
473
#[cfg(feature = "arbitrary")]
impl FuzzEq for Unstructured<'_> {
    /// Compares two values after normalizing both with `fuzz_eq_normalize`.
    fn fuzz_eq(&self, other: &Self) -> bool {
        let lhs = self.fuzz_eq_normalize();
        let rhs = other.fuzz_eq_normalize();
        lhs.0.fuzz_eq(&rhs.0)
    }
}
482
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Unstructured<'a> {
    /// Generates a sequence of arbitrary tokens, rejecting some token
    /// sequences with `arbitrary::Error::IncorrectFormat`.
    ///
    /// The kinds of the last two "distinct" tokens are tracked: a white space
    /// token directly after white space, or a text token directly after text,
    /// is pushed without updating that history.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        // Coarse classification of a token, used only for the adjacency checks below.
        enum Kind {
            Encoded,
            Wsp,
            Txt,
        }
        // Classifies a token: folding white space is `Wsp`, every other plain
        // token is `Txt`.
        fn k(tok: &UnstrToken<'_>) -> Kind {
            match tok {
                UnstrToken::Encoded(_) => Kind::Encoded,
                UnstrToken::Plain(_, UnstrTxtKind::Fws) => Kind::Wsp,
                UnstrToken::Plain(_, _) => Kind::Txt,
            }
        }

        let mut v: Vec<UnstrToken> = Vec::new();
        // Kinds of the two most recent tokens that changed the history.
        let mut before_last = None;
        let mut last = None;
        for _ in 0..u.arbitrary_len::<UnstrToken>()? {
            let tok: UnstrToken = u.arbitrary()?;
            match (&before_last, &last, k(&tok)) {
                // Rejected sequences: encoded / white space / encoded, and an
                // encoded word directly next to text.
                // NOTE(review): presumably these would not parse back to the
                // same tokens — confirm against the parser.
                (Some(Kind::Encoded), Some(Kind::Wsp), Kind::Encoded) |
                (_, Some(Kind::Encoded), Kind::Txt) | (_, Some(Kind::Txt), Kind::Encoded) => {
                    return Err(arbitrary::Error::IncorrectFormat)
                },

                // Same kind as the previous white space / text token: leave the
                // history untouched.
                (_, Some(Kind::Wsp), Kind::Wsp) | (_, Some(Kind::Txt), Kind::Txt) =>
                    (),
                // Anything else: shift the history by one token.
                (_, _, ktok) => {
                    before_last = last;
                    last = Some(ktok);
                }
            };
            v.push(tok)
        }
        Ok(Unstructured(v))
    }
}
526
527#[instrument_input("tracing")]
547pub fn unstructured(input: &[u8]) -> IResult<&[u8], Unstructured<'_>> {
548 let (input, r) = many0(alt((
549 map(encoded_word_plain(encoding::Context::Unstructured), |w| {
550 vec![UnstrToken::Encoded(w)]
551 }),
552 map(obs_utext_token, |tok| vec![UnstrToken::from_utext(tok)]),
553 map(fws, |v| {
554 v.into_iter()
555 .map(|s| UnstrToken::from_plain(s, UnstrTxtKind::Fws))
556 .collect()
557 }),
558 )))(input)?;
559
560 Ok((input, Unstructured(r.into_iter().flatten().collect())))
561}
562
// Unit tests for the phrase, phrase-list and unstructured parsers and printers.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::print::tests::print_to_vec;
    use crate::text::charset::EmailCharset;
    use crate::text::encoding::{EncodedWord, EncodedWordToken, QuotedChunk, QuotedWord};

    // Parsing then printing a phrase.
    #[test]
    fn test_phrase() {
        // Plain atoms round-trip unchanged.
        assert_eq!(
            print_to_vec(phrase(b"hello world").unwrap().1),
            b"hello world".to_vec(),
        );
        // A quoted string inside a phrase round-trips unchanged.
        assert_eq!(
            print_to_vec(phrase(b"salut \"le\" monde").unwrap().1),
            b"salut \"le\" monde".to_vec(),
        );

        // A line break followed by a space is consumed as part of the phrase;
        // a line break not followed by white space ends it.
        let (rest, parsed) = phrase(b"fin\r\n du\r\nmonde").unwrap();
        assert_eq!(rest, &b"\r\nmonde"[..]);
        assert_eq!(&print_to_vec(parsed), b"fin du");

        // A bare period is accepted and printed back as a quoted ".".
        let (rest, parsed) = phrase(b"foo.bar").unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(&print_to_vec(parsed), b"foo \".\" bar");
    }

    // Empty and white-space-only list elements are dropped.
    #[test]
    fn test_phrase_list() {
        let (rest, parsed) = phrase_list(b",abc def,, ,ghi").unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(&print_to_vec(parsed.as_ref().unwrap()), b"abc def, ghi");
    }

    // Tokenization of unstructured text.
    #[test]
    fn test_unstructured() {
        // Empty input yields no tokens.
        let (rest, parsed) = unstructured(b"").unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(parsed, Unstructured(vec![]));

        // White space only yields a single folding-white-space token.
        let (rest, parsed) = unstructured(b" \t").unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(
            parsed,
            Unstructured(vec![UnstrToken::Plain(" \t"[..].into(), UnstrTxtKind::Fws)])
        );

        // An encoded word separated from text by white space is decoded.
        let (rest, parsed) = unstructured(b"foo =?UTF-8?q?foo?=").unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(
            parsed,
            Unstructured(vec![
                UnstrToken::Plain("foo"[..].into(), UnstrTxtKind::Txt),
                UnstrToken::Plain(" "[..].into(), UnstrTxtKind::Fws),
                UnstrToken::Encoded(EncodedWord(vec![EncodedWordToken::Quoted(QuotedWord {
                    enc: EmailCharset::utf8(),
                    chunks: vec![QuotedChunk::Safe(b"foo"[..].into())],
                })]))
            ])
        );

        // An encoded-word-looking sequence glued to preceding text stays plain text.
        let (rest, parsed) = unstructured(b"foo=?UTF-8?q?foo?=").unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(
            parsed,
            Unstructured(vec![UnstrToken::Plain(
                "foo=?UTF-8?q?foo?="[..].into(),
                UnstrTxtKind::Txt
            ),])
        );

        // The line break of a fold is dropped; only the white space after it is kept.
        let (rest, parsed) = unstructured(b"foo\r\n\t").unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(
            parsed,
            Unstructured(vec![
                UnstrToken::Plain("foo"[..].into(), UnstrTxtKind::Txt),
                UnstrToken::Plain("\t"[..].into(), UnstrTxtKind::Fws),
            ])
        );
    }
}