#[cfg(feature = "arbitrary")]
use arbitrary::Arbitrary;
use bounded_static::ToStatic;
use nom::{
branch::alt,
bytes::complete::{tag, take_while1},
combinator::{map, opt},
multi::{many0, many1, separated_list0},
sequence::delimited,
IResult, Parser,
};
use std::borrow::Cow;
#[cfg(feature = "tracing")]
use tracing::warn;
use crate::i18n::ContainsUtf8;
use crate::print::{print_seq, Formatter, Print, ToStringFromPrint};
use crate::text::{
ascii,
encoding::{self, encoded_word, encoded_word_plain},
quoted::{quoted_string, QuotedString, QuotedStringChars},
utf8::take_utf8_while1,
whitespace::{cfws, fws, is_obs_no_ws_ctl},
words::{atom, is_vchar, mime_atom, Atom, MIMEAtom, MIMEAtomChars},
};
#[cfg(feature = "arbitrary")]
use crate::{
arbitrary_utils::{
arbitrary_string_nonempty_where, arbitrary_vec_nonempty, arbitrary_whitespace_nonempty,
},
fuzz_eq::FuzzEq,
};
use eml_codec_derives::instrument_input;
/// A list of [`Phrase`]s.
///
/// Produced by [`phrase_list`] from comma-separated input; printed with a `,`
/// followed by folding whitespace between consecutive phrases.
#[derive(Clone, ContainsUtf8, Debug, PartialEq, Default, ToStatic, ToStringFromPrint)]
#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
pub struct PhraseList<'a>(pub Vec<Phrase<'a>>);
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for PhraseList<'a> {
    /// Builds a list from the phrases returned by `arbitrary_vec_nonempty`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let phrases = arbitrary_vec_nonempty(u)?;
        Ok(PhraseList(phrases))
    }
}
/// Parses a comma-separated list of phrases.
///
/// Each entry between commas is either a phrase or optional CFWS; entries
/// that are not phrases are discarded. Returns `None` when no phrase was
/// found at all, otherwise `Some` with the phrases in input order.
#[instrument_input("tracing")]
pub fn phrase_list(input: &[u8]) -> IResult<&[u8], Option<PhraseList<'_>>> {
    let (rest, entries) = separated_list0(
        tag(","),
        alt((map(phrase, Some), map(opt(cfws), |_| None))),
    )(input)?;

    // Keep only the entries that actually held a phrase.
    let phrases: Vec<Phrase> = entries.into_iter().flatten().collect();
    let list = (!phrases.is_empty()).then(|| PhraseList(phrases));
    Ok((rest, list))
}
impl<'a> Print for PhraseList<'a> {
    /// Prints every phrase, separating consecutive ones with `,` followed by
    /// folding whitespace.
    fn print(&self, fmt: &mut impl Formatter) {
        let Self(phrases) = self;
        print_seq(fmt, phrases, |out| {
            out.write_bytes(b",");
            out.write_fws()
        })
    }
}
/// A word as parsed by [`mime_word`]: either a quoted string or a MIME atom.
#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
pub enum MIMEWord<'a> {
/// A quoted string.
Quoted(QuotedString<'a>),
/// An unquoted MIME atom.
Atom(MIMEAtom<'a>),
}
impl Default for MIMEWord<'static> {
fn default() -> Self {
Self::Atom(MIMEAtom::default())
}
}
/// Parses a [`MIMEWord`], trying a quoted string first and a MIME atom second.
#[instrument_input("tracing")]
pub fn mime_word(input: &[u8]) -> IResult<&[u8], MIMEWord<'_>> {
    let quoted = map(quoted_string, MIMEWord::Quoted);
    let bare = map(mime_atom, MIMEWord::Atom);
    alt((quoted, bare))(input)
}
impl<'a> MIMEWord<'a> {
    /// Returns an iterator over the characters of this word, delegating to
    /// the `chars` method of whichever variant is held.
    pub fn chars<'b>(&'b self) -> MIMEWordChars<'a, 'b> {
        match self {
            Self::Quoted(quoted) => MIMEWordChars::Quoted(quoted.chars()),
            Self::Atom(atom) => MIMEWordChars::Atom(atom.chars()),
        }
    }
}
impl<'a> Print for MIMEWord<'a> {
    /// Prints the word by delegating to the held variant.
    fn print(&self, fmt: &mut impl Formatter) {
        match self {
            Self::Quoted(quoted) => quoted.print(fmt),
            Self::Atom(atom) => atom.print(fmt),
        }
    }
}
/// Character iterator returned by [`MIMEWord::chars`]; wraps the iterator of
/// whichever variant the word holds.
#[derive(Clone)]
pub enum MIMEWordChars<'a, 'b> {
/// Iterating over a quoted string.
Quoted(QuotedStringChars<'a, 'b>),
/// Iterating over a MIME atom.
Atom(MIMEAtomChars<'a, 'b>),
}
impl<'a, 'b> Iterator for MIMEWordChars<'a, 'b> {
type Item = char;
fn next(&mut self) -> Option<Self::Item> {
match self {
MIMEWordChars::Quoted(q) => q.next(),
MIMEWordChars::Atom(a) => a.next(),
}
}
}
/// A word as parsed by [`word`]: either a quoted string or an atom.
#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
#[cfg_attr(feature = "arbitrary", derive(Arbitrary, FuzzEq))]
pub enum Word<'a> {
/// A quoted string.
Quoted(QuotedString<'a>),
/// An unquoted atom.
Atom(Atom<'a>),
}
impl<'a> Print for Word<'a> {
    /// Prints the word by delegating to the held variant.
    fn print(&self, fmt: &mut impl Formatter) {
        match self {
            Self::Quoted(quoted) => quoted.print(fmt),
            Self::Atom(atom) => atom.print(fmt),
        }
    }
}
impl<'a> Word<'a> {
    /// Returns an iterator over the characters of this word.
    ///
    /// Quoted strings use their own `chars` iterator; atoms iterate over the
    /// characters of their inner string.
    pub fn chars<'b>(&'b self) -> WordChars<'a, 'b> {
        match self {
            Self::Quoted(quoted) => WordChars::Quoted(quoted.chars()),
            Self::Atom(atom) => WordChars::Atom(atom.0.chars()),
        }
    }
}
/// Character iterator returned by [`Word::chars`].
#[derive(Clone)]
pub enum WordChars<'a, 'b> {
/// Iterating over a quoted string.
Quoted(QuotedStringChars<'a, 'b>),
/// Iterating over the characters of an atom's inner string.
Atom(std::str::Chars<'b>),
}
impl<'a, 'b> Iterator for WordChars<'a, 'b> {
type Item = char;
fn next(&mut self) -> Option<Self::Item> {
match self {
WordChars::Quoted(q) => q.next(),
WordChars::Atom(a) => a.next(),
}
}
}
/// Parses a [`Word`], trying a quoted string first and an atom second.
#[instrument_input("tracing")]
pub fn word(input: &[u8]) -> IResult<&[u8], Word<'_>> {
    let quoted = map(quoted_string, Word::Quoted);
    let bare = map(atom, Word::Atom);
    alt((quoted, bare))(input)
}
/// One element of a [`Phrase`]: a plain word or an encoded word.
#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
pub enum PhraseToken<'a> {
/// A quoted string or an atom.
Word(Word<'a>),
/// An encoded word.
Encoded(encoding::EncodedWord<'a>),
}
impl<'a> Print for PhraseToken<'a> {
    /// Prints the token by delegating to the held variant.
    fn print(&self, fmt: &mut impl Formatter) {
        match self {
            Self::Word(plain) => plain.print(fmt),
            Self::Encoded(encoded) => encoded.print(fmt),
        }
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for PhraseToken<'a> {
    /// Draws a boolean to choose between a plain word and an encoded word.
    ///
    /// A plain atom containing `=?` is rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let pick_word: bool = u.arbitrary()?;
        if !pick_word {
            return Ok(PhraseToken::Encoded(u.arbitrary()?));
        }

        let candidate: Word<'_> = u.arbitrary()?;
        // NOTE(review): presumably `=?` is refused because it could be read
        // back as the start of an encoded word; confirm.
        let has_marker = matches!(&candidate, Word::Atom(atom) if atom.0.contains("=?"));
        if has_marker {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(PhraseToken::Word(candidate))
    }
}
/// Parses a single [`PhraseToken`].
///
/// Alternatives are tried in this order:
/// 1. an encoded word in the phrase context;
/// 2. a plain word (quoted string or atom);
/// 3. a lone period, optionally surrounded by CFWS, which is represented as
///    the quoted string `"."`.
#[instrument_input("tracing")]
pub fn phrase_token(input: &[u8]) -> IResult<&[u8], PhraseToken<'_>> {
    alt((
        map(
            encoded_word(encoding::Context::Phrase),
            PhraseToken::Encoded,
        ),
        map(word, PhraseToken::Word),
        map(
            delimited(opt(cfws), tag(&[ascii::PERIOD][..]), opt(cfws)),
            |_| {
                // The period is stored as a one-chunk quoted string.
                let dot = Cow::Owned(String::from("."));
                PhraseToken::Word(Word::Quoted(QuotedString(vec![dot])))
            },
        ),
    ))(input)
}
/// A sequence of [`PhraseToken`]s.
///
/// The [`phrase`] parser always produces at least one token. Tokens are
/// printed separated by folding whitespace.
#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
pub struct Phrase<'a>(pub Vec<PhraseToken<'a>>);
impl<'a> Print for Phrase<'a> {
    /// Prints every token, separating consecutive ones with folding whitespace.
    fn print(&self, fmt: &mut impl Formatter) {
        let Self(tokens) = self;
        print_seq(fmt, tokens, |out| out.write_fws())
    }
}
impl<'a> Phrase<'a> {
    /// Returns a copy of the phrase in which every run of adjacent encoded
    /// words has been merged into a single encoded word.
    ///
    /// Used for fuzzing comparisons only.
    #[cfg(feature = "arbitrary")]
    fn normalize(&self) -> Self {
        let mut merged: Vec<PhraseToken<'a>> = Vec::new();
        for tok in &self.0 {
            // Fold an encoded word into the preceding one when both are encoded.
            if let (Some(PhraseToken::Encoded(prev)), PhraseToken::Encoded(next)) =
                (merged.last_mut(), tok)
            {
                prev.0.extend(next.0.iter().cloned());
                continue;
            }
            merged.push(tok.clone());
        }
        Phrase(merged)
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Phrase<'a> {
    /// Builds a phrase from the tokens returned by `arbitrary_vec_nonempty`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let tokens = arbitrary_vec_nonempty(u)?;
        Ok(Phrase(tokens))
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> FuzzEq for Phrase<'a> {
    /// Compares two phrases after normalizing both, so that the way adjacent
    /// encoded words are split does not affect the result.
    fn fuzz_eq(&self, other: &Self) -> bool {
        let lhs = self.normalize();
        let rhs = other.normalize();
        lhs.0.fuzz_eq(&rhs.0)
    }
}
#[instrument_input("tracing")]
pub fn phrase(input: &[u8]) -> IResult<&[u8], Phrase<'_>> {
let (input, phrase) = map(many1(phrase_token), Phrase)(input)?;
Ok((input, phrase))
}
/// A run of unstructured text as produced by `obs_utext_token`.
#[derive(Debug, PartialEq, Clone, ToStatic)]
pub struct UtextToken<'a> {
// The matched text.
txt: Cow<'a, str>,
// `true` when the run was matched by the obsolete-control/NUL branch of
// `obs_utext_token`, `false` when matched by the `is_vchar` branch.
obs: bool,
}
/// Parses one non-empty run of unstructured text.
///
/// Two alternatives are tried in order:
/// - a run accepted by `take_utf8_while1(is_vchar)`, returned with `obs: false`;
/// - a run of bytes that are either `is_obs_no_ws_ctl` or NUL, returned with
///   `obs: true`.
fn obs_utext_token<'a>(input: &'a [u8]) -> IResult<&'a [u8], UtextToken<'a>> {
alt((
take_utf8_while1(is_vchar).map(|s| UtextToken { txt: s, obs: false }),
take_while1(|c| is_obs_no_ws_ctl(c) || c == ascii::NULL)
// SAFETY: every byte of `s` satisfied the predicate above. This is sound
// only if `is_obs_no_ws_ctl` accepts nothing but ASCII bytes, so that the
// slice is valid UTF-8 — assumed here, TODO confirm against its definition.
.map(|s| unsafe { str::from_utf8_unchecked(s) })
.map(|s| UtextToken {
txt: Cow::Borrowed(s),
obs: true,
}),
))(input)
}
/// Kind of a plain (non-encoded) token of unstructured text.
///
/// - `Txt`: regular text; printed as-is.
/// - `Obs`: text from the obsolete-control/NUL branch of the text parser;
///   skipped when printing.
/// - `Fws`: whitespace produced by the `fws` parser; printed through
///   `write_fws_bytes`.
#[derive(Debug, PartialEq, Copy, Clone, ToStatic)]
pub enum UnstrTxtKind {
Txt, Obs, Fws, }
/// One token of an [`Unstructured`] value.
#[derive(PartialEq, Clone, Debug, ToStatic)]
#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
pub enum UnstrToken<'a> {
/// An encoded word.
Encoded(encoding::EncodedWord<'a>),
/// Plain text together with its kind; compared with `==` when fuzzing.
#[cfg_attr(feature = "arbitrary", fuzz_eq(use_eq))]
Plain(Cow<'a, str>, UnstrTxtKind),
}
impl<'a> UnstrToken<'a> {
pub(crate) fn from_plain(s: &'a str, kind: UnstrTxtKind) -> Self {
Self::Plain(Cow::Borrowed(s), kind)
}
fn from_utext(tok: UtextToken<'a>) -> Self {
if tok.obs {
Self::Plain(tok.txt, UnstrTxtKind::Obs)
} else {
Self::Plain(tok.txt, UnstrTxtKind::Txt)
}
}
}
impl<'a> ContainsUtf8 for UnstrToken<'a> {
    /// Encoded words always report `false`; plain tokens defer to their text.
    fn contains_utf8(&self) -> bool {
        match self {
            Self::Plain(text, _) => text.contains_utf8(),
            Self::Encoded(_) => false,
        }
    }
}
impl<'a> Print for UnstrToken<'a> {
    /// Prints the token.
    ///
    /// Encoded words print themselves, regular text is written verbatim,
    /// whitespace goes through `write_fws_bytes`, and obsolete text is
    /// dropped.
    fn print(&self, fmt: &mut impl Formatter) {
        match self {
            Self::Encoded(encoded) => encoded.print(fmt),
            Self::Plain(text, kind) => match kind {
                UnstrTxtKind::Txt => fmt.write_bytes(text.as_bytes()),
                UnstrTxtKind::Fws => fmt.write_fws_bytes(text.as_bytes()),
                // Obsolete text is intentionally not printed.
                UnstrTxtKind::Obs => {}
            },
        }
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for UnstrToken<'a> {
    /// Generates an encoded word, a non-empty `Txt` token or a non-empty
    /// `Fws` token. `Obs` tokens are never generated.
    ///
    /// Text containing `=?` is rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let variant = u.int_in_range(0..=2)?;
        match variant {
            0 => {
                let word = u.arbitrary()?;
                Ok(UnstrToken::Encoded(word))
            }
            1 => {
                let text = arbitrary_string_nonempty_where(u, is_vchar, 'X')?;
                if text.contains("=?") {
                    return Err(arbitrary::Error::IncorrectFormat);
                }
                Ok(UnstrToken::Plain(text.into(), UnstrTxtKind::Txt))
            }
            2 => {
                let blanks = arbitrary_whitespace_nonempty(u)?;
                Ok(UnstrToken::Plain(blanks.into(), UnstrTxtKind::Fws))
            }
            _ => unreachable!(),
        }
    }
}
/// Unstructured text, stored as a sequence of [`UnstrToken`]s (encoded words,
/// regular text, obsolete text and whitespace) in input order.
#[derive(Debug, PartialEq, Clone, ToStatic, ToStringFromPrint)]
pub struct Unstructured<'a>(pub Vec<UnstrToken<'a>>);
impl<'a> Print for Unstructured<'a> {
fn print(&self, fmt: &mut impl Formatter) {
for i in 0..self.0.len() {
let tok = &self.0[i];
if i > 0 {
if let (UnstrToken::Encoded(_), UnstrToken::Encoded(_)) = (&self.0[i - 1], tok) {
fmt.write_fws()
}
}
tok.print(fmt)
}
}
}
impl<'a> Unstructured<'a> {
    /// Concatenates the textual form of every token into a `String`.
    ///
    /// Unlike printing, plain tokens of every kind — including obsolete
    /// text — are kept, and nothing is inserted between tokens.
    pub fn to_string_keep_obs(&self) -> String {
        self.0.iter().fold(String::new(), |mut acc, tok| {
            match tok {
                UnstrToken::Encoded(encoded) => acc.push_str(&encoded.to_string()),
                UnstrToken::Plain(text, _) => acc.push_str(text),
            }
            acc
        })
    }

    /// Returns an owned copy in which adjacent plain tokens of the same kind
    /// are concatenated and adjacent encoded words are merged into one.
    ///
    /// Used for fuzzing comparisons only.
    #[cfg(feature = "arbitrary")]
    fn fuzz_eq_normalize(&self) -> Unstructured<'static> {
        use bounded_static::ToBoundedStatic;

        let mut merged: Vec<UnstrToken<'static>> = Vec::new();
        for tok in &self.0 {
            match (merged.last_mut(), tok) {
                (Some(UnstrToken::Plain(prev, prev_kind)), UnstrToken::Plain(next, kind))
                    if prev_kind == kind =>
                {
                    prev.to_mut().push_str(next)
                }
                (Some(UnstrToken::Encoded(prev)), UnstrToken::Encoded(next)) => {
                    prev.0.extend(next.to_static().0)
                }
                _ => merged.push(tok.to_static()),
            }
        }
        Unstructured(merged)
    }
}
impl<'a> ContainsUtf8 for Unstructured<'a> {
/// Delegates to the `ContainsUtf8` implementation of the token vector.
fn contains_utf8(&self) -> bool {
self.0.contains_utf8()
}
}
#[cfg(feature = "arbitrary")]
impl<'a> FuzzEq for Unstructured<'a> {
    /// Compares two values after normalizing both, so that the way text or
    /// encoded words are split across tokens does not affect the result.
    fn fuzz_eq(&self, other: &Self) -> bool {
        let lhs = self.fuzz_eq_normalize();
        let rhs = other.fuzz_eq_normalize();
        lhs.0.fuzz_eq(&rhs.0)
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Unstructured<'a> {
/// Generates a sequence of tokens, rejecting with `IncorrectFormat` any
/// sequence that contains one of the adjacency patterns listed in the body.
fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
// Coarse classification of a token, used to track the recent history.
enum Kind {
Encoded,
Wsp,
Txt,
}
// Classifies a token: every plain token that is not `Fws` counts as `Txt`.
fn k(tok: &UnstrToken<'_>) -> Kind {
match tok {
UnstrToken::Encoded(_) => Kind::Encoded,
UnstrToken::Plain(_, UnstrTxtKind::Fws) => Kind::Wsp,
UnstrToken::Plain(_, _) => Kind::Txt,
}
}
let mut v: Vec<UnstrToken> = Vec::new();
// Kinds of the last two *runs* seen so far (a run being consecutive tokens
// of the same whitespace/text kind); `None` until enough runs exist.
let mut before_last = None;
let mut last = None;
for _ in 0..u.arbitrary_len::<UnstrToken>()? {
let tok: UnstrToken = u.arbitrary()?;
match (&before_last, &last, k(&tok)) {
// Rejected: encoded / whitespace / encoded, and an encoded word directly
// next to text.
// NOTE(review): presumably these sequences do not survive a print/parse
// round trip — confirm.
(Some(Kind::Encoded), Some(Kind::Wsp), Kind::Encoded) |
(_, Some(Kind::Encoded), Kind::Txt) | (_, Some(Kind::Txt), Kind::Encoded) => {
return Err(arbitrary::Error::IncorrectFormat)
},
// Same kind of whitespace/text as the previous token: the run continues,
// so the history is left untouched.
(_, Some(Kind::Wsp), Kind::Wsp) | (_, Some(Kind::Txt), Kind::Txt) =>
(),
// Anything else starts a new run: shift the history by one.
(_, _, ktok) => {
before_last = last;
last = Some(ktok);
}
};
v.push(tok)
}
Ok(Unstructured(v))
}
}
/// Parses unstructured text into a flat list of tokens.
///
/// The following alternatives are tried repeatedly, in order, until none
/// matches:
/// 1. an encoded word in the unstructured context;
/// 2. a run of text (regular or obsolete);
/// 3. folding whitespace, which may yield several `Fws` tokens.
///
/// Empty input yields an empty token list.
#[instrument_input("tracing")]
pub fn unstructured(input: &[u8]) -> IResult<&[u8], Unstructured<'_>> {
    let (rest, groups) = many0(alt((
        map(encoded_word_plain(encoding::Context::Unstructured), |word| {
            vec![UnstrToken::Encoded(word)]
        }),
        map(obs_utext_token, |text| vec![UnstrToken::from_utext(text)]),
        map(fws, |pieces| {
            pieces
                .into_iter()
                .map(|piece| UnstrToken::from_plain(piece, UnstrTxtKind::Fws))
                .collect::<Vec<_>>()
        }),
    )))(input)?;

    // Each alternative produced a small vector; splice them together in order.
    let mut tokens = Vec::new();
    for group in groups {
        tokens.extend(group);
    }
    Ok((rest, Unstructured(tokens)))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::print::tests::print_to_vec;
use crate::text::charset::EmailCharset;
use crate::text::encoding::{EncodedWord, EncodedWordToken, QuotedChunk, QuotedWord};
// Round-trips phrases through the parser and the printer.
#[test]
fn test_phrase() {
// Plain atoms separated by a space.
assert_eq!(
print_to_vec(phrase(b"hello world").unwrap().1),
b"hello world".to_vec(),
);
// A quoted string in the middle of a phrase keeps its quotes.
assert_eq!(
print_to_vec(phrase(b"salut \"le\" monde").unwrap().1),
b"salut \"le\" monde".to_vec(),
);
// A CRLF followed by whitespace is folded into the phrase; a CRLF that is
// not followed by whitespace ends it and is left unparsed.
let (rest, parsed) = phrase(b"fin\r\n du\r\nmonde").unwrap();
assert_eq!(rest, &b"\r\nmonde"[..]);
assert_eq!(&print_to_vec(parsed), b"fin du");
// A period between words is accepted and printed as the quoted string ".".
let (rest, parsed) = phrase(b"foo.bar").unwrap();
assert_eq!(rest, &b""[..]);
assert_eq!(&print_to_vec(parsed), b"foo \".\" bar");
}
// Empty and whitespace-only entries of a phrase list are dropped.
#[test]
fn test_phrase_list() {
let (rest, parsed) = phrase_list(b",abc def,, ,ghi").unwrap();
assert_eq!(rest, &b""[..]);
assert_eq!(&print_to_vec(parsed.as_ref().unwrap()), b"abc def, ghi");
}
// Checks the token structure produced by the unstructured-text parser.
#[test]
fn test_unstructured() {
// Empty input gives an empty token list.
let (rest, parsed) = unstructured(b"").unwrap();
assert_eq!(rest, &b""[..]);
assert_eq!(parsed, Unstructured(vec![]));
// Whitespace-only input gives a single whitespace token.
let (rest, parsed) = unstructured(b" \t").unwrap();
assert_eq!(rest, &b""[..]);
assert_eq!(
parsed,
Unstructured(vec![UnstrToken::Plain(" \t"[..].into(), UnstrTxtKind::Fws)])
);
// An encoded word separated from the preceding text by whitespace is decoded.
let (rest, parsed) = unstructured(b"foo =?UTF-8?q?foo?=").unwrap();
assert_eq!(rest, &b""[..]);
assert_eq!(
parsed,
Unstructured(vec![
UnstrToken::Plain("foo"[..].into(), UnstrTxtKind::Txt),
UnstrToken::Plain(" "[..].into(), UnstrTxtKind::Fws),
UnstrToken::Encoded(EncodedWord(vec![EncodedWordToken::Quoted(QuotedWord {
enc: EmailCharset::utf8(),
chunks: vec![QuotedChunk::Safe(b"foo"[..].into())],
})]))
])
);
// Encoded-word syntax glued to preceding text is kept as plain text.
let (rest, parsed) = unstructured(b"foo=?UTF-8?q?foo?=").unwrap();
assert_eq!(rest, &b""[..]);
assert_eq!(
parsed,
Unstructured(vec![UnstrToken::Plain(
"foo=?UTF-8?q?foo?="[..].into(),
UnstrTxtKind::Txt
),])
);
// The CRLF of a fold is consumed; only the whitespace after it is kept.
let (rest, parsed) = unstructured(b"foo\r\n\t").unwrap();
assert_eq!(rest, &b""[..]);
assert_eq!(
parsed,
Unstructured(vec![
UnstrToken::Plain("foo"[..].into(), UnstrTxtKind::Txt),
UnstrToken::Plain("\t"[..].into(), UnstrTxtKind::Fws),
])
);
}
}