#[cfg(feature = "arbitrary")]
use arbitrary::Arbitrary;
use bounded_static::ToStatic;
use nom::{
branch::alt,
bytes::complete::tag,
combinator::{eof, map, opt, recognize},
multi::many0,
sequence::{delimited, pair, preceded, tuple},
IResult,
};
use std::borrow::Cow;
#[cfg(any(feature = "tracing-recover", feature = "tracing-unsupported"))]
use tracing::warn;
use crate::i18n::ContainsUtf8;
use crate::imf::mailbox::{domain, dtext, local_part, Domain, Dtext, LocalPart};
use crate::print::{print_seq, Formatter, Print, ToStringFromPrint};
use crate::text::recovery::{take_quoted_encoded_or_until1, take_quoted_or_until};
use crate::text::utf8::{is_nonascii_or, take_utf8_while1};
use crate::text::whitespace::cfws;
#[cfg(any(feature = "tracing-recover", feature = "tracing-unsupported"))]
use crate::utils::bytes_to_trace_string;
#[cfg(feature = "arbitrary")]
use crate::{arbitrary_utils::arbitrary_string_nonempty_where, fuzz_eq::FuzzEq};
use eml_codec_derives::instrument_input;
/// A message identifier, i.e. the value written between `<` and `>`.
///
/// Values are produced by the `msg_id*` parsers of this file and are printed
/// back wrapped in angle brackets by the `Print` implementation.
#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
pub enum MessageID<'a> {
/// Identifier of the form `left@right`.
// NOTE(review): the `Obs` prefix presumably refers to the obsolete (obs-*)
// RFC 5322 syntax accepted by `local_part` / `domain` — confirm.
ObsLeftRight {
/// Part located before the `@`.
left: LocalPart<'a>,
/// Part located after the `@`.
right: Domain<'a>,
},
/// Raw text of an identifier that could not be parsed as `left@right`.
/// It is printed back verbatim between the angle brackets.
#[cfg_attr(feature = "arbitrary", fuzz_eq(use_eq))]
Invalid(Cow<'a, str>),
}
impl<'a> Print for MessageID<'a> {
    /// Writes the identifier wrapped in angle brackets.
    ///
    /// A well-formed identifier is emitted as `<left@right>`, an invalid one
    /// as `<raw text>` with the stored text copied byte for byte.
    fn print(&self, fmt: &mut impl Formatter) {
        // Opening bracket is common to both variants.
        fmt.write_bytes(b"<");
        match self {
            Self::Invalid(raw) => fmt.write_bytes(raw.as_bytes()),
            Self::ObsLeftRight { left, right } => {
                left.print(fmt);
                fmt.write_bytes(b"@");
                right.print(fmt);
            }
        }
        // Closing bracket is common to both variants.
        fmt.write_bytes(b">");
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for MessageID<'a> {
    /// Generates either a `left@right` identifier or an invalid one.
    ///
    /// One integer in `0..=1` is drawn first to select the variant. The
    /// invalid variant carries a non-empty string built by
    /// `arbitrary_string_nonempty_where` from `is_invalid_msgid_text`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let pick_well_formed = u.int_in_range(0..=1)? == 0;
        if pick_well_formed {
            // Draw order matters for reproducibility: left first, then right.
            let left: LocalPart<'a> = u.arbitrary()?;
            let right: Domain<'a> = u.arbitrary()?;
            return Ok(MessageID::ObsLeftRight { left, right });
        }
        let text = arbitrary_string_nonempty_where(u, is_invalid_msgid_text, 'X')?;
        Ok(MessageID::Invalid(text.into()))
    }
}
/// A sequence of message identifiers, as returned by `nullable_msg_list`.
pub type MessageIDList<'a> = Vec<MessageID<'a>>;
// Printing a list delegates to `print_seq`, handing it the formatter, the
// list itself and `Formatter::write_fws`.
// NOTE(review): `write_fws` is presumably the separator emitted between two
// entries (the tests of this file expect `<a@b> <c@d>`) — confirm against
// `print_seq`.
impl<'a> Print for MessageIDList<'a> {
fn print(&self, fmt: &mut impl Formatter) {
print_seq(fmt, self, Formatter::write_fws)
}
}
/// Parses a single message identifier.
///
/// The bracketed form handled by `msg_id_angle` is tried first. If it does not
/// match, the input is retried as a bare identifier (no `<` / `>`) that must
/// run up to the end of the input; a warning is logged for that recovery when
/// the `tracing-recover` feature is enabled.
#[instrument_input("tracing")]
pub fn msg_id(input: &[u8]) -> IResult<&[u8], MessageID<'_>> {
    // Recovery path: identifier without angle brackets, terminated by EOF.
    let bare_until_eof = map(msg_id_bare(|i: &[u8]| eof(i)), |id| {
        #[cfg(feature = "tracing-recover")]
        warn!("message-id: bare msg-id without <>");
        id
    });

    alt((msg_id_angle, bare_until_eof))(input)
}
/// Parses a message identifier written between angle brackets.
///
/// Accepted shape: optional `cfws`, `<`, the identifier body (see
/// `msg_id_bare`), `>`, optional `cfws`. The closing bracket and the trailing
/// `cfws` are handed to `msg_id_bare` as its terminator.
pub fn msg_id_angle(input: &[u8]) -> IResult<&[u8], MessageID<'_>> {
    // Prefix: optional cfws followed by the opening bracket.
    let (after_ws, _) = opt(cfws)(input)?;
    let (after_open, _) = tag("<")(after_ws)?;

    // Body, terminated by `>` and optional trailing cfws.
    msg_id_bare(|i: &[u8]| recognize(pair(tag(">"), opt(cfws)))(i))(after_open)
}
/// Builds a parser for the body of a message identifier (without brackets).
///
/// `terminator` is the parser that must match right after the body; it is
/// used by both alternatives below, so the body is only accepted when it is
/// immediately followed by the terminator.
///
/// The returned parser tries, in order:
/// 1. `id_left "@" id_right terminator`, yielding `MessageID::ObsLeftRight`;
/// 2. optional `cfws`, a non-empty run of characters accepted by
///    `is_invalid_msgid_text`, optional `cfws`, then `terminator`, yielding
///    `MessageID::Invalid` (a warning is logged with `tracing-recover`).
pub fn msg_id_bare<F>(terminator: F) -> impl FnMut(&[u8]) -> IResult<&[u8], MessageID<'_>>
where
    F: for<'a> Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]>,
{
    move |input: &[u8]| {
        // Regular case: `left@right`.
        let well_formed = map(
            tuple((id_left, tag("@"), id_right, &terminator)),
            |(left, _, right, _)| MessageID::ObsLeftRight { left, right },
        );

        // Recovery case: keep the raw text of something that is not `left@right`.
        let raw_text = map(
            tuple((
                opt(cfws),
                take_utf8_while1(is_invalid_msgid_text),
                opt(cfws),
                &terminator,
            )),
            |(_, text, _, _)| {
                #[cfg(feature = "tracing-recover")]
                warn!("message-id: bare string instead of id-left@id-right");
                MessageID::Invalid(text)
            },
        );

        alt((well_formed, raw_text))(input)
    }
}
/// Tells whether `c` may appear in the raw text of a `MessageID::Invalid`.
///
/// The ASCII predicate handed to `is_nonascii_or` accepts every graphic ASCII
/// byte except `<`, `>` and `"`.
// NOTE(review): `is_nonascii_or` presumably also accepts any non-ASCII
// character — confirm against its definition.
fn is_invalid_msgid_text(c: char) -> bool {
    is_nonascii_or(|b| b.is_ascii_graphic() && !matches!(b, b'<' | b'>' | b'"'))(c)
}
/// Parses a possibly empty list of bracketed message identifiers.
///
/// The input is consumed piece by piece; at each position the following
/// alternatives are tried in order:
/// 1. a bracketed identifier accepted by `msg_id_angle` — kept;
/// 2. anything else starting with `<`, up to an optional closing `>` —
///    dropped (logged with the `tracing-unsupported` feature);
/// 3. `cfws` — dropped;
/// 4. any other text up to the next `<` — dropped (logged with the
///    `tracing-recover` feature).
///
/// Only the identifiers from alternative 1 end up in the returned list.
#[instrument_input("tracing")]
pub fn nullable_msg_list(input: &[u8]) -> IResult<&[u8], MessageIDList<'_>> {
    // The only alternative producing a value.
    let valid_id = map(msg_id_angle, Some);

    // `<...` that `msg_id_angle` rejected; the closing `>` may be missing.
    let unsupported_id = map(
        recognize(tuple((
            tag("<"),
            take_quoted_or_until(|c| c == b'>'),
            opt(tag(">")),
        ))),
        |_i| {
            #[cfg(feature = "tracing-unsupported")]
            warn!(input = %bytes_to_trace_string(_i),
                "unsupported msg-id in msg-list");
            None
        },
    );

    let blank = map(cfws, |_| None);

    // Stray text sitting between two identifiers.
    let stray_text = map(take_quoted_encoded_or_until1(|c| c == b'<'), |_i| {
        #[cfg(feature = "tracing-recover")]
        warn!(input = %bytes_to_trace_string(_i),
            "non-compliant text between msg-ids");
        None
    });

    let (rest, entries) = many0(alt((valid_id, unsupported_id, blank, stray_text)))(input)?;

    // Drop the `None` placeholders left by the skipped pieces.
    let ids = entries.into_iter().flatten().collect();
    Ok((rest, ids))
}
/// Parses the part of a message identifier located before the `@`.
///
/// Delegates entirely to `local_part`.
#[instrument_input("tracing")]
fn id_left(input: &[u8]) -> IResult<&[u8], LocalPart<'_>> {
local_part(input)
}
/// Parses the part of a message identifier located after the `@`.
///
/// Delegates entirely to `domain`.
#[instrument_input("tracing")]
fn id_right(input: &[u8]) -> IResult<&[u8], Domain<'_>> {
domain(input)
}
#[allow(dead_code)]
#[instrument_input("tracing")]
fn no_fold_literal(input: &[u8]) -> IResult<&[u8], Dtext<'_>> {
delimited(tag("["), dtext, tag("]"))(input)
}
// Unit tests for the message-id parsers and for the printing of id lists.
#[cfg(test)]
mod tests {
use super::*;
use crate::imf::mailbox::{Domain, LocalPart, LocalPartToken};
use crate::print::tests::print_to_vec;
use crate::text::misc_token::Word;
use crate::text::quoted::QuotedString;
use crate::text::words::Atom;
// Parses `txt` with `nullable_msg_list`, checks that the whole input was
// consumed, then prints the parsed list and compares it with `printed`.
// Both sides are compared as lossily decoded strings.
fn assert_msg_list_reprinted(txt: &[u8], printed: &[u8]) {
let (rest, parsed) = nullable_msg_list(txt).unwrap();
assert_eq!(rest, b"");
let reprinted = print_to_vec(parsed);
assert_eq!(
String::from_utf8_lossy(&reprinted),
String::from_utf8_lossy(printed)
);
}
// A plain bracketed `left@right` identifier.
#[test]
fn test_msg_id() {
assert_eq!(
msg_id(b"<5678.21-Nov-1997@example.com>"),
Ok((
&b""[..],
MessageID::ObsLeftRight {
left: LocalPart(vec![
LocalPartToken::Word(Word::Atom(Atom("5678".into()))),
LocalPartToken::Dot,
LocalPartToken::Word(Word::Atom(Atom("21-Nov-1997".into()))),
]),
right: Domain::Atoms(vec![Atom("example".into()), Atom("com".into()),]),
}
)),
);
}
// Identifiers with whitespace inside the brackets and around the dots, and
// an identifier whose left part is a quoted string.
#[test]
fn test_obsolete_msg_id() {
assert_eq!(
msg_id(b" < foo . bar@univ-valenciennes .fr >"),
Ok((
&b""[..],
MessageID::ObsLeftRight {
left: LocalPart(vec![
LocalPartToken::Word(Word::Atom(Atom("foo".into()))),
LocalPartToken::Dot,
LocalPartToken::Word(Word::Atom(Atom("bar".into()))),
]),
right: Domain::Atoms(
vec![Atom("univ-valenciennes".into()), Atom("fr".into()),]
),
}
)),
);
assert_eq!(
msg_id(b"<\"24806 Tue Sep 19 11:05:34 1995\"@bnr.ca>"),
Ok((
&b""[..],
MessageID::ObsLeftRight {
left: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(
vec![
"24806".into(),
" ".into(),
"Tue".into(),
" ".into(),
"Sep".into(),
" ".into(),
"19".into(),
" ".into(),
"11:05:34".into(),
" ".into(),
"1995".into(),
]
)))]),
right: Domain::Atoms(vec![Atom("bnr".into()), Atom("ca".into()),]),
}
)),
);
}
// Recovery cases of `msg_id`: identifiers without `@`, identifiers without
// angle brackets, and an identifier containing two `@`.
#[test]
fn test_noncompliant_msg_id() {
// No `@` inside the brackets: kept as raw text.
assert_eq!(
msg_id(b" <523C50DA-160C-4550-A44E-7E192513CF91> "),
Ok((
&b""[..],
MessageID::Invalid("523C50DA-160C-4550-A44E-7E192513CF91".into())
))
);
// Neither brackets nor `@`.
assert_eq!(
msg_id(b" foo "),
Ok((&b""[..], MessageID::Invalid("foo".into())))
);
// No brackets, but otherwise a regular `left@right` identifier.
assert_eq!(
msg_id(b"text/plain.RKLqBQUAAZl1yPGCYOHKDjrj_nwwBg.1758617731@alan.eu"),
Ok((
&b""[..],
MessageID::ObsLeftRight {
left: LocalPart(vec![
LocalPartToken::Word(Word::Atom(Atom("text/plain".into()))),
LocalPartToken::Dot,
LocalPartToken::Word(Word::Atom(Atom(
"RKLqBQUAAZl1yPGCYOHKDjrj_nwwBg".into()
))),
LocalPartToken::Dot,
LocalPartToken::Word(Word::Atom(Atom("1758617731".into()))),
]),
right: Domain::Atoms(vec![Atom("alan".into()), Atom("eu".into()),]),
},
))
);
// Two `@`: not a valid `left@right`, kept as raw text.
assert_eq!(
msg_id(b" <aAdGYiJBX0VZF2TI@millmess@rouba.net> "),
Ok((
&b""[..],
MessageID::Invalid("aAdGYiJBX0VZF2TI@millmess@rouba.net".into())
))
);
assert_eq!(
msg_id(b"<md5:xqmIG/sV8WoSG9UzafBCGw==>"),
Ok((
&b""[..],
MessageID::Invalid("md5:xqmIG/sV8WoSG9UzafBCGw==".into())
))
);
}
// A comma between two bracketed identifiers is skipped by the list parser.
#[test]
fn test_comma_separated_msg_list() {
assert_eq!(
nullable_msg_list(b" <8d9bb189354d4804bcc2fd1d1a5398b5@cnrs.fr>,<ef8fac8b36834864bae895571064565c@cnrs.fr>"),
Ok((
&b""[..],
vec![
MessageID::ObsLeftRight {
left: LocalPart(vec![
LocalPartToken::Word(Word::Atom(Atom("8d9bb189354d4804bcc2fd1d1a5398b5".into()))),
]),
right: Domain::Atoms(vec![
Atom("cnrs".into()),
Atom("fr".into()),
]),
},
MessageID::ObsLeftRight {
left: LocalPart(vec![
LocalPartToken::Word(Word::Atom(Atom("ef8fac8b36834864bae895571064565c".into()))),
]),
right: Domain::Atoms(vec![
Atom("cnrs".into()),
Atom("fr".into()),
]),
},
]
))
);
}
// Free text following an identifier is consumed and dropped from the list.
#[test]
fn test_msg_list_weird() {
assert_msg_list_reprinted(
b"<3AB624F9.5B6C6680@example.com>; from foo@example.com on Mon, Mar 19, 2001 at 04:25:45PM +0100",
b"<3AB624F9.5B6C6680@example.com>"
);
assert_msg_list_reprinted(
b"<3AB624F9.5B6C6680@example.com> from \"Foo bar\" on Mon, Mar 19, 2001 at 04:25:45 AM",
b"<3AB624F9.5B6C6680@example.com>",
);
}
// Malformed entries are dropped while the valid identifiers around them are
// kept.
#[test]
fn test_msg_list_recover() {
// Bracketed entry that is not a valid identifier, between two valid ones.
assert_msg_list_reprinted(
b"<abc@def>,<foo\n\tbar@outlook.com>,<baz@outlook.com>",
b"<abc@def> <baz@outlook.com>",
);
// Same malformed entry, but without its closing `>`.
assert_msg_list_reprinted(b"<abc@def>,<foo\n\tbar@outlook.com ", b"<abc@def>");
// Stray text (with a quote and an encoded word) between two identifiers.
assert_msg_list_reprinted(
b"<abc@def>,random\"garbage=?utf-8?q?aabb?= <uuu@jjj>",
b"<abc@def> <uuu@jjj>",
);
// A quoted `>` inside a malformed entry: the following `<uuu@jjj>` is not
// part of the expected output.
assert_msg_list_reprinted(b"<abc@def>,<randomgarbage\">\" <uuu@jjj>", b"<abc@def>");
}
}