1#[cfg(feature = "arbitrary")]
2use arbitrary::Arbitrary;
3use bounded_static::ToStatic;
4use nom::{
5 branch::alt,
6 bytes::complete::tag,
7 combinator::{eof, map, opt, recognize},
8 multi::many0,
9 sequence::{delimited, pair, preceded, tuple},
10 IResult,
11};
12use std::borrow::Cow;
13#[cfg(any(feature = "tracing-recover", feature = "tracing-unsupported"))]
14use tracing::warn;
15
16use crate::i18n::ContainsUtf8;
17use crate::imf::mailbox::{domain, dtext, local_part, Domain, Dtext, LocalPart};
18use crate::print::{print_seq, Formatter, Print, ToStringFromPrint};
19use crate::text::recovery::{take_quoted_encoded_or_until1, take_quoted_or_until};
20use crate::text::utf8::{is_nonascii_or, take_utf8_while1};
21use crate::text::whitespace::cfws;
22#[cfg(any(feature = "tracing-recover", feature = "tracing-unsupported"))]
23use crate::utils::bytes_to_trace_string;
24#[cfg(feature = "arbitrary")]
25use crate::{arbitrary_utils::arbitrary_string_nonempty_where, fuzz_eq::FuzzEq};
26use eml_codec_derives::instrument_input;
27
28#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
31#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
32pub enum MessageID<'a> {
33 ObsLeftRight {
35 left: LocalPart<'a>,
36 right: Domain<'a>,
37 },
38 #[cfg_attr(feature = "arbitrary", fuzz_eq(use_eq))]
40 Invalid(Cow<'a, str>),
41}
42impl<'a> Print for MessageID<'a> {
43 fn print(&self, fmt: &mut impl Formatter) {
44 fmt.write_bytes(b"<");
45 match &self {
46 MessageID::ObsLeftRight { left, right } => {
47 left.print(fmt);
48 fmt.write_bytes(b"@");
49 right.print(fmt);
50 }
51 MessageID::Invalid(txt) => fmt.write_bytes(txt.as_bytes()),
52 }
53 fmt.write_bytes(b">");
54 }
55}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for MessageID<'a> {
    /// Generates either variant of [`MessageID`] from unstructured fuzz data.
    ///
    /// An integer drawn from `0..=1` selects the variant: `0` builds an
    /// `ObsLeftRight` from two arbitrary parts, `1` builds an `Invalid` whose
    /// text comes from `arbitrary_string_nonempty_where` restricted by
    /// `is_invalid_msgid_text`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let selector = u.int_in_range(0..=1)?;
        let generated = match selector {
            0 => {
                // Draw order matters for reproducibility: left first, then right.
                let left = u.arbitrary()?;
                let right = u.arbitrary()?;
                MessageID::ObsLeftRight { left, right }
            }
            1 => {
                // NOTE(review): 'X' is presumably the filler character used by the
                // helper when it has to produce a character itself — confirm.
                let text = arbitrary_string_nonempty_where(u, is_invalid_msgid_text, 'X')?;
                MessageID::Invalid(text.into())
            }
            // `int_in_range(0..=1)` cannot yield anything else.
            _ => unreachable!(),
        };
        Ok(generated)
    }
}
72
73pub type MessageIDList<'a> = Vec<MessageID<'a>>;
75
76impl<'a> Print for MessageIDList<'a> {
77 fn print(&self, fmt: &mut impl Formatter) {
78 print_seq(fmt, self, Formatter::write_fws)
79 }
80}
81
82#[instrument_input("tracing")]
101pub fn msg_id(input: &[u8]) -> IResult<&[u8], MessageID<'_>> {
102 alt((
103 msg_id_angle,
104 map(msg_id_bare(|i: &[u8]| eof(i)), |msg| {
105 #[cfg(feature = "tracing-recover")]
106 warn!("message-id: bare msg-id without <>");
107 msg
108 }),
109 ))(input)
110}
111pub fn msg_id_angle(input: &[u8]) -> IResult<&[u8], MessageID<'_>> {
112 preceded(
113 pair(opt(cfws), tag("<")),
114 msg_id_bare(|i: &[u8]| recognize(pair(tag(">"), opt(cfws)))(i)),
115 )(input)
116}
117pub fn msg_id_bare<F>(terminator: F) -> impl FnMut(&[u8]) -> IResult<&[u8], MessageID<'_>>
118where
119 F: for<'a> Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]>,
120{
121 move |input: &[u8]| {
122 alt((
123 map(
124 tuple((id_left, tag("@"), id_right, &terminator)),
125 |(left, _, right, _)| MessageID::ObsLeftRight { left, right },
126 ),
127 map(
128 tuple((
129 opt(cfws),
130 take_utf8_while1(is_invalid_msgid_text),
131 opt(cfws),
132 &terminator,
133 )),
134 |(_, s, _, _)| {
135 #[cfg(feature = "tracing-recover")]
136 warn!("message-id: bare string instead of id-left@id-right");
137 MessageID::Invalid(s)
138 },
139 ),
140 ))(input)
141 }
142}
143
144fn is_invalid_msgid_text(c: char) -> bool {
146 is_nonascii_or(|c| c.is_ascii_graphic() && c != b'<' && c != b'>' && c != b'"')(c)
147}
148
149#[instrument_input("tracing")]
168pub fn nullable_msg_list(input: &[u8]) -> IResult<&[u8], MessageIDList<'_>> {
169 let (input, tokens) = many0(alt((
170 map(msg_id_angle, Some),
171 map(
173 recognize(tuple((
174 tag("<"),
175 take_quoted_or_until(|c| c == b'>'),
176 opt(tag(">")),
178 ))),
179 |_i| {
180 #[cfg(feature = "tracing-unsupported")]
181 warn!(input = %bytes_to_trace_string(_i),
182 "unsupported msg-id in msg-list");
183 None
184 },
185 ),
186 map(cfws, |_| None),
188 map(take_quoted_encoded_or_until1(|c| c == b'<'), |_i| {
190 #[cfg(feature = "tracing-recover")]
191 warn!(input = %bytes_to_trace_string(_i),
192 "non-compliant text between msg-ids");
193 None
194 }),
195 )))(input)?;
196
197 Ok((input, tokens.into_iter().flatten().collect()))
198}
199
200#[instrument_input("tracing")]
209fn id_left(input: &[u8]) -> IResult<&[u8], LocalPart<'_>> {
210 local_part(input)
211}
212
213#[instrument_input("tracing")]
222fn id_right(input: &[u8]) -> IResult<&[u8], Domain<'_>> {
223 domain(input)
224}
225
226#[allow(dead_code)]
227#[instrument_input("tracing")]
228fn no_fold_literal(input: &[u8]) -> IResult<&[u8], Dtext<'_>> {
229 delimited(tag("["), dtext, tag("]"))(input)
230}
231
#[cfg(test)]
mod tests {
    use super::*;
    use crate::imf::mailbox::{Domain, LocalPart, LocalPartToken};
    use crate::print::tests::print_to_vec;
    use crate::text::misc_token::Word;
    use crate::text::quoted::QuotedString;
    use crate::text::words::Atom;

    /// Builds a local part made of the given atoms, with a dot token between
    /// each pair of neighbours.
    fn dotted_local_part<'a>(atoms: &[&'a str]) -> LocalPart<'a> {
        let mut tokens = Vec::new();
        for (position, atom) in atoms.iter().enumerate() {
            if position > 0 {
                tokens.push(LocalPartToken::Dot);
            }
            tokens.push(LocalPartToken::Word(Word::Atom(Atom((*atom).into()))));
        }
        LocalPart(tokens)
    }

    /// Builds a local part consisting of a single quoted string whose
    /// content is the given fragments, in order.
    fn quoted_local_part<'a>(fragments: &[&'a str]) -> LocalPart<'a> {
        let quoted = QuotedString(fragments.iter().map(|part| (*part).into()).collect());
        LocalPart(vec![LocalPartToken::Word(Word::Quoted(quoted))])
    }

    /// Builds a domain made of the given atoms.
    fn atom_domain<'a>(labels: &[&'a str]) -> Domain<'a> {
        Domain::Atoms(labels.iter().map(|label| Atom((*label).into())).collect())
    }

    /// Asserts that `msg_id` consumes all of `raw` and yields `expected`.
    #[track_caller]
    fn assert_parses_to<'a>(raw: &'a [u8], expected: MessageID<'a>) {
        assert_eq!(msg_id(raw), Ok((&b""[..], expected)));
    }

    /// Asserts that `txt` is entirely consumed by `nullable_msg_list` and
    /// that printing the parsed list gives `printed`.
    fn assert_msg_list_reprinted(txt: &[u8], printed: &[u8]) {
        let (remaining, list) = nullable_msg_list(txt).unwrap();
        assert_eq!(remaining, b"");

        let output = print_to_vec(list);
        // Compared as text so that a failure is readable.
        assert_eq!(
            String::from_utf8_lossy(&output),
            String::from_utf8_lossy(printed)
        );
    }

    #[test]
    fn test_msg_id() {
        assert_parses_to(
            b"<5678.21-Nov-1997@example.com>",
            MessageID::ObsLeftRight {
                left: dotted_local_part(&["5678", "21-Nov-1997"]),
                right: atom_domain(&["example", "com"]),
            },
        );
    }

    #[test]
    fn test_obsolete_msg_id() {
        // Whitespace around the brackets, the dots and the atoms.
        assert_parses_to(
            b" < foo . bar@univ-valenciennes .fr >",
            MessageID::ObsLeftRight {
                left: dotted_local_part(&["foo", "bar"]),
                right: atom_domain(&["univ-valenciennes", "fr"]),
            },
        );

        // Quoted string on the left-hand side.
        assert_parses_to(
            b"<\"24806 Tue Sep 19 11:05:34 1995\"@bnr.ca>",
            MessageID::ObsLeftRight {
                left: quoted_local_part(&[
                    "24806", " ", "Tue", " ", "Sep", " ", "19", " ", "11:05:34", " ", "1995",
                ]),
                right: atom_domain(&["bnr", "ca"]),
            },
        );
    }

    #[test]
    fn test_noncompliant_msg_id() {
        // No `@` at all.
        assert_parses_to(
            b" <523C50DA-160C-4550-A44E-7E192513CF91> ",
            MessageID::Invalid("523C50DA-160C-4550-A44E-7E192513CF91".into()),
        );

        // Neither brackets nor `@`.
        assert_parses_to(b" foo ", MessageID::Invalid("foo".into()));

        // Structured identifier without brackets.
        assert_parses_to(
            b"text/plain.RKLqBQUAAZl1yPGCYOHKDjrj_nwwBg.1758617731@alan.eu",
            MessageID::ObsLeftRight {
                left: dotted_local_part(&[
                    "text/plain",
                    "RKLqBQUAAZl1yPGCYOHKDjrj_nwwBg",
                    "1758617731",
                ]),
                right: atom_domain(&["alan", "eu"]),
            },
        );

        // Two `@` signs.
        assert_parses_to(
            b" <aAdGYiJBX0VZF2TI@millmess@rouba.net> ",
            MessageID::Invalid("aAdGYiJBX0VZF2TI@millmess@rouba.net".into()),
        );

        assert_parses_to(
            b"<md5:xqmIG/sV8WoSG9UzafBCGw==>",
            MessageID::Invalid("md5:xqmIG/sV8WoSG9UzafBCGw==".into()),
        );
    }

    #[test]
    fn test_comma_separated_msg_list() {
        let expected = vec![
            MessageID::ObsLeftRight {
                left: dotted_local_part(&["8d9bb189354d4804bcc2fd1d1a5398b5"]),
                right: atom_domain(&["cnrs", "fr"]),
            },
            MessageID::ObsLeftRight {
                left: dotted_local_part(&["ef8fac8b36834864bae895571064565c"]),
                right: atom_domain(&["cnrs", "fr"]),
            },
        ];

        assert_eq!(
            nullable_msg_list(b" <8d9bb189354d4804bcc2fd1d1a5398b5@cnrs.fr>,<ef8fac8b36834864bae895571064565c@cnrs.fr>"),
            Ok((&b""[..], expected))
        );
    }

    #[test]
    fn test_msg_list_weird() {
        // Trailing free text after the identifier is discarded.
        assert_msg_list_reprinted(
            b"<3AB624F9.5B6C6680@example.com>; from foo@example.com on Mon, Mar 19, 2001 at 04:25:45PM +0100",
            b"<3AB624F9.5B6C6680@example.com>",
        );

        // Same, with a quoted string inside the trailing text.
        assert_msg_list_reprinted(
            b"<3AB624F9.5B6C6680@example.com> from \"Foo bar\" on Mon, Mar 19, 2001 at 04:25:45 AM",
            b"<3AB624F9.5B6C6680@example.com>",
        );
    }

    #[test]
    fn test_msg_list_recover() {
        // A bracketed entry that is not a parsable msg-id is dropped; its
        // neighbours are kept.
        assert_msg_list_reprinted(
            b"<abc@def>,<foo\n\tbar@outlook.com>,<baz@outlook.com>",
            b"<abc@def> <baz@outlook.com>",
        );

        // Same entry, but the closing bracket is missing.
        assert_msg_list_reprinted(b"<abc@def>,<foo\n\tbar@outlook.com ", b"<abc@def>");

        // Stray text between identifiers is skipped up to the next `<`.
        assert_msg_list_reprinted(
            b"<abc@def>,random\"garbage=?utf-8?q?aabb?= <uuu@jjj>",
            b"<abc@def> <uuu@jjj>",
        );

        // A quoted `>` inside a bracketed entry does not end that entry.
        assert_msg_list_reprinted(b"<abc@def>,<randomgarbage\">\" <uuu@jjj>", b"<abc@def>");
    }
}