1#[cfg(feature = "arbitrary")]
2use arbitrary::Arbitrary;
3use bounded_static::ToStatic;
4
5use base64::{engine::general_purpose, Engine as _};
6use nom::{
7 branch::alt,
8 bytes::complete::{tag, take, take_while, take_while1},
9 character::complete::one_of,
10 character::is_alphanumeric,
11 combinator::{all_consuming, map, map_parser, opt, recognize},
12 multi::{many0, many1, separated_list1},
13 sequence::{delimited, preceded, terminated, tuple},
14 IResult,
15};
16use std::borrow::Cow;
17use std::fmt;
18
19use crate::i18n::ContainsUtf8;
20use crate::print::{print_seq, Formatter, Print, ToStringFromPrint};
21use crate::text::ascii;
22use crate::text::charset::EmailCharset;
23use crate::text::utf8::take_utf8_while1;
24use crate::text::whitespace::{self, cfws, fws};
25use crate::text::words;
26#[cfg(feature = "arbitrary")]
27use crate::{
28 arbitrary_utils::{arbitrary_vec_nonempty, arbitrary_vec_where},
29 fuzz_eq::FuzzEq,
30};
31
/// Selects which character class delimits an encoded-word token while parsing.
///
/// The variant is consumed by `encoded_word_token_atom`, which picks a
/// different character predicate for each one.
#[derive(Clone, Copy)]
pub enum Context {
    /// The token is delimited using `words::is_atext`.
    Phrase,
    /// The token is delimited using `whitespace::is_ctext`.
    Comment,
    /// The token is delimited using `words::is_vchar`.
    Unstructured,
}
42
43pub fn encoded_word(ctx: Context) -> impl FnMut(&[u8]) -> IResult<&[u8], EncodedWord<'_>> {
44 move |input| delimited(opt(cfws), encoded_word_plain(ctx), opt(cfws))(input)
45}
46
47pub fn encoded_word_plain(ctx: Context) -> impl FnMut(&[u8]) -> IResult<&[u8], EncodedWord<'_>> {
50 move |input| map(separated_list1(fws, encoded_word_token(ctx)), EncodedWord)(input)
51}
52
53pub fn encoded_word_token(
54 ctx: Context,
55) -> impl FnMut(&[u8]) -> IResult<&[u8], EncodedWordToken<'_>> {
56 move |input| {
57 map_parser(
61 encoded_word_token_atom(ctx),
63 all_consuming(alt((encoded_word_token_quoted, encoded_word_token_base64))),
65 )(input)
66 }
67}
68
69fn encoded_word_token_atom(ctx: Context) -> impl FnMut(&[u8]) -> IResult<&[u8], &[u8]> {
70 move |input| {
71 match ctx {
78 Context::Phrase => recognize(take_utf8_while1(words::is_atext))(input),
80 Context::Comment => recognize(take_utf8_while1(whitespace::is_ctext))(input),
82 Context::Unstructured => recognize(take_utf8_while1(words::is_vchar))(input),
84 }
85 }
86}
87
88pub fn encoded_word_token_quoted(input: &[u8]) -> IResult<&[u8], EncodedWordToken<'_>> {
89 let (rest, (_, charset, _, _, _, txt, _)) = tuple((
90 tag("=?"),
91 words::mime_atom_plain,
92 tag("?"),
93 one_of("Qq"),
94 tag("?"),
95 ptext,
96 tag("?="),
97 ))(input)?;
98
99 let parsed = EncodedWordToken::Quoted(QuotedWord {
100 enc: charset.0.into(),
101 chunks: txt,
102 });
103 Ok((rest, parsed))
104}
105
106pub fn encoded_word_token_base64(input: &[u8]) -> IResult<&[u8], EncodedWordToken<'_>> {
107 let (rest, (_, charset, _, _, _, txt, _)) = tuple((
108 tag("=?"),
109 words::mime_atom_plain,
110 tag("?"),
111 one_of("Bb"),
112 tag("?"),
113 btext,
114 tag("?="),
115 ))(input)?;
116
117 let parsed = EncodedWordToken::Base64(Base64Word {
118 enc: charset.0.into(),
119 content: Cow::Borrowed(txt),
120 });
121 Ok((rest, parsed))
122}
123
124#[derive(Clone, ContainsUtf8, Debug, PartialEq, ToStatic, ToStringFromPrint)]
126#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
127#[contains_utf8(false)]
128pub struct EncodedWord<'a>(pub Vec<EncodedWordToken<'a>>); impl<'a> EncodedWord<'a> {
131 pub fn data(&self) -> String {
133 self.0
134 .iter()
135 .map(|tok| tok.data())
136 .collect::<Vec<_>>()
137 .join("")
138 }
139
140 pub fn from_chars<I>(chars: I) -> Self
143 where
144 I: IntoIterator<Item = char>,
145 {
146 const HEADER: &[u8] = b"=?UTF-8?Q?";
147 const FOOTER: &[u8] = b"?=";
148 const MAX_LEN: usize = 75;
150
151 let mut tokens: Vec<EncodedWordToken> = vec![];
152 let mut cur_chunks: Vec<QuotedChunk> = vec![];
153 let mut cur_word_len = 0;
154 let mut char_bytes: [u8; 4] = [0; 4];
155
156 for c in chars {
157 if HEADER.len() + cur_word_len + FOOTER.len() > MAX_LEN - 3
158 {
160 let mut w = QuotedWord {
161 enc: EmailCharset::utf8(),
162 chunks: vec![],
163 };
164 std::mem::swap(&mut w.chunks, &mut cur_chunks);
165 tokens.push(EncodedWordToken::Quoted(w));
166 cur_word_len = 0;
167 }
168
169 if c.is_ascii() && is_qchar_safe_strict(c as u8) {
170 if let Some(QuotedChunk::Safe(s)) = cur_chunks.last_mut() {
171 let s = s.to_mut();
172 s.push(c as u8)
173 } else {
174 cur_chunks.push(QuotedChunk::Safe(vec![c as u8].into()));
175 }
176 cur_word_len += 1;
177 } else if c == char::from(ascii::SP) {
178 cur_chunks.push(QuotedChunk::Space);
180 cur_word_len += 1;
181 } else {
182 c.encode_utf8(&mut char_bytes);
183 let c_bytes = &char_bytes[0..c.len_utf8()];
184 if let Some(QuotedChunk::Encoded(e)) = cur_chunks.last_mut() {
185 e.extend_from_slice(c_bytes)
186 } else {
187 cur_chunks.push(QuotedChunk::Encoded(c_bytes.to_vec()))
188 }
189 cur_word_len += 3 * c.len_utf8();
191 }
192 }
193
194 tokens.push(EncodedWordToken::Quoted(QuotedWord {
195 enc: EmailCharset::utf8(),
196 chunks: cur_chunks,
197 }));
198
199 EncodedWord(tokens)
200 }
201}
202impl<'a> Print for EncodedWord<'a> {
203 fn print(&self, fmt: &mut impl Formatter) {
204 print_seq(fmt, &self.0, Formatter::write_fws)
205 }
206}
207
/// Generates an `EncodedWord` whose token list comes from
/// `arbitrary_vec_nonempty`.
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for EncodedWord<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let tokens = arbitrary_vec_nonempty(u)?;
        Ok(EncodedWord(tokens))
    }
}
214
/// A single encoded-word token, in one of the two encodings handled by this
/// module.
#[derive(PartialEq, Debug, Clone, ToStatic)]
#[cfg_attr(feature = "arbitrary", derive(FuzzEq, Arbitrary))]
pub enum EncodedWordToken<'a> {
    /// A token using the `Q` encoding (printed with `?Q?`).
    Quoted(QuotedWord<'a>),
    /// A token using the `B` (base64) encoding (printed with `?B?`).
    Base64(Base64Word<'a>),
}
221impl<'a> EncodedWordToken<'a> {
222 pub fn data(&self) -> String {
223 match self {
224 EncodedWordToken::Quoted(v) => v.data(),
225 EncodedWordToken::Base64(v) => v.data(),
226 }
227 }
228}
229impl<'a> Print for EncodedWordToken<'a> {
230 fn print(&self, fmt: &mut impl Formatter) {
231 match self {
232 EncodedWordToken::Quoted(q) => q.print(fmt),
233 EncodedWordToken::Base64(b) => b.print(fmt),
234 }
235 }
236}
237
/// A base64 (`B`) encoded-word token: a charset plus the still-encoded text.
#[derive(PartialEq, Clone, ToStatic)]
#[cfg_attr(feature = "arbitrary", derive(FuzzEq))]
pub struct Base64Word<'a> {
    /// Charset used to turn the decoded bytes into text.
    pub enc: EmailCharset,
    /// The base64 text, stored undecoded. When produced by the parser in this
    /// file it is borrowed from the input and excludes trailing `=` padding.
    #[cfg_attr(feature = "arbitrary", fuzz_eq(use_eq))]
    pub content: Cow<'a, [u8]>,
}
247impl<'a> fmt::Debug for Base64Word<'a> {
248 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
249 fmt.debug_struct("Base64Word")
250 .field("enc", &self.enc)
251 .field("content", &String::from_utf8_lossy(&self.content))
252 .finish()
253 }
254}
255
256impl<'a> Base64Word<'a> {
257 pub fn data(&self) -> String {
258 general_purpose::STANDARD_NO_PAD
259 .decode(&self.content)
260 .map(|d| self.enc.decode(d.as_slice()).to_string())
261 .unwrap_or("".into())
262 }
263}
264
265impl<'a> Print for Base64Word<'a> {
266 fn print(&self, fmt: &mut impl Formatter) {
267 fmt.write_bytes(b"=?");
268 fmt.write_bytes(self.enc.as_bytes());
269 fmt.write_bytes(b"?B?");
270 fmt.write_bytes(&self.content);
271 fmt.write_bytes(b"?=");
272 }
273}
274
/// Generates a `Base64Word` with an arbitrary charset and a content made only
/// of bytes accepted by `is_bchar`.
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Base64Word<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        // The charset is drawn before the content.
        let enc = u.arbitrary::<EmailCharset>()?;
        let raw = arbitrary_vec_where(u, |byte| is_bchar(*byte))?;
        Ok(Self {
            enc,
            content: Cow::Owned(raw),
        })
    }
}
286
/// A `Q` encoded-word token: a charset plus the text split into chunks.
#[derive(PartialEq, Debug, Clone, ToStatic)]
#[cfg_attr(feature = "arbitrary", derive(Arbitrary))]
pub struct QuotedWord<'a> {
    /// Charset used to decode the `Encoded` chunks.
    pub enc: EmailCharset,
    /// The encoded text, in order, as literal, escaped and space chunks.
    pub chunks: Vec<QuotedChunk<'a>>,
}
293
294impl<'a> QuotedWord<'a> {
295 pub fn data(&self) -> String {
296 self.chunks.iter().fold(String::new(), |mut acc, c| {
297 match c {
298 QuotedChunk::Safe(v) => {
299 let (content, _) = encoding_rs::UTF_8.decode_without_bom_handling(v);
300 acc.push_str(content.as_ref());
301 }
302 QuotedChunk::Space => acc.push(' '),
303 QuotedChunk::Encoded(v) => {
304 let d = self.enc.decode(v.as_slice());
305 acc.push_str(d.as_ref());
306 }
307 };
308 acc
309 })
310 }
311}
312
313impl<'a> Print for QuotedWord<'a> {
314 fn print(&self, fmt: &mut impl Formatter) {
315 fmt.write_bytes(b"=?");
316 fmt.write_bytes(self.enc.as_bytes());
317 fmt.write_bytes(b"?Q?");
318 print_seq(fmt, &self.chunks, |_| ());
319 fmt.write_bytes(b"?=");
320 }
321}
322
/// Two quoted words are fuzz-equal when their charsets are fuzz-equal and
/// their chunk lists are equal after `normalize_quoted_chunks`.
#[cfg(feature = "arbitrary")]
impl<'a> FuzzEq for QuotedWord<'a> {
    fn fuzz_eq(&self, other: &Self) -> bool {
        if !self.enc.fuzz_eq(&other.enc) {
            return false;
        }
        let ours = normalize_quoted_chunks(&self.chunks);
        let theirs = normalize_quoted_chunks(&other.chunks);
        ours == theirs
    }
}
330
/// One piece of the text of a `Q` encoded-word.
#[derive(PartialEq, Clone, ToStatic)]
pub enum QuotedChunk<'a> {
    /// Bytes that are written to the output unchanged.
    Safe(Cow<'a, [u8]>),
    /// Raw bytes that are written as one `=XX` escape per byte.
    Encoded(Vec<u8>),
    /// A space, written as `_`.
    Space,
}
337impl<'a> fmt::Debug for QuotedChunk<'a> {
338 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
339 match self {
340 QuotedChunk::Safe(b) => fmt
341 .debug_tuple("QuotedChunk::Safe")
342 .field(&String::from_utf8_lossy(b))
343 .finish(),
344 QuotedChunk::Encoded(e) => fmt.debug_tuple("QuotedChunk::Encoded").field(e).finish(),
345 QuotedChunk::Space => fmt.debug_tuple("QuotedChunk::Space").finish(),
346 }
347 }
348}
349
350impl<'a> Print for QuotedChunk<'a> {
351 fn print(&self, fmt: &mut impl Formatter) {
352 match self {
353 QuotedChunk::Safe(b) => fmt.write_bytes(b),
354 QuotedChunk::Encoded(e) => {
355 for c in e {
356 fmt.write_bytes(format!("={:02X}", c).as_bytes());
357 }
358 }
359 QuotedChunk::Space => fmt.write_bytes(b"_"),
360 }
361 }
362}
363
/// Generates one of the three chunk variants; `Safe` content is restricted to
/// bytes accepted by `is_safe_char2`, `Encoded` content is unrestricted.
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for QuotedChunk<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let chunk = match u.int_in_range(0..=2)? {
            0 => {
                let literal = arbitrary_vec_where(u, |byte| is_safe_char2(*byte))?;
                QuotedChunk::Safe(Cow::Owned(literal))
            }
            1 => {
                let raw: Vec<u8> = u.arbitrary()?;
                QuotedChunk::Encoded(raw)
            }
            2 => QuotedChunk::Space,
            _ => unreachable!(),
        };
        Ok(chunk)
    }
}
381
382pub fn ptext(input: &[u8]) -> IResult<&[u8], Vec<QuotedChunk<'_>>> {
384 many0(alt((safe_char2, encoded_space, many_hex_octet)))(input)
385}
386
387fn safe_char2(input: &[u8]) -> IResult<&[u8], QuotedChunk<'_>> {
388 map(take_while1(is_safe_char2), |b| {
389 QuotedChunk::Safe(Cow::Borrowed(b))
390 })(input)
391}
392
393fn is_safe_char2(c: u8) -> bool {
398 words::is_vchar(c.into()) && c != ascii::UNDERSCORE && c != ascii::QUESTION && c != ascii::EQ
399}
400
401fn encoded_space(input: &[u8]) -> IResult<&[u8], QuotedChunk<'_>> {
402 map(tag("_"), |_| QuotedChunk::Space)(input)
403}
404
405fn hex_octet(input: &[u8]) -> IResult<&[u8], u8> {
406 let (rest, hbytes) = preceded(tag("="), take(2usize))(input)?;
407
408 let hstr = String::from_utf8_lossy(hbytes);
409 let parsed = u8::from_str_radix(hstr.as_ref(), 16).map_err(|_| {
410 nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Verify))
411 })?;
412
413 Ok((rest, parsed))
414}
415
416fn many_hex_octet(input: &[u8]) -> IResult<&[u8], QuotedChunk<'_>> {
417 map(many1(hex_octet), QuotedChunk::Encoded)(input)
418}
419
420pub fn btext(input: &[u8]) -> IResult<&[u8], &[u8]> {
423 terminated(take_while(is_bchar), many0(tag("=")))(input)
424}
425
426fn is_bchar(c: u8) -> bool {
427 is_alphanumeric(c) || c == ascii::PLUS || c == ascii::SLASH
428}
429
430fn is_qchar_safe_strict(b: u8) -> bool {
437 is_alphanumeric(b)
441 || b == ascii::EXCLAMATION
442 || b == ascii::ASTERISK
443 || b == ascii::PLUS
444 || b == ascii::MINUS
445 || b == ascii::SLASH
446}
447
/// Returns a canonical, owned copy of `chunks` for comparison purposes.
///
/// Adjacent `Safe` chunks are merged into one, as are adjacent `Encoded`
/// chunks. Empty `Safe` and `Encoded` chunks are dropped: they print nothing,
/// so keeping them would make two chunk lists with the same printed form
/// compare unequal and would prevent their neighbours from being merged.
#[cfg(feature = "arbitrary")]
fn normalize_quoted_chunks<'a>(chunks: &[QuotedChunk<'a>]) -> Vec<QuotedChunk<'static>> {
    use bounded_static::ToBoundedStatic;
    let mut new_chunks: Vec<QuotedChunk<'static>> = Vec::with_capacity(chunks.len());
    for chunk in chunks {
        let prints_nothing = match chunk {
            QuotedChunk::Safe(bytes) => bytes.is_empty(),
            QuotedChunk::Encoded(bytes) => bytes.is_empty(),
            QuotedChunk::Space => false,
        };
        if prints_nothing {
            continue;
        }

        match (new_chunks.last_mut(), chunk) {
            (Some(QuotedChunk::Safe(b1)), QuotedChunk::Safe(b2)) => b1.to_mut().extend(&**b2),
            (Some(QuotedChunk::Encoded(v1)), QuotedChunk::Encoded(v2)) => v1.extend(v2),
            (_, _) => new_chunks.push(chunk.to_static()),
        }
    }
    new_chunks
}
461
/// Unit tests for parsing, decoding and printing encoded words.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::print::tests::print_to_vec_with;

    // `ptext` splits Q-encoded text into literal, escaped and space chunks.
    #[test]
    fn test_ptext() {
        assert_eq!(
            ptext(b"Accus=E9_de_r=E9ception_(affich=E9)"),
            Ok((
                &b""[..],
                vec![
                    QuotedChunk::Safe(b"Accus"[..].into()),
                    QuotedChunk::Encoded(vec![0xe9]),
                    QuotedChunk::Space,
                    QuotedChunk::Safe(b"de"[..].into()),
                    QuotedChunk::Space,
                    QuotedChunk::Safe(b"r"[..].into()),
                    QuotedChunk::Encoded(vec![0xe9]),
                    QuotedChunk::Safe(b"ception"[..].into()),
                    QuotedChunk::Space,
                    QuotedChunk::Safe(b"(affich"[..].into()),
                    QuotedChunk::Encoded(vec![0xe9]),
                    QuotedChunk::Safe(b")"[..].into()),
                ]
            ))
        );
    }

    // A literal space inside the encoded text must make parsing fail.
    #[test]
    fn test_invalid_space() {
        assert!(
            encoded_word(Context::Unstructured)(b"=?iso8859-1?Q?Accus=E9 de r=E9ception?=")
                .is_err()
        );
    }

    // Q-encoded words are decoded with their charset; the first word is
    // accepted in `Context::Unstructured` but rejected in `Context::Phrase`.
    #[test]
    fn test_decode_word() {
        assert_eq!(
            encoded_word(Context::Unstructured)(
                b"=?iso8859-1?Q?Accus=E9_de_r=E9ception_(affich=E9)?="
            )
            .unwrap()
            .1
            .data(),
            "Accusé de réception (affiché)".to_string(),
        );

        assert_eq!(
            encoded_word(Context::Unstructured)(b"=?iso-8859-1?Q?=805.4bn?=")
                .unwrap()
                .1
                .data(),
            "€5.4bn".to_string(),
        );

        assert!(encoded_word(Context::Phrase)(
            b"=?iso8859-1?Q?Accus=E9_de_r=E9ception_(affich=E9)?="
        )
        .is_err());
    }

    // A base64 word parses to a `Base64Word` whose content excludes the
    // trailing `=` padding.
    #[test]
    fn test_decode_word_ast() {
        assert_eq!(
            encoded_word(Context::Phrase)(b"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=")
                .unwrap()
                .1,
            EncodedWord(vec![EncodedWordToken::Base64(Base64Word {
                enc: EmailCharset::from(b"iso-8859-1"),
                content: b"SWYgeW91IGNhbiByZWFkIHRoaXMgeW8"[..].into(),
            })])
        );
    }

    // The same base64 word decodes to the expected text.
    #[test]
    fn test_decode_word_b64() {
        assert_eq!(
            encoded_word(Context::Phrase)(b"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=")
                .unwrap()
                .1
                .data(),
            "If you can read this yo".to_string(),
        );
    }

    // A multi-byte UTF-8 character given as two consecutive escapes decodes
    // to a single character.
    #[test]
    fn test_strange_quoted() {
        assert_eq!(
            encoded_word(Context::Phrase)(b"=?UTF-8?Q?John_Sm=C3=AEth?=")
                .unwrap()
                .1
                .data(),
            "John Smîth".to_string(),
        );
    }

    // Several tokens separated by whitespace (including a folded line) are
    // decoded and concatenated without the separating whitespace.
    #[test]
    fn test_multiple() {
        assert_eq!(
            encoded_word(Context::Phrase)(b"=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=")
                .unwrap()
                .1
                .data(),
            "ab".to_string(),
        );

        assert_eq!(
            encoded_word(Context::Phrase)(b"=?ISO-8859-1?Q?a?= \r\n =?ISO-8859-1?Q?b?=")
                .unwrap()
                .1
                .data(),
            "ab".to_string(),
        );
    }

    // `from_chars` followed by `print` yields a UTF-8 Q-encoded word.
    #[test]
    fn test_encode() {
        let out = print_to_vec_with(|f| {
            EncodedWord::from_chars("Accusé de réception (affiché)".chars()).print(f);
        });
        assert_eq!(
            String::from_utf8_lossy(&out),
            "=?UTF-8?Q?Accus=C3=A9_de_r=C3=A9ception_=28affich=C3=A9=29?="
        );

        let out = print_to_vec_with(|f| {
            EncodedWord::from_chars("John Smîth".chars()).print(f);
        });
        assert_eq!(out, b"=?UTF-8?Q?John_Sm=C3=AEth?=");
    }

    // Long input is split into several tokens, printed on folded lines.
    #[test]
    fn test_encode_folding() {
        let out = print_to_vec_with(|f| {
            f.begin_line_folding();
            EncodedWord::from_chars(
                "Accusé de réception (affiché) Accusé de réception (affiché)".chars(),
            )
            .print(f);
        });
        assert_eq!(
            String::from_utf8_lossy(&out),
            "=?UTF-8?Q?Accus=C3=A9_de_r=C3=A9ception_=28affich=C3=A9=29_Accus=C3=A9_?=\r\n =?UTF-8?Q?de_r=C3=A9ception_=28affich=C3=A9=29?="
        );
    }

    // Empty input still produces one (empty) encoded word.
    #[test]
    fn test_encode_empty() {
        let out = print_to_vec_with(|f| {
            EncodedWord::from_chars("".chars()).print(f);
        });
        assert_eq!(out, b"=?UTF-8?Q??=");
    }
}