1use base64::Engine as _;
17
18use crate::error::Error;
19use crate::types::{Address, DateTime, ParsedAttachment, ParsedEmail};
20
/// Deepest multipart nesting level `walk_mime_tree` will process; a call
/// made with a larger depth returns no bodies and no attachments.
const MAX_MIME_DEPTH: u32 = 64;
24
/// Base64 engine over the standard alphabet whose decoder is configured with
/// `DecodePaddingMode::Indifferent`, so trailing `=` padding is optional.
const LENIENT_BASE64: base64::engine::GeneralPurpose = base64::engine::GeneralPurpose::new(
    &base64::alphabet::STANDARD,
    base64::engine::GeneralPurposeConfig::new()
        .with_decode_padding_mode(base64::engine::DecodePaddingMode::Indifferent),
);
32
/// Header values shared by `parse_email` and `parse_headers_only`, as produced
/// by `extract_header_fields`.
struct HeaderFields {
    /// Value of `Message-ID` as returned by `extract_message_id`.
    message_id: Option<String>,
    /// First message id found in `In-Reply-To`.
    in_reply_to: Option<String>,
    /// Message ids found in `References`, joined with single spaces.
    references: Option<String>,
    /// `Subject` after encoded-word decoding.
    subject: Option<String>,
    /// First address of the `From` header.
    from: Address,
    /// Addresses listed in `To`.
    to: Vec<Address>,
    /// Addresses listed in `Cc`.
    cc: Vec<Address>,
    /// Addresses listed in `Bcc`.
    bcc: Vec<Address>,
    /// Addresses listed in `Reply-To`.
    reply_to: Vec<Address>,
    /// Parsed `Date` header, when present and parseable.
    date: Option<DateTime>,
}
49
50fn extract_header_fields(headers: &[(String, String)]) -> Result<HeaderFields, Error> {
56 Ok(HeaderFields {
57 message_id: extract_message_id(headers),
58 in_reply_to: extract_in_reply_to(headers),
59 references: extract_references(headers),
60 subject: get_header_value(headers, "subject").map(|v| decode_encoded_words(&v)),
61 from: extract_from(headers)?,
62 to: extract_address_list(headers, "to"),
63 cc: extract_address_list(headers, "cc"),
64 bcc: extract_address_list(headers, "bcc"),
65 reply_to: extract_address_list(headers, "reply-to"),
66 date: extract_date(headers),
67 })
68}
69
70pub fn parse_email(raw: &[u8]) -> Result<ParsedEmail, Error> {
82 if raw.is_empty() {
83 return Err(Error::EmptyInput);
84 }
85
86 let size = raw.len() as u64;
87
88 let (header_bytes, body_bytes) = split_header_body(raw);
90 let raw_headers = String::from_utf8_lossy(header_bytes).into_owned();
91
92 let headers = parse_headers(header_bytes);
95 let hf = extract_header_fields(&headers)?;
96
97 let content_type = get_header_value(&headers, "content-type")
99 .unwrap_or_else(|| "text/plain; charset=us-ascii".to_string());
100 let transfer_encoding =
101 get_header_value(&headers, "content-transfer-encoding").unwrap_or_default();
102 let content_disposition = get_header_value(&headers, "content-disposition").unwrap_or_default();
103 let content_id = get_header_value(&headers, "content-id");
104
105 let (body_text, body_html, attachments) = if is_multipart(&content_type) {
106 match extract_boundary(&content_type) {
107 Some(boundary) => {
108 let is_digest = extract_mime_type(&content_type) == "multipart/digest";
109 walk_mime_tree(body_bytes, &boundary, "", 0, is_digest)
110 }
111 None => extract_simple_body(
115 body_bytes,
116 "text/plain; charset=us-ascii",
117 &transfer_encoding,
118 &content_disposition,
119 content_id.as_deref(),
120 ),
121 }
122 } else {
123 extract_simple_body(
124 body_bytes,
125 &content_type,
126 &transfer_encoding,
127 &content_disposition,
128 content_id.as_deref(),
129 )
130 };
131
132 Ok(ParsedEmail {
133 message_id: hf.message_id,
134 in_reply_to: hf.in_reply_to,
135 references: hf.references,
136 subject: hf.subject,
137 from: hf.from,
138 to: hf.to,
139 cc: hf.cc,
140 bcc: hf.bcc,
141 reply_to: hf.reply_to,
142 date: hf.date,
143 body_text,
144 body_html,
145 attachments,
146 raw_headers,
147 size,
148 })
149}
150
151pub fn parse_headers_only(raw: &[u8]) -> Result<ParsedEmail, Error> {
161 if raw.is_empty() {
162 return Err(Error::EmptyInput);
163 }
164
165 let size = raw.len() as u64;
166
167 let (header_bytes, _) = split_header_body(raw);
168 let raw_headers = String::from_utf8_lossy(header_bytes).into_owned();
169 let headers = parse_headers(header_bytes);
170 let hf = extract_header_fields(&headers)?;
171
172 Ok(ParsedEmail {
173 message_id: hf.message_id,
174 in_reply_to: hf.in_reply_to,
175 references: hf.references,
176 subject: hf.subject,
177 from: hf.from,
178 to: hf.to,
179 cc: hf.cc,
180 bcc: hf.bcc,
181 reply_to: hf.reply_to,
182 date: hf.date,
183 body_text: None,
184 body_html: None,
185 attachments: Vec::new(),
186 raw_headers,
187 size,
188 })
189}
190
/// Splits a raw message (or MIME part) into its header bytes and body bytes.
///
/// * Input starting with a blank line (`\r\n` or `\n`) has no headers; the
///   body is everything after that line.
/// * Otherwise the split happens at the first blank line, written either as
///   `\r\n\r\n` or as `\n\n`, whichever occurs earlier. The separator itself
///   belongs to neither half.
/// * Without any blank line the whole input is headers and the body is empty.
///
/// Picking the earliest separator matters for LF-only messages whose body
/// happens to contain `\r\n\r\n`: preferring the CRLF form unconditionally
/// would swallow the start of the body into the header section.
fn split_header_body(raw: &[u8]) -> (&[u8], &[u8]) {
    if let Some(body) = raw.strip_prefix(b"\r\n") {
        return (&[], body);
    }
    if let Some(body) = raw.strip_prefix(b"\n") {
        return (&[], body);
    }

    let crlf_pos = raw.windows(4).position(|w| w == b"\r\n\r\n");
    // An LF-LF separator only wins if it lies before the CRLF one, so there is
    // no need to scan past that point.
    let lf_scope = &raw[..crlf_pos.unwrap_or(raw.len())];
    let lf_pos = lf_scope.windows(2).position(|w| w == b"\n\n");

    match (lf_pos, crlf_pos) {
        (Some(pos), _) => (&raw[..pos], &raw[pos + 2..]),
        (None, Some(pos)) => (&raw[..pos], &raw[pos + 4..]),
        (None, None) => (raw, &[]),
    }
}
219
/// Parses a header section into `(lowercased name, value)` pairs in order of
/// appearance.
///
/// * Bytes are decoded as UTF-8 with lossy replacement.
/// * Parsing stops at the first empty line.
/// * A line starting with space or tab is appended verbatim (leading
///   whitespace included) to the value of the header being built.
/// * Lines without a colon are ignored; a line whose name is empty discards
///   itself and any continuation lines that follow it.
fn parse_headers(raw: &[u8]) -> Vec<(String, String)> {
    let text = String::from_utf8_lossy(raw);
    let mut headers: Vec<(String, String)> = Vec::new();
    // Header under construction: (name as written, accumulated value).
    let mut pending: Option<(String, String)> = None;

    let lines = text
        .split('\n')
        .map(|l| l.strip_suffix('\r').unwrap_or(l))
        .take_while(|l| !l.is_empty());

    for line in lines {
        if line.starts_with([' ', '\t']) {
            if let Some((_, value)) = pending.as_mut() {
                value.push_str(line);
            }
            continue;
        }

        let Some((name, value)) = line.split_once(':') else {
            continue;
        };

        if let Some((done_name, done_value)) = pending.take() {
            headers.push((done_name.to_lowercase(), done_value));
        }
        let name = name.trim();
        if !name.is_empty() {
            pending = Some((name.to_string(), value.trim_start().to_string()));
        }
    }

    if let Some((last_name, last_value)) = pending {
        headers.push((last_name.to_lowercase(), last_value));
    }

    headers
}
264
/// Returns a copy of the value of the first header whose stored name equals
/// `name` exactly (callers pass the lowercase name, matching `parse_headers`).
fn get_header_value(headers: &[(String, String)], name: &str) -> Option<String> {
    for (key, value) in headers {
        if key == name {
            return Some(value.clone());
        }
    }
    None
}
272
273pub(crate) fn decode_encoded_words(input: &str) -> String {
282 let mut result = String::new();
283 let mut remaining = input;
284
285 while !remaining.is_empty() {
286 if let Some(start) = remaining.find("=?") {
287 result.push_str(&remaining[..start]);
289 remaining = &remaining[start..];
290
291 if let Some((decoded, consumed)) = try_decode_encoded_word(remaining) {
292 result.push_str(&decoded);
293 remaining = &remaining[consumed..];
294
295 let trimmed = remaining.trim_start_matches([' ', '\t']);
297 if trimmed.starts_with("=?") {
298 remaining = trimmed;
299 }
300 } else {
301 result.push_str("=?");
303 remaining = &remaining[2..];
304 }
305 } else {
306 result.push_str(remaining);
307 break;
308 }
309 }
310
311 result
312}
313
314fn try_decode_encoded_word(input: &str) -> Option<(String, usize)> {
318 let rest = input.strip_prefix("=?")?;
320 let q1 = rest.find('?')?;
321 let charset = &rest[..q1];
322 let rest2 = &rest[q1 + 1..];
323 let q2 = rest2.find('?')?;
324 let encoding = &rest2[..q2];
325 let rest3 = &rest2[q2 + 1..];
326 let q3 = rest3.find("?=")?;
327 let encoded_text = &rest3[..q3];
328 let consumed = 2 + q1 + 1 + q2 + 1 + q3 + 2;
329
330 let bytes = match encoding.to_ascii_uppercase().as_str() {
331 "B" => LENIENT_BASE64.decode(encoded_text.as_bytes()).ok()?,
332 "Q" => decode_q_encoding(encoded_text),
333 _ => return None,
334 };
335
336 Some((decode_charset(charset, &bytes), consumed))
337}
338
339fn decode_q_encoding(input: &str) -> Vec<u8> {
343 let bytes = input.as_bytes();
344 let mut result = Vec::with_capacity(bytes.len());
345 let mut i = 0;
346 while i < bytes.len() {
347 if bytes[i] == b'=' && i + 2 < bytes.len() {
348 if let Some(val) = decode_hex_pair(bytes[i + 1], bytes[i + 2]) {
349 result.push(val);
350 i += 3;
351 continue;
352 }
353 }
354 if bytes[i] == b'_' {
355 result.push(b' ');
357 } else {
358 result.push(bytes[i]);
359 }
360 i += 1;
361 }
362 result
363}
364
365fn extract_from(headers: &[(String, String)]) -> Result<Address, Error> {
380 let value = get_header_value(headers, "from").ok_or(Error::MissingFrom)?;
381 let addrs = decode_address_names(parse_address_list(&value));
386 addrs.into_iter().next().ok_or(Error::MissingFrom)
387}
388
389fn extract_address_list(headers: &[(String, String)], name: &str) -> Vec<Address> {
394 get_header_value(headers, name)
395 .map(|v| decode_address_names(parse_address_list(&v)))
396 .unwrap_or_default()
397}
398
399fn decode_address_names(addrs: Vec<Address>) -> Vec<Address> {
407 addrs
408 .into_iter()
409 .map(|mut addr| {
410 if let Some(ref name) = addr.name {
411 addr.name = Some(decode_encoded_words(name));
412 }
413 addr
414 })
415 .collect()
416}
417
418fn parse_address_list(input: &str) -> Vec<Address> {
432 let mut addresses = Vec::new();
433 let mut current = String::new();
434 let mut in_quotes = false;
435 let mut escaped = false;
436 let mut angle_depth: i32 = 0;
437 let mut paren_depth: i32 = 0;
441 let mut in_group = false;
444
445 for ch in input.chars() {
446 if escaped {
449 current.push(ch);
450 escaped = false;
451 continue;
452 }
453 match ch {
454 '\\' if in_quotes || paren_depth > 0 => {
455 escaped = true;
459 current.push(ch);
460 }
461 '"' if paren_depth == 0 => {
462 in_quotes = !in_quotes;
463 current.push(ch);
464 }
465 '(' if !in_quotes => {
469 paren_depth += 1;
470 current.push(ch);
471 }
472 ')' if !in_quotes && paren_depth > 0 => {
473 paren_depth -= 1;
474 current.push(ch);
475 }
476 '<' if !in_quotes && paren_depth == 0 => {
477 angle_depth += 1;
478 current.push(ch);
479 }
480 '>' if !in_quotes && paren_depth == 0 => {
481 angle_depth -= 1;
482 current.push(ch);
483 }
484 ':' if !in_quotes && angle_depth == 0 && paren_depth == 0 && !in_group => {
490 if current.trim().contains('@') {
491 current.push(ch);
492 } else {
493 in_group = true;
495 current.clear();
496 }
497 }
498 ';' if !in_quotes && angle_depth == 0 && paren_depth == 0 && in_group => {
500 if let Some(addr) = parse_single_address(¤t) {
502 addresses.push(addr);
503 }
504 current.clear();
505 in_group = false;
506 }
507 ',' if !in_quotes && angle_depth == 0 && paren_depth == 0 => {
508 if let Some(addr) = parse_single_address(¤t) {
509 addresses.push(addr);
510 }
511 current.clear();
512 }
513 _ => current.push(ch),
514 }
515 }
516 if let Some(addr) = parse_single_address(¤t) {
517 addresses.push(addr);
518 }
519
520 addresses
521}
522
523fn parse_single_address(input: &str) -> Option<Address> {
530 let input = input.trim();
531 if input.is_empty() {
532 return None;
533 }
534
535 if let Some(angle_start) = input.rfind('<') {
537 if let Some(angle_end) = input.rfind('>') {
538 if angle_end > angle_start {
539 let email = input[angle_start + 1..angle_end].trim().to_string();
540 let name_part = input[..angle_start].trim();
541 let name = if name_part.is_empty() {
542 None
543 } else {
544 let name = strip_outer_quotes(name_part).trim().to_string();
549 if name.is_empty() {
550 None
551 } else {
552 Some(unescape_quoted_string(&name))
554 }
555 };
556 if !email.is_empty() {
557 return Some(Address { name, email });
558 }
559 }
560 }
561 }
562
563 if input.contains('@') {
566 if let Some(paren_start) = input.find('(') {
569 let email_part = input[..paren_start].trim();
570 let after_email = input[paren_start..].trim();
573 let name = if !email_part.is_empty() && email_part.contains('@') {
574 extract_comment_text(after_email)
577 } else {
578 None
579 };
580 let stripped = strip_comments(input);
583 let email = stripped.trim().to_string();
584 if !email.is_empty() && email.contains('@') {
585 return Some(Address { name, email });
586 }
587 }
588 return Some(Address {
589 name: None,
590 email: input.to_string(),
591 });
592 }
593
594 None
595}
596
/// Returns the trimmed contents of the parenthesised comment that `s` starts
/// with (after trimming), or `None` when `s` does not start with `(` or the
/// comment is blank.
///
/// Nested parentheses are kept in the output, a backslash makes the following
/// character literal (the backslash itself is dropped), and an unterminated
/// comment runs to the end of the input.
fn extract_comment_text(s: &str) -> Option<String> {
    let inner = s.trim().strip_prefix('(')?;

    // The opening parenthesis has already been consumed.
    let mut depth: u32 = 1;
    let mut text = String::new();
    let mut chars = inner.chars();

    while let Some(c) = chars.next() {
        match c {
            '\\' => {
                if let Some(literal) = chars.next() {
                    text.push(literal);
                }
            }
            '(' => {
                text.push('(');
                depth = depth.saturating_add(1);
            }
            ')' => {
                depth = depth.saturating_sub(1);
                if depth == 0 {
                    break;
                }
                text.push(')');
            }
            other => text.push(other),
        }
    }

    let text = text.trim();
    (!text.is_empty()).then(|| text.to_string())
}
652
653fn extract_message_id(headers: &[(String, String)]) -> Option<String> {
662 get_header_value(headers, "message-id").and_then(|v| {
663 if let Some(id) = extract_first_msg_id(&v) {
665 return Some(id);
666 }
667 let trimmed = v.trim();
670 if trimmed.is_empty() || trimmed.contains('<') || trimmed.contains('>') {
671 None
672 } else {
673 Some(trimmed.to_string())
674 }
675 })
676}
677
678fn extract_in_reply_to(headers: &[(String, String)]) -> Option<String> {
680 get_header_value(headers, "in-reply-to").and_then(|v| extract_first_msg_id(&v))
681}
682
683fn extract_references(headers: &[(String, String)]) -> Option<String> {
685 get_header_value(headers, "references").and_then(|v| {
686 let ids = extract_all_msg_ids(&v);
687 if ids.is_empty() {
688 None
689 } else {
690 Some(ids.join(" "))
691 }
692 })
693}
694
/// Returns the trimmed text between the first `<` and the next `>` in
/// `value`. Gives `None` when either bracket is missing or the text between
/// them is blank.
fn extract_first_msg_id(value: &str) -> Option<String> {
    let (_, after_open) = value.split_once('<')?;
    let (inner, _) = after_open.split_once('>')?;
    match inner.trim() {
        "" => None,
        id => Some(id.to_string()),
    }
}
706
/// Collects, in order, the trimmed contents of every `<...>` pair in `value`.
/// Blank ids are skipped and scanning stops at a `<` that is never closed.
fn extract_all_msg_ids(value: &str) -> Vec<String> {
    let mut ids = Vec::new();
    let mut rest = value;

    while let Some((_, after_open)) = rest.split_once('<') {
        let Some((inner, tail)) = after_open.split_once('>') else {
            break;
        };
        let id = inner.trim();
        if !id.is_empty() {
            ids.push(id.to_string());
        }
        rest = tail;
    }

    ids
}
725
726fn extract_date(headers: &[(String, String)]) -> Option<DateTime> {
732 get_header_value(headers, "date").and_then(|v| parse_rfc5322_date(&v))
733}
734
735pub(crate) fn parse_rfc5322_date(input: &str) -> Option<DateTime> {
746 let input = strip_comments(input);
747 let input = input.trim();
748
749 let input = if let Some(comma_pos) = input.find(',') {
751 input[comma_pos + 1..].trim()
752 } else {
753 input
754 };
755
756 let parts: Vec<&str> = input.split_whitespace().collect();
757 if parts.len() < 4 {
758 return None;
759 }
760
761 let day: u8 = parts[0].parse().ok()?;
762 let month = parse_month_name(parts[1])?;
763 let year: u16 = parse_year(parts[2])?;
764
765 let time_parts: Vec<&str> = parts[3].split(':').collect();
766 if time_parts.len() < 2 {
767 return None;
768 }
769
770 let hour: u8 = time_parts[0].parse().ok()?;
771 let minute: u8 = time_parts[1].parse().ok()?;
772 let second: u8 = time_parts.get(2).and_then(|s| s.parse().ok()).unwrap_or(0);
773
774 if day == 0 || day > 31 || hour > 23 || minute > 59 || second > 60 {
777 return None;
778 }
779
780 let tz_offset_minutes = parts.get(4).map_or(0, |tz| parse_timezone(tz));
781
782 Some(DateTime {
783 year,
784 month,
785 day,
786 hour,
787 minute,
788 second,
789 tz_offset_minutes,
790 })
791}
792
/// Maps a three-letter English month abbreviation (any ASCII case) to its
/// number, 1 for January through 12 for December.
fn parse_month_name(s: &str) -> Option<u8> {
    const MONTHS: [&str; 12] = [
        "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec",
    ];
    let index = MONTHS.iter().position(|m| s.eq_ignore_ascii_case(m))?;
    u8::try_from(index + 1).ok()
}
811
/// Parses a year field, expanding the short forms: values 0–49 map to
/// 2000–2049, values 50–999 have 1900 added, and anything from 1000 up is
/// taken literally. Returns `None` when `s` is not a valid `u16`.
fn parse_year(s: &str) -> Option<u16> {
    let raw: u16 = s.parse().ok()?;
    let year = match raw {
        0..=49 => 2000 + raw,
        50..=999 => 1900 + raw,
        _ => raw,
    };
    Some(year)
}
825
/// Converts a zone field into an offset from UTC in minutes.
///
/// * Numeric zones `+HHMM` / `-HHMM` give `±(HH * 60 + MM)`; characters after
///   the fifth are ignored.
/// * The North American abbreviations EST/EDT/CST/CDT/MST/MDT/PST/PDT map to
///   their fixed offsets (any ASCII case).
/// * Everything else, including `GMT`, `UT` and malformed input, gives 0.
///
/// The numeric form is checked byte by byte and must consist of ASCII digits.
/// Header text may contain multi-byte characters, so slicing the string at
/// fixed byte offsets could land inside a character and panic.
fn parse_timezone(s: &str) -> i16 {
    let s = s.trim();
    let bytes = s.as_bytes();

    if bytes.len() >= 5 && matches!(bytes[0], b'+' | b'-') {
        let digit = |b: u8| b.is_ascii_digit().then(|| i16::from(b - b'0'));
        if let (Some(h1), Some(h2), Some(m1), Some(m2)) =
            (digit(bytes[1]), digit(bytes[2]), digit(bytes[3]), digit(bytes[4]))
        {
            // At most 99 * 60 + 99 = 6039, well inside i16.
            let magnitude = (h1 * 10 + h2) * 60 + (m1 * 10 + m2);
            return if bytes[0] == b'-' { -magnitude } else { magnitude };
        }
    }

    match s.to_ascii_uppercase().as_str() {
        "EST" | "CDT" => -300,
        "EDT" => -240,
        "CST" | "MDT" => -360,
        "MST" | "PDT" => -420,
        "PST" => -480,
        _ => 0,
    }
}
847
/// Recursively walks a multipart body and returns
/// `(body_text, body_html, attachments)`.
///
/// * `body`: bytes of the multipart container, split on `boundary`.
/// * `section_prefix`: dotted section path of the container; empty at the top
///   level. Parts are numbered from 1, giving sections such as `2` or `1.3`.
/// * `depth`: current nesting level; beyond `MAX_MIME_DEPTH` nothing is
///   returned for this subtree.
/// * `is_digest`: when true, parts without a `Content-Type` header default to
///   `message/rfc822` instead of `text/plain; charset=us-ascii`.
///
/// The first non-empty `text/plain` and `text/html` parts that are not marked
/// `attachment` become the bodies; every other non-multipart part is reported
/// as an attachment. A nested multipart part lacking a boundary is skipped.
fn walk_mime_tree(
    body: &[u8],
    boundary: &str,
    section_prefix: &str,
    depth: u32,
    is_digest: bool,
) -> (Option<String>, Option<String>, Vec<ParsedAttachment>) {
    // Refuse to descend further once the nesting limit is exceeded.
    if depth > MAX_MIME_DEPTH {
        return (None, None, Vec::new());
    }

    let parts = split_mime_parts(body, boundary);
    let mut body_text: Option<String> = None;
    let mut body_html: Option<String> = None;
    let mut attachments: Vec<ParsedAttachment> = Vec::new();

    for (i, part) in parts.iter().enumerate() {
        // Section numbers are 1-based and dotted under the parent's path.
        let section_num = i + 1;
        let section = if section_prefix.is_empty() {
            section_num.to_string()
        } else {
            format!("{section_prefix}.{section_num}")
        };

        let (part_header_bytes, part_body) = split_header_body(part);
        let part_headers = parse_headers(part_header_bytes);

        // Content type assumed for parts that do not declare one.
        let default_ct = if is_digest {
            "message/rfc822"
        } else {
            "text/plain; charset=us-ascii"
        };
        let ct = get_header_value(&part_headers, "content-type")
            .unwrap_or_else(|| default_ct.to_string());
        let cte = get_header_value(&part_headers, "content-transfer-encoding").unwrap_or_default();
        let cd = get_header_value(&part_headers, "content-disposition").unwrap_or_default();
        let content_id = get_header_value(&part_headers, "content-id");

        if is_multipart(&ct) {
            if let Some(inner_boundary) = extract_boundary(&ct) {
                let inner_digest = extract_mime_type(&ct) == "multipart/digest";
                let (t, h, a) = walk_mime_tree(
                    part_body,
                    &inner_boundary,
                    &section,
                    depth + 1,
                    inner_digest,
                );
                // Bodies found earlier take precedence over nested ones.
                if body_text.is_none() {
                    body_text = t;
                }
                if body_html.is_none() {
                    body_html = h;
                }
                attachments.extend(a);
            }
        } else {
            let mime = extract_mime_type(&ct);
            let cd_lower = cd.to_lowercase();
            let is_explicit_attachment = cd_lower.starts_with("attachment");

            if !is_explicit_attachment && mime == "text/plain" && body_text.is_none() {
                let decoded = decode_body(part_body, &cte, &ct);
                // An empty text part leaves the slot open for a later part.
                if !decoded.is_empty() {
                    body_text = Some(decoded);
                }
            } else if !is_explicit_attachment && mime == "text/html" && body_html.is_none() {
                let decoded = decode_body(part_body, &cte, &ct);
                if !decoded.is_empty() {
                    body_html = Some(decoded);
                }
            } else if !mime.starts_with("multipart/") {
                // A Content-ID marks the part inline even without a disposition.
                let is_inline = cd_lower.starts_with("inline") || content_id.is_some();
                let filename = extract_filename(&cd, &ct);

                attachments.push(ParsedAttachment {
                    filename,
                    content_type: mime,
                    // Stored without the surrounding angle brackets.
                    content_id: content_id
                        .map(|s| s.trim_matches(|c| c == '<' || c == '>').trim().to_string()),
                    is_inline,
                    // Length of the part body as it appears in the message,
                    // before any transfer decoding.
                    size: Some(part_body.len() as u64),
                    section: Some(section),
                });
            }
        }
    }

    (body_text, body_html, attachments)
}
960
/// Splits a multipart body into the raw bytes of its parts.
///
/// A delimiter is `--boundary` found at the very start of `body` or directly
/// after a `\n`; matches elsewhere are ignored. Each part runs from the line
/// after one delimiter to just before the line ending that precedes the next
/// delimiter. Text before the first delimiter is discarded. Splitting stops at
/// the closing delimiter `--boundary--`; if the body ends without one, the
/// bytes after the last delimiter form the final part.
fn split_mime_parts<'a>(body: &'a [u8], boundary: &str) -> Vec<&'a [u8]> {
    let delim = format!("--{boundary}");
    let delim_bytes = delim.as_bytes();
    let end_delim = format!("--{boundary}--");
    let end_delim_bytes = end_delim.as_bytes();

    let mut parts: Vec<&'a [u8]> = Vec::new();
    let mut search_from: usize = 0;
    // Start offset of the part currently open; `None` until the first delimiter.
    let mut part_start: Option<usize> = None;

    loop {
        let Some(rel_pos) = find_subsequence(&body[search_from..], delim_bytes) else {
            // No further delimiter: keep the unterminated trailing part, if any.
            if let Some(start) = part_start {
                if start < body.len() {
                    parts.push(&body[start..]);
                }
            }
            break;
        };
        let pos = search_from + rel_pos;

        // A delimiter must begin a line; otherwise keep searching after it.
        if pos > 0 && body[pos - 1] != b'\n' {
            search_from = pos + delim_bytes.len();
            continue;
        }

        if let Some(start) = part_start {
            // The line ending before the delimiter is not part of the content.
            let end = if pos >= 2 && body[pos - 2] == b'\r' && body[pos - 1] == b'\n' {
                pos - 2
            } else if pos >= 1 && body[pos - 1] == b'\n' {
                pos - 1
            } else {
                pos
            };
            if start <= end {
                parts.push(&body[start..end]);
            }
        }

        // Closing delimiter: nothing after it is a part.
        if body[pos..].starts_with(end_delim_bytes) {
            break;
        }

        // Skip trailing blanks and the line ending of the delimiter line.
        let mut next = pos + delim_bytes.len();
        while next < body.len() && (body[next] == b' ' || body[next] == b'\t') {
            next += 1;
        }
        if next < body.len() && body[next] == b'\r' {
            next += 1;
        }
        if next < body.len() && body[next] == b'\n' {
            next += 1;
        }

        part_start = Some(next);
        search_from = next;
    }

    parts
}
1037
1038fn extract_simple_body(
1052 body: &[u8],
1053 content_type: &str,
1054 transfer_encoding: &str,
1055 content_disposition: &str,
1056 content_id: Option<&str>,
1057) -> (Option<String>, Option<String>, Vec<ParsedAttachment>) {
1058 if body.is_empty() {
1059 return (None, None, Vec::new());
1060 }
1061
1062 let mime = extract_mime_type(content_type);
1063 let cd_lower = content_disposition.to_lowercase();
1064 let is_explicit_attachment = cd_lower.starts_with("attachment");
1065
1066 if is_explicit_attachment || (mime != "text/plain" && mime != "text/html") {
1071 let is_inline = cd_lower.starts_with("inline") || content_id.is_some();
1072 let filename = extract_filename(content_disposition, content_type);
1073
1074 let attachment = ParsedAttachment {
1075 filename,
1076 content_type: mime,
1077 content_id: content_id
1080 .map(|s| s.trim_matches(|c| c == '<' || c == '>').trim().to_string()),
1081 is_inline,
1082 size: Some(body.len() as u64),
1083 section: Some("1".to_string()),
1085 };
1086 return (None, None, vec![attachment]);
1087 }
1088
1089 let text = decode_body(body, transfer_encoding, content_type);
1090
1091 if text.is_empty() {
1098 return (None, None, Vec::new());
1099 }
1100
1101 if mime == "text/html" {
1102 (None, Some(text), Vec::new())
1103 } else {
1104 (Some(text), None, Vec::new())
1106 }
1107}
1108
1109fn decode_body(data: &[u8], transfer_encoding: &str, content_type: &str) -> String {
1114 let decoded = decode_transfer_encoding(data, transfer_encoding);
1115 let charset = extract_param(content_type, "charset").unwrap_or_else(|| "us-ascii".to_string());
1117 let text = decode_charset(&charset, &decoded);
1118 if let Some(stripped) = text.strip_suffix("\r\n") {
1128 stripped.to_string()
1129 } else if let Some(stripped) = text.strip_suffix('\n') {
1130 stripped.to_string()
1131 } else {
1132 text
1133 }
1134}
1135
1136fn decode_transfer_encoding(data: &[u8], encoding: &str) -> Vec<u8> {
1138 match encoding.trim().to_ascii_lowercase().as_str() {
1139 "base64" => {
1140 let cleaned: Vec<u8> = data
1144 .iter()
1145 .copied()
1146 .filter(|b| b.is_ascii_alphanumeric() || *b == b'+' || *b == b'/' || *b == b'=')
1147 .collect();
1148 LENIENT_BASE64
1149 .decode(&cleaned)
1150 .unwrap_or_else(|_| data.to_vec())
1151 }
1152 "quoted-printable" => decode_quoted_printable(data),
1153 _ => data.to_vec(),
1155 }
1156}
1157
1158fn decode_quoted_printable(data: &[u8]) -> Vec<u8> {
1160 let mut result = Vec::with_capacity(data.len());
1161 let mut i = 0;
1162 while i < data.len() {
1163 if data[i] == b'=' {
1164 if i + 2 < data.len() {
1165 if data[i + 1] == b'\r' && i + 2 < data.len() && data[i + 2] == b'\n' {
1167 i += 3;
1168 continue;
1169 }
1170 if data[i + 1] == b'\n' {
1172 i += 2;
1173 continue;
1174 }
1175 if let Some(val) = decode_hex_pair(data[i + 1], data[i + 2]) {
1177 result.push(val);
1178 i += 3;
1179 continue;
1180 }
1181 } else if i + 1 < data.len() && data[i + 1] == b'\n' {
1182 i += 2;
1184 continue;
1185 } else if i + 1 < data.len() && data[i + 1] == b'\r' {
1186 i += 2;
1188 continue;
1189 } else if i + 1 == data.len() {
1190 break;
1193 }
1194 }
1196 result.push(data[i]);
1197 i += 1;
1198 }
1199 result
1200}
1201
1202fn decode_charset(charset: &str, bytes: &[u8]) -> String {
1210 let charset_lower = charset.to_lowercase();
1211 if charset_lower == "utf-8" || charset_lower == "utf8" {
1212 return String::from_utf8_lossy(bytes).into_owned();
1213 }
1214
1215 let encoding =
1216 encoding_rs::Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::UTF_8);
1217 let (decoded, _, _) = encoding.decode(bytes);
1218 decoded.into_owned()
1219}
1220
1221fn is_multipart(content_type: &str) -> bool {
1227 extract_mime_type(content_type).starts_with("multipart/")
1228}
1229
1230fn extract_mime_type(content_type: &str) -> String {
1235 let ct = content_type.trim();
1236 let end = ct.find(';').unwrap_or(ct.len());
1237 let raw = ct[..end].trim();
1240 strip_comments(raw).trim().to_lowercase()
1241}
1242
/// Returns the `boundary` parameter of a `Content-Type` value, as found by
/// `extract_param`.
fn extract_boundary(content_type: &str) -> Option<String> {
    extract_param(content_type, "boundary")
}
1247
/// Reports whether a parameter name may start at byte offset `pos` of `lower`:
/// either `pos` is 0 or the preceding byte is `;`, space or tab.
fn is_param_boundary(lower: &str, pos: usize) -> bool {
    let Some(prev) = pos.checked_sub(1) else {
        return true;
    };
    matches!(lower.as_bytes().get(prev), Some(b';' | b' ' | b'\t'))
}
1267
1268fn extract_param_value(rest: &str) -> Option<String> {
1274 let value = if let Some(stripped) = rest.strip_prefix('"') {
1275 let end = find_closing_quote(stripped);
1277 &stripped[..end]
1278 } else {
1279 let end = rest
1280 .find(|c: char| c == ';' || c.is_whitespace())
1281 .unwrap_or(rest.len());
1282 &rest[..end]
1283 };
1284 if value.is_empty() {
1285 None
1286 } else if rest.starts_with('"') {
1287 Some(unescape_quoted_string(value))
1289 } else {
1290 Some(value.to_string())
1291 }
1292}
1293
1294fn extract_param(header_value: &str, param_name: &str) -> Option<String> {
1295 let lower = header_value.to_ascii_lowercase();
1300 let pattern = format!("{param_name}=");
1301 let mut search_from = 0;
1302
1303 loop {
1304 let idx = lower[search_from..].find(&pattern)?;
1305 let abs_idx = search_from + idx;
1306
1307 if is_param_boundary(&lower, abs_idx) {
1309 if is_inside_quotes(&lower, abs_idx) {
1312 search_from = abs_idx + pattern.len();
1313 continue;
1314 }
1315
1316 let rest = &header_value[abs_idx + pattern.len()..];
1317 return extract_param_value(rest);
1318 }
1319
1320 search_from = abs_idx + pattern.len();
1321 }
1322}
1323
/// Returns the byte offset of the first `"` in `s` that is not preceded by a
/// backslash escape, or `s.len()` when there is none.
fn find_closing_quote(s: &str) -> usize {
    let bytes = s.as_bytes();
    let mut idx = 0;
    while let Some(&b) = bytes.get(idx) {
        match b {
            // Skip the backslash together with the byte it escapes.
            b'\\' => idx += 2,
            b'"' => return idx,
            _ => idx += 1,
        }
    }
    bytes.len()
}
1345
/// Reports whether byte offset `pos` of `s` lies inside a double-quoted
/// string, judged by the number of unescaped `"` before it. A backslash skips
/// the byte that follows it.
fn is_inside_quotes(s: &str, pos: usize) -> bool {
    let scanned = &s.as_bytes()[..pos.min(s.len())];
    let mut inside = false;
    let mut idx = 0;

    while idx < scanned.len() {
        match scanned[idx] {
            b'\\' => {
                idx += 2;
                continue;
            }
            b'"' => inside = !inside,
            _ => {}
        }
        idx += 1;
    }

    inside
}
1372
1373fn extract_filename(disposition: &str, content_type: &str) -> Option<String> {
1386 if let Some(name) = extract_rfc2231_param(disposition, "filename") {
1388 return Some(name);
1389 }
1390 if let Some(name) = extract_rfc2231_continuation(disposition, "filename") {
1392 return Some(name);
1393 }
1394 if let Some(name) = extract_param(disposition, "filename") {
1396 return Some(decode_encoded_words(&name));
1397 }
1398 if let Some(name) = extract_rfc2231_param(content_type, "name") {
1400 return Some(name);
1401 }
1402 if let Some(name) = extract_rfc2231_continuation(content_type, "name") {
1404 return Some(name);
1405 }
1406 if let Some(name) = extract_param(content_type, "name") {
1408 return Some(decode_encoded_words(&name));
1409 }
1410 None
1411}
1412
1413fn extract_rfc2231_param(header_value: &str, param_name: &str) -> Option<String> {
1418 let lower = header_value.to_ascii_lowercase();
1419 let pattern = format!("{param_name}*=");
1420 let mut search_from = 0;
1421
1422 let idx = loop {
1423 let rel_idx = lower[search_from..].find(&pattern)?;
1424 let abs_idx = search_from + rel_idx;
1425 if is_param_boundary(&lower, abs_idx) {
1427 if is_inside_quotes(&lower, abs_idx) {
1429 search_from = abs_idx + pattern.len();
1430 continue;
1431 }
1432 break abs_idx;
1433 }
1434 search_from = abs_idx + pattern.len();
1435 };
1436
1437 let rest = &header_value[idx + pattern.len()..];
1438 let end = rest.find(';').unwrap_or(rest.len());
1439 let value = rest[..end].trim();
1440
1441 let mut parts_iter = value.splitn(3, '\'');
1443 let charset = parts_iter.next()?;
1444 let _language = parts_iter.next()?; let encoded = parts_iter.next()?;
1446
1447 let decoded_bytes = percent_decode(encoded);
1448 Some(decode_charset(charset, &decoded_bytes))
1449}
1450
1451fn extract_rfc2231_continuation(header_value: &str, param_name: &str) -> Option<String> {
1461 let lower = header_value.to_ascii_lowercase();
1462 let mut sections: Vec<(u32, bool, String)> = Vec::new(); let mut charset = String::new();
1464
1465 for section_idx in 0u32..100 {
1466 let encoded_pattern = format!("{param_name}*{section_idx}*=");
1468 if let Some(val) = find_param_value(&lower, header_value, &encoded_pattern) {
1469 if section_idx == 0 {
1470 let mut parts = val.splitn(3, '\'');
1472 if let (Some(cs), Some(_lang), Some(encoded)) =
1473 (parts.next(), parts.next(), parts.next())
1474 {
1475 charset = cs.to_string();
1476 sections.push((section_idx, true, encoded.to_string()));
1477 } else {
1478 sections.push((section_idx, true, val));
1479 }
1480 } else {
1481 sections.push((section_idx, true, val));
1483 }
1484 continue;
1485 }
1486
1487 let plain_pattern = format!("{param_name}*{section_idx}=");
1489 if let Some(val) = find_param_value(&lower, header_value, &plain_pattern) {
1490 sections.push((section_idx, false, val));
1491 continue;
1492 }
1493
1494 break;
1496 }
1497
1498 if sections.is_empty() {
1499 return None;
1500 }
1501
1502 sections.sort_by_key(|(idx, _, _)| *idx);
1504
1505 let mut raw_bytes: Vec<u8> = Vec::new();
1507 for (_, is_encoded, value) in §ions {
1508 if *is_encoded {
1509 raw_bytes.extend(percent_decode(value));
1510 } else {
1511 raw_bytes.extend(value.as_bytes());
1512 }
1513 }
1514
1515 if charset.is_empty() {
1516 charset = "utf-8".to_string();
1525 }
1526
1527 Some(decode_charset(&charset, &raw_bytes))
1528}
1529
1530fn find_param_value(lower: &str, original: &str, pattern: &str) -> Option<String> {
1535 let mut search_from = 0;
1536 loop {
1537 let rel_idx = lower[search_from..].find(pattern)?;
1538 let abs_idx = search_from + rel_idx;
1539
1540 if is_param_boundary(lower, abs_idx) {
1542 if is_inside_quotes(lower, abs_idx) {
1544 search_from = abs_idx + pattern.len();
1545 continue;
1546 }
1547
1548 let rest = &original[abs_idx + pattern.len()..];
1549 return extract_param_value(rest);
1550 }
1551
1552 search_from = abs_idx + pattern.len();
1553 }
1554}
1555
1556fn percent_decode(input: &str) -> Vec<u8> {
1558 let bytes = input.as_bytes();
1559 let mut result = Vec::with_capacity(bytes.len());
1560 let mut i = 0;
1561 while i < bytes.len() {
1562 if bytes[i] == b'%' && i + 2 < bytes.len() {
1563 if let Some(val) = decode_hex_pair(bytes[i + 1], bytes[i + 2]) {
1564 result.push(val);
1565 i += 3;
1566 continue;
1567 }
1568 }
1569 result.push(bytes[i]);
1570 i += 1;
1571 }
1572 result
1573}
1574
/// Removes parenthesised comments, including nested ones, from `input`.
///
/// A backslash escapes the next character: outside comments both characters
/// are kept, inside comments both are dropped and the escaped character never
/// opens or closes a comment. A `)` with no open comment is kept as text.
fn strip_comments(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut depth: u32 = 0;
    let mut chars = input.chars();

    while let Some(c) = chars.next() {
        let outside = depth == 0;
        match c {
            '\\' => {
                if outside {
                    out.push('\\');
                }
                if let Some(escaped) = chars.next() {
                    if outside {
                        out.push(escaped);
                    }
                }
            }
            '(' => depth = depth.saturating_add(1),
            ')' if !outside => depth -= 1,
            _ if outside => out.push(c),
            _ => {}
        }
    }

    out
}
1614
/// Returns the offset of the first occurrence of `needle` in `haystack`.
///
/// An empty `needle` matches at offset 0, mirroring `str::find("")`. It has to
/// be handled before calling `windows`, which panics on a window size of zero.
fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }
    haystack.windows(needle.len()).position(|w| w == needle)
}
1619
1620fn decode_hex_pair(high: u8, low: u8) -> Option<u8> {
1622 let h = hex_digit(high)?;
1623 let l = hex_digit(low)?;
1624 Some(h * 16 + l)
1625}
1626
/// Returns the numeric value (0–15) of an ASCII hex digit in either case, or
/// `None` for any other byte.
fn hex_digit(b: u8) -> Option<u8> {
    char::from(b)
        .to_digit(16)
        .and_then(|value| u8::try_from(value).ok())
}
1636
/// Removes one pair of enclosing double quotes, if `input` both starts and
/// ends with `"` (a single `"` is left alone). Otherwise returns `input`
/// unchanged.
fn strip_outer_quotes(input: &str) -> &str {
    input
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(input)
}
1653
/// Resolves backslash escapes in the contents of a quoted string.
///
/// Each backslash is dropped and the character after it is emitted
/// literally (so `\"` becomes `"` and `\\` becomes `\`). A backslash that is
/// the very last character has nothing to escape and is kept as is.
fn unescape_quoted_string(input: &str) -> String {
    let mut unescaped = String::with_capacity(input.len());
    let mut pending_escape = false;

    for ch in input.chars() {
        if pending_escape || ch != '\\' {
            // Either an ordinary character or the target of an escape.
            unescaped.push(ch);
            pending_escape = false;
        } else {
            pending_escape = true;
        }
    }

    // Dangling backslash at end of input: preserve it.
    if pending_escape {
        unescaped.push('\\');
    }
    unescaped
}
1674
1675#[cfg(test)]
1680#[allow(clippy::unwrap_used, clippy::expect_used)]
1681mod tests {
1682 use super::*;
1683
1684 #[test]
1685 fn parse_simple_text_email() {
1686 let raw = b"From: sender@example.com\r\n\
1687 To: recipient@example.com\r\n\
1688 Subject: Test\r\n\
1689 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1690 Message-ID: <abc123@example.com>\r\n\
1691 Content-Type: text/plain; charset=utf-8\r\n\
1692 \r\n\
1693 Hello, World!";
1694
1695 let parsed = parse_email(raw).unwrap();
1696 assert_eq!(parsed.from.email, "sender@example.com");
1697 assert_eq!(parsed.to.len(), 1);
1698 assert_eq!(parsed.to[0].email, "recipient@example.com");
1699 assert_eq!(parsed.subject.as_deref(), Some("Test"));
1700 assert_eq!(parsed.message_id.as_deref(), Some("abc123@example.com"));
1701 assert_eq!(parsed.body_text.as_deref(), Some("Hello, World!"));
1702 assert!(parsed.body_html.is_none());
1703 assert!(parsed.attachments.is_empty());
1704 assert_eq!(parsed.size, raw.len() as u64);
1705 }
1706
1707 #[test]
1708 fn parse_multipart_alternative() {
1709 let raw = b"From: sender@example.com\r\n\
1710 To: recipient@example.com\r\n\
1711 Subject: Multi\r\n\
1712 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1713 MIME-Version: 1.0\r\n\
1714 Content-Type: multipart/alternative; boundary=\"bound42\"\r\n\
1715 \r\n\
1716 --bound42\r\n\
1717 Content-Type: text/plain; charset=utf-8\r\n\
1718 \r\n\
1719 Plain text body\r\n\
1720 --bound42\r\n\
1721 Content-Type: text/html; charset=utf-8\r\n\
1722 \r\n\
1723 <html><body>HTML body</body></html>\r\n\
1724 --bound42--";
1725
1726 let parsed = parse_email(raw).unwrap();
1727 assert_eq!(parsed.body_text.as_deref(), Some("Plain text body"));
1728 assert_eq!(
1729 parsed.body_html.as_deref(),
1730 Some("<html><body>HTML body</body></html>")
1731 );
1732 assert!(parsed.attachments.is_empty());
1733 }
1734
1735 #[test]
1736 fn parse_encoded_words_base64_subject() {
1737 let raw = b"From: sender@example.com\r\n\
1738 Subject: =?UTF-8?B?SGVsbG8gV29ybGQ=?=\r\n\
1739 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1740 \r\n\
1741 body";
1742
1743 let parsed = parse_email(raw).unwrap();
1744 assert_eq!(parsed.subject.as_deref(), Some("Hello World"));
1745 }
1746
1747 #[test]
1748 fn parse_encoded_words_q_subject() {
1749 let raw = b"From: sender@example.com\r\n\
1750 Subject: =?UTF-8?Q?Hello_World?=\r\n\
1751 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1752 \r\n\
1753 body";
1754
1755 let parsed = parse_email(raw).unwrap();
1756 assert_eq!(parsed.subject.as_deref(), Some("Hello World"));
1757 }
1758
1759 #[test]
1760 fn parse_encoded_words_in_display_name() {
1761 let raw = b"From: =?UTF-8?B?Sm9obiBEb2U=?= <john@example.com>\r\n\
1762 Subject: Test\r\n\
1763 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1764 \r\n";
1765
1766 let parsed = parse_email(raw).unwrap();
1767 assert_eq!(parsed.from.name.as_deref(), Some("John Doe"));
1768 assert_eq!(parsed.from.email, "john@example.com");
1769 }
1770
1771 #[test]
1772 fn parse_non_utf8_charset() {
1773 let raw = b"From: sender@example.com\r\n\
1775 Subject: =?ISO-8859-1?Q?H=E9llo?=\r\n\
1776 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1777 \r\n";
1778
1779 let parsed = parse_email(raw).unwrap();
1780 assert_eq!(parsed.subject.as_deref(), Some("Héllo"));
1781 }
1782
1783 #[test]
1784 fn parse_message_id_strips_brackets() {
1785 let raw = b"From: a@b.com\r\n\
1786 Message-ID: <unique-id@host.com>\r\n\
1787 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1788 \r\n";
1789
1790 let parsed = parse_email(raw).unwrap();
1791 assert_eq!(parsed.message_id.as_deref(), Some("unique-id@host.com"));
1792 }
1793
1794 #[test]
1795 fn parse_in_reply_to_first_only() {
1796 let raw = b"From: a@b.com\r\n\
1797 In-Reply-To: <first@host> <second@host>\r\n\
1798 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1799 \r\n";
1800
1801 let parsed = parse_email(raw).unwrap();
1802 assert_eq!(parsed.in_reply_to.as_deref(), Some("first@host"));
1803 }
1804
1805 #[test]
1806 fn parse_references_all_ids() {
1807 let raw = b"From: a@b.com\r\n\
1808 References: <ref1@host> <ref2@host> <ref3@host>\r\n\
1809 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1810 \r\n";
1811
1812 let parsed = parse_email(raw).unwrap();
1813 assert_eq!(
1814 parsed.references.as_deref(),
1815 Some("ref1@host ref2@host ref3@host")
1816 );
1817 }
1818
1819 #[test]
1820 fn parse_date_with_numeric_timezone() {
1821 let raw = b"From: a@b.com\r\n\
1822 Date: Thu, 13 Feb 2025 15:47:33 +0530\r\n\
1823 \r\n";
1824
1825 let parsed = parse_email(raw).unwrap();
1826 let date = parsed.date.unwrap();
1827 assert_eq!(date.year, 2025);
1828 assert_eq!(date.month, 2);
1829 assert_eq!(date.day, 13);
1830 assert_eq!(date.hour, 15);
1831 assert_eq!(date.minute, 47);
1832 assert_eq!(date.second, 33);
1833 assert_eq!(date.tz_offset_minutes, 330);
1834 }
1835
1836 #[test]
1837 fn parse_date_named_timezone() {
1838 let raw = b"From: a@b.com\r\n\
1839 Date: Thu, 13 Feb 2025 10:30:00 EST\r\n\
1840 \r\n";
1841
1842 let parsed = parse_email(raw).unwrap();
1843 let date = parsed.date.unwrap();
1844 assert_eq!(date.tz_offset_minutes, -300);
1845 }
1846
1847 #[test]
1848 fn parse_address_with_display_name() {
1849 let raw = b"From: \"John Doe\" <john@example.com>\r\n\
1850 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1851 \r\n";
1852
1853 let parsed = parse_email(raw).unwrap();
1854 assert_eq!(parsed.from.name.as_deref(), Some("John Doe"));
1855 assert_eq!(parsed.from.email, "john@example.com");
1856 }
1857
1858 #[test]
1859 fn parse_multiple_recipients() {
1860 let raw = b"From: a@b.com\r\n\
1861 To: one@x.com, \"Two\" <two@x.com>, three@x.com\r\n\
1862 Cc: cc1@x.com, cc2@x.com\r\n\
1863 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1864 \r\n";
1865
1866 let parsed = parse_email(raw).unwrap();
1867 assert_eq!(parsed.to.len(), 3);
1868 assert_eq!(parsed.to[1].name.as_deref(), Some("Two"));
1869 assert_eq!(parsed.cc.len(), 2);
1870 }
1871
1872 #[test]
1873 fn parse_multipart_with_attachment() {
1874 let raw = b"From: a@b.com\r\n\
1875 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1876 MIME-Version: 1.0\r\n\
1877 Content-Type: multipart/mixed; boundary=\"mixbound\"\r\n\
1878 \r\n\
1879 --mixbound\r\n\
1880 Content-Type: text/plain\r\n\
1881 \r\n\
1882 Message body\r\n\
1883 --mixbound\r\n\
1884 Content-Type: application/pdf\r\n\
1885 Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
1886 \r\n\
1887 PDF_CONTENT_HERE\r\n\
1888 --mixbound--";
1889
1890 let parsed = parse_email(raw).unwrap();
1891 assert_eq!(parsed.body_text.as_deref(), Some("Message body"));
1892 assert_eq!(parsed.attachments.len(), 1);
1893 assert_eq!(parsed.attachments[0].filename.as_deref(), Some("doc.pdf"));
1894 assert_eq!(parsed.attachments[0].content_type, "application/pdf");
1895 assert!(!parsed.attachments[0].is_inline);
1896 assert_eq!(parsed.attachments[0].section.as_deref(), Some("2"));
1897 }
1898
1899 #[test]
1900 fn parse_inline_attachment() {
1901 let raw = b"From: a@b.com\r\n\
1902 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1903 Content-Type: multipart/mixed; boundary=\"bound\"\r\n\
1904 \r\n\
1905 --bound\r\n\
1906 Content-Type: text/plain\r\n\
1907 \r\n\
1908 Body\r\n\
1909 --bound\r\n\
1910 Content-Type: image/png\r\n\
1911 Content-Disposition: inline\r\n\
1912 Content-ID: <img001>\r\n\
1913 \r\n\
1914 PNG_DATA\r\n\
1915 --bound--";
1916
1917 let parsed = parse_email(raw).unwrap();
1918 assert_eq!(parsed.attachments.len(), 1);
1919 assert!(parsed.attachments[0].is_inline);
1920 assert_eq!(parsed.attachments[0].content_id.as_deref(), Some("img001"));
1921 }
1922
1923 #[test]
1924 fn parse_headers_only_no_body() {
1925 let raw = b"From: a@b.com\r\n\
1926 Subject: Headers only\r\n\
1927 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n";
1928
1929 let parsed = parse_email(raw).unwrap();
1930 assert_eq!(parsed.subject.as_deref(), Some("Headers only"));
1931 assert!(parsed.body_text.is_none());
1932 assert!(parsed.body_html.is_none());
1933 }
1934
1935 #[test]
1936 fn parse_empty_input() {
1937 let result = parse_email(b"");
1938 assert!(matches!(result, Err(Error::EmptyInput)));
1939 }
1940
1941 #[test]
1942 fn parse_missing_from() {
1943 let raw = b"Subject: No from\r\n\
1944 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1945 \r\n";
1946
1947 let result = parse_email(raw);
1948 assert!(matches!(result, Err(Error::MissingFrom)));
1949 }
1950
1951 #[test]
1952 fn parse_quoted_printable_body() {
1953 let raw = b"From: a@b.com\r\n\
1954 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1955 Content-Type: text/plain; charset=utf-8\r\n\
1956 Content-Transfer-Encoding: quoted-printable\r\n\
1957 \r\n\
1958 Hello=20World=0D=0ASoft=\r\n break";
1959
1960 let parsed = parse_email(raw).unwrap();
1961 assert_eq!(
1962 parsed.body_text.as_deref(),
1963 Some("Hello World\r\nSoft break")
1964 );
1965 }
1966
1967 #[test]
1968 fn parse_base64_body() {
1969 let raw = b"From: a@b.com\r\n\
1970 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1971 Content-Type: text/plain; charset=utf-8\r\n\
1972 Content-Transfer-Encoding: base64\r\n\
1973 \r\n\
1974 SGVsbG8gV29ybGQ=\r\n";
1975
1976 let parsed = parse_email(raw).unwrap();
1977 assert_eq!(parsed.body_text.as_deref(), Some("Hello World"));
1978 }
1979
1980 #[test]
1981 fn parse_nested_multipart_section_numbers() {
1982 let raw = b"From: a@b.com\r\n\
1983 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
1984 Content-Type: multipart/mixed; boundary=\"outer\"\r\n\
1985 \r\n\
1986 --outer\r\n\
1987 Content-Type: multipart/alternative; boundary=\"inner\"\r\n\
1988 \r\n\
1989 --inner\r\n\
1990 Content-Type: text/plain\r\n\
1991 \r\n\
1992 Plain\r\n\
1993 --inner\r\n\
1994 Content-Type: text/html\r\n\
1995 \r\n\
1996 <b>HTML</b>\r\n\
1997 --inner--\r\n\
1998 --outer\r\n\
1999 Content-Type: application/pdf\r\n\
2000 Content-Disposition: attachment; filename=\"f.pdf\"\r\n\
2001 \r\n\
2002 DATA\r\n\
2003 --outer--";
2004
2005 let parsed = parse_email(raw).unwrap();
2006 assert_eq!(parsed.body_text.as_deref(), Some("Plain"));
2007 assert_eq!(parsed.body_html.as_deref(), Some("<b>HTML</b>"));
2008 assert_eq!(parsed.attachments.len(), 1);
2009 assert_eq!(parsed.attachments[0].section.as_deref(), Some("2"));
2011 }
2012
2013 #[test]
2014 fn parse_rfc2231_filename() {
2015 let raw = b"From: a@b.com\r\n\
2016 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2017 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
2018 \r\n\
2019 --b\r\n\
2020 Content-Type: text/plain\r\n\
2021 \r\n\
2022 Body\r\n\
2023 --b\r\n\
2024 Content-Type: application/pdf\r\n\
2025 Content-Disposition: attachment; filename*=UTF-8''r%C3%A9sum%C3%A9.pdf\r\n\
2026 \r\n\
2027 DATA\r\n\
2028 --b--";
2029
2030 let parsed = parse_email(raw).unwrap();
2031 assert_eq!(parsed.attachments.len(), 1);
2032 assert_eq!(
2033 parsed.attachments[0].filename.as_deref(),
2034 Some("résumé.pdf")
2035 );
2036 }
2037
2038 #[test]
2039 fn parse_raw_headers_preserved() {
2040 let raw = b"From: a@b.com\r\n\
2041 Subject: Test\r\n\
2042 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2043 \r\n\
2044 Body";
2045
2046 let parsed = parse_email(raw).unwrap();
2047 assert!(parsed.raw_headers.contains("From: a@b.com"));
2048 assert!(parsed.raw_headers.contains("Subject: Test"));
2049 }
2050
2051 #[test]
2052 fn parse_lf_only_line_endings() {
2053 let raw = b"From: a@b.com\n\
2054 Subject: LF\n\
2055 Date: Thu, 13 Feb 2025 15:47:33 +0000\n\
2056 \n\
2057 Body with LF";
2058
2059 let parsed = parse_email(raw).unwrap();
2060 assert_eq!(parsed.subject.as_deref(), Some("LF"));
2061 assert_eq!(parsed.body_text.as_deref(), Some("Body with LF"));
2062 }
2063
2064 #[test]
2065 fn parse_header_continuation_lines() {
2066 let raw = b"From: a@b.com\r\nSubject: This is a very long\r\n subject line that wraps\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
2069
2070 let parsed = parse_email(raw).unwrap();
2071 assert_eq!(
2072 parsed.subject.as_deref(),
2073 Some("This is a very long subject line that wraps")
2074 );
2075 }
2076
2077 #[test]
2078 fn parse_garbage_input_best_effort() {
2079 let result = parse_email(b"\x00\x01\x02\x03\xff\xfe");
2081 assert!(result.is_err());
2082 }
2083
2084 #[test]
2085 fn parse_truncated_multipart() {
2086 let raw = b"From: a@b.com\r\n\
2088 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2089 Content-Type: multipart/mixed; boundary=\"trunc\"\r\n\
2090 \r\n\
2091 --trunc\r\n\
2092 Content-Type: text/plain\r\n\
2093 \r\n\
2094 Some text here";
2095
2096 let parsed = parse_email(raw).unwrap();
2097 assert_eq!(parsed.body_text.as_deref(), Some("Some text here"));
2098 }
2099
2100 #[test]
2101 fn decode_adjacent_encoded_words() {
2102 let input = "=?UTF-8?B?SGVs?= =?UTF-8?B?bG8=?=";
2104 let decoded = decode_encoded_words(input);
2105 assert_eq!(decoded, "Hello");
2106 }
2107
2108 #[test]
2109 fn decode_iso8859_encoded_word() {
2110 let input = "=?ISO-8859-1?Q?caf=E9?=";
2112 let decoded = decode_encoded_words(input);
2113 assert_eq!(decoded, "café");
2114 }
2115
2116 #[test]
2117 fn parse_date_without_seconds() {
2118 let dt = parse_rfc5322_date("Thu, 13 Feb 2025 15:47 +0000").unwrap();
2119 assert_eq!(dt.hour, 15);
2120 assert_eq!(dt.minute, 47);
2121 assert_eq!(dt.second, 0);
2122 }
2123
2124 #[test]
2125 fn parse_two_digit_year() {
2126 let dt = parse_rfc5322_date("13 Feb 99 12:00:00 +0000").unwrap();
2127 assert_eq!(dt.year, 1999);
2128
2129 let dt = parse_rfc5322_date("13 Feb 25 12:00:00 +0000").unwrap();
2130 assert_eq!(dt.year, 2025);
2131 }
2132
2133 #[test]
2134 fn parse_three_digit_year_rfc5322_section_4_3() {
2135 let dt = parse_rfc5322_date("13 Feb 107 12:00:00 +0000").unwrap();
2137 assert_eq!(
2138 dt.year, 2007,
2139 "3-digit year 107 must map to 2007 per RFC 5322 Section 4.3"
2140 );
2141
2142 let dt = parse_rfc5322_date("13 Feb 100 12:00:00 +0000").unwrap();
2143 assert_eq!(
2144 dt.year, 2000,
2145 "3-digit year 100 must map to 2000 per RFC 5322 Section 4.3"
2146 );
2147
2148 let dt = parse_rfc5322_date("13 Feb 999 12:00:00 +0000").unwrap();
2149 assert_eq!(
2150 dt.year, 2899,
2151 "3-digit year 999 must map to 2899 per RFC 5322 Section 4.3"
2152 );
2153 }
2154
2155 #[test]
2156 fn parse_two_digit_year_rfc5322_section_4_3_cutoff() {
2157 let dt = parse_rfc5322_date("13 Feb 50 12:00:00 +0000").unwrap();
2162 assert_eq!(
2163 dt.year, 1950,
2164 "2-digit year 50 must map to 1950 per RFC 5322 Section 4.3"
2165 );
2166
2167 let dt = parse_rfc5322_date("13 Feb 69 12:00:00 +0000").unwrap();
2169 assert_eq!(
2170 dt.year, 1969,
2171 "2-digit year 69 must map to 1969 per RFC 5322 Section 4.3"
2172 );
2173
2174 let dt = parse_rfc5322_date("13 Feb 49 12:00:00 +0000").unwrap();
2176 assert_eq!(
2177 dt.year, 2049,
2178 "2-digit year 49 must map to 2049 per RFC 5322 Section 4.3"
2179 );
2180 }
2181
2182 #[test]
2183 fn parse_non_text_part_is_attachment() {
2184 let raw = b"From: a@b.com\r\n\
2185 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2186 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
2187 \r\n\
2188 --b\r\n\
2189 Content-Type: text/plain\r\n\
2190 \r\n\
2191 Text\r\n\
2192 --b\r\n\
2193 Content-Type: image/jpeg\r\n\
2194 \r\n\
2195 JPEG_DATA\r\n\
2196 --b--";
2197
2198 let parsed = parse_email(raw).unwrap();
2199 assert_eq!(parsed.attachments.len(), 1);
2201 assert_eq!(parsed.attachments[0].content_type, "image/jpeg");
2202 }
2203
2204 #[test]
2205 fn parse_windows1252_body() {
2206 let raw = b"From: a@b.com\r\n\
2208 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2209 Content-Type: text/plain; charset=windows-1252\r\n\
2210 \r\n\
2211 \x93Hello\x94"; let parsed = parse_email(raw).unwrap();
2214 let text = parsed.body_text.unwrap();
2215 assert!(text.contains("Hello"));
2216 assert!(text.contains('\u{201c}') || text.contains('\u{201d}'));
2218 }
2219
2220 #[test]
2221 fn parse_html_only_body() {
2222 let raw = b"From: a@b.com\r\n\
2223 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2224 Content-Type: text/html; charset=utf-8\r\n\
2225 \r\n\
2226 <html><body>Hello</body></html>";
2227
2228 let parsed = parse_email(raw).unwrap();
2229 assert!(parsed.body_text.is_none());
2230 assert_eq!(
2231 parsed.body_html.as_deref(),
2232 Some("<html><body>Hello</body></html>")
2233 );
2234 }
2235
2236 #[test]
2237 fn parse_bcc_addresses() {
2238 let raw = b"From: a@b.com\r\n\
2239 To: to@x.com\r\n\
2240 Bcc: hidden@x.com, secret@x.com\r\n\
2241 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2242 \r\n";
2243
2244 let parsed = parse_email(raw).unwrap();
2245 assert_eq!(parsed.bcc.len(), 2);
2246 assert_eq!(parsed.bcc[0].email, "hidden@x.com");
2247 }
2248
2249 #[test]
2250 fn mime_depth_limit() {
2251 let mut msg = b"From: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2254 Content-Type: multipart/mixed; boundary=\"b0\"\r\n\r\n"
2255 .to_vec();
2256
2257 for i in 0..70 {
2258 msg.extend_from_slice(
2259 format!(
2260 "--b{i}\r\nContent-Type: multipart/mixed; boundary=\"b{}\"\r\n\r\n",
2261 i + 1
2262 )
2263 .as_bytes(),
2264 );
2265 }
2266 msg.extend_from_slice(b"--b70\r\nContent-Type: text/plain\r\n\r\nDeep\r\n--b70--\r\n");
2267
2268 let parsed = parse_email(&msg).unwrap();
2269 assert!(parsed.body_text.is_none() || parsed.body_text.is_some());
2271 }
2272
2273 #[test]
2274 fn parse_reply_to() {
2275 let raw = b"From: a@b.com\r\n\
2276 Reply-To: noreply@example.com, support@example.com\r\n\
2277 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2278 \r\n";
2279
2280 let parsed = parse_email(raw).unwrap();
2281 assert_eq!(parsed.reply_to.len(), 2);
2282 assert_eq!(parsed.reply_to[0].email, "noreply@example.com");
2283 assert_eq!(parsed.reply_to[1].email, "support@example.com");
2284 }
2285
2286 #[test]
2287 fn parse_gb2312_encoded_word() {
2288 let raw = b"From: sender@example.com\r\n\
2291 Subject: =?GB2312?B?xOO6ww==?=\r\n\
2292 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2293 \r\n";
2294
2295 let parsed = parse_email(raw).unwrap();
2296 assert_eq!(parsed.subject.as_deref(), Some("你好"));
2297 }
2298
2299 #[test]
2300 fn parse_content_id_strips_brackets() {
2301 let raw = b"From: a@b.com\r\n\
2302 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2303 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
2304 \r\n\
2305 --b\r\n\
2306 Content-Type: text/plain\r\n\
2307 \r\n\
2308 Body\r\n\
2309 --b\r\n\
2310 Content-Type: image/png\r\n\
2311 Content-ID: <cid:image001@01D00000.00000000>\r\n\
2312 \r\n\
2313 PNG\r\n\
2314 --b--";
2315
2316 let parsed = parse_email(raw).unwrap();
2317 assert_eq!(
2318 parsed.attachments[0].content_id.as_deref(),
2319 Some("cid:image001@01D00000.00000000")
2320 );
2321 }
2322
2323 #[test]
2324 fn parse_attachment_without_filename() {
2325 let raw = b"From: a@b.com\r\n\
2327 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2328 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
2329 \r\n\
2330 --b\r\n\
2331 Content-Type: text/plain\r\n\
2332 \r\n\
2333 Body\r\n\
2334 --b\r\n\
2335 Content-Type: application/octet-stream\r\n\
2336 Content-Disposition: attachment\r\n\
2337 \r\n\
2338 BINARY\r\n\
2339 --b--";
2340
2341 let parsed = parse_email(raw).unwrap();
2342 assert_eq!(parsed.attachments.len(), 1);
2343 assert!(parsed.attachments[0].filename.is_none());
2344 assert_eq!(
2345 parsed.attachments[0].content_type,
2346 "application/octet-stream"
2347 );
2348 assert!(!parsed.attachments[0].is_inline);
2349 }
2350
2351 #[test]
2352 fn parse_content_type_without_charset_defaults() {
2353 let raw = b"From: a@b.com\r\n\
2355 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2356 Content-Type: text/plain\r\n\
2357 \r\n\
2358 Hello ASCII";
2359
2360 let parsed = parse_email(raw).unwrap();
2361 assert_eq!(parsed.body_text.as_deref(), Some("Hello ASCII"));
2362 }
2363
2364 #[test]
2368 fn parse_mime_part_no_content_type_defaults_to_us_ascii() {
2369 let raw = b"From: a@b.com\r\n\
2372 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2373 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
2374 \r\n\
2375 --b\r\n\
2376 Content-Transfer-Encoding: 7bit\r\n\
2377 \r\n\
2378 Hello ASCII\r\n\
2379 --b--";
2380
2381 let parsed = parse_email(raw).unwrap();
2382 assert_eq!(parsed.body_text.as_deref(), Some("Hello ASCII"));
2383 }
2384
2385 #[test]
2390 fn parse_mime_part_no_headers_at_all() {
2391 let raw = b"From: a@b.com\r\n\
2392 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2393 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
2394 \r\n\
2395 --b\r\n\
2396 \r\n\
2397 Headerless body\r\n\
2398 --b--";
2399
2400 let parsed = parse_email(raw).unwrap();
2401 assert_eq!(parsed.body_text.as_deref(), Some("Headerless body"));
2403 }
2404
2405 #[test]
2406 fn parse_multipart_only_attachments() {
2407 let raw = b"From: a@b.com\r\n\
2409 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2410 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
2411 \r\n\
2412 --b\r\n\
2413 Content-Type: application/pdf\r\n\
2414 Content-Disposition: attachment; filename=\"a.pdf\"\r\n\
2415 \r\n\
2416 PDF1\r\n\
2417 --b\r\n\
2418 Content-Type: image/png\r\n\
2419 Content-Disposition: attachment; filename=\"b.png\"\r\n\
2420 \r\n\
2421 PNG2\r\n\
2422 --b--";
2423
2424 let parsed = parse_email(raw).unwrap();
2425 assert!(parsed.body_text.is_none());
2426 assert!(parsed.body_html.is_none());
2427 assert_eq!(parsed.attachments.len(), 2);
2428 assert_eq!(parsed.attachments[0].section.as_deref(), Some("1"));
2429 assert_eq!(parsed.attachments[1].section.as_deref(), Some("2"));
2430 }
2431
2432 #[test]
2433 fn parse_unknown_charset_body_fallback() {
2434 let raw = b"From: a@b.com\r\n\
2436 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2437 Content-Type: text/plain; charset=x-unknown-fake\r\n\
2438 \r\n\
2439 Plain text in unknown charset";
2440
2441 let parsed = parse_email(raw).unwrap();
2442 assert!(parsed.body_text.is_some());
2444 assert!(parsed.body_text.unwrap().contains("Plain text"));
2445 }
2446
2447 #[test]
2448 fn parse_content_id_without_disposition_is_inline() {
2449 let raw = b"From: a@b.com\r\n\
2451 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2452 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
2453 \r\n\
2454 --b\r\n\
2455 Content-Type: text/plain\r\n\
2456 \r\n\
2457 Body\r\n\
2458 --b\r\n\
2459 Content-Type: image/gif\r\n\
2460 Content-ID: <img42>\r\n\
2461 \r\n\
2462 GIF89a\r\n\
2463 --b--";
2464
2465 let parsed = parse_email(raw).unwrap();
2466 assert_eq!(parsed.attachments.len(), 1);
2467 assert!(parsed.attachments[0].is_inline);
2468 assert_eq!(parsed.attachments[0].content_id.as_deref(), Some("img42"));
2469 }
2470
2471 #[test]
2472 fn parse_overlong_subject() {
2473 let long_subject = "A".repeat(10_000);
2475 let raw = format!(
2476 "From: a@b.com\r\n\
2477 Subject: {long_subject}\r\n\
2478 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2479 \r\n"
2480 );
2481
2482 let parsed = parse_email(raw.as_bytes()).unwrap();
2483 assert_eq!(parsed.subject.as_deref(), Some(long_subject.as_str()));
2484 }
2485
2486 #[test]
2487 fn parse_multiple_from_takes_first() {
2488 let raw = b"From: first@example.com, second@example.com\r\n\
2490 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2491 \r\n";
2492
2493 let parsed = parse_email(raw).unwrap();
2494 assert_eq!(parsed.from.email, "first@example.com");
2495 }
2496
2497 #[test]
2498 fn parse_multipart_no_boundary_param() {
2499 let raw = b"From: a@b.com\r\n\
2501 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2502 Content-Type: multipart/mixed\r\n\
2503 \r\n\
2504 Some text content";
2505
2506 let parsed = parse_email(raw).unwrap();
2507 assert!(parsed.body_text.is_some());
2509 }
2510
2511 #[test]
2512 fn parse_empty_body_after_headers() {
2513 let raw = b"From: a@b.com\r\n\
2515 Subject: Empty body\r\n\
2516 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2517 \r\n";
2518
2519 let parsed = parse_email(raw).unwrap();
2520 assert_eq!(parsed.subject.as_deref(), Some("Empty body"));
2521 assert!(parsed.body_text.is_none());
2522 }
2523
2524 #[test]
2525 fn parse_mixed_charset_encoded_words() {
2526 let raw = b"From: a@b.com\r\n\
2528 Subject: =?UTF-8?B?SGVsbG8=?= =?ISO-8859-1?Q?_caf=E9?=\r\n\
2529 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2530 \r\n";
2531
2532 let parsed = parse_email(raw).unwrap();
2533 assert_eq!(parsed.subject.as_deref(), Some("Hello caf\u{e9}"));
2534 }
2535
2536 #[test]
2537 fn parse_no_date_header() {
2538 let raw = b"From: a@b.com\r\n\
2540 Subject: No date\r\n\
2541 \r\n\
2542 Body";
2543
2544 let parsed = parse_email(raw).unwrap();
2545 assert!(parsed.date.is_none());
2546 assert_eq!(parsed.subject.as_deref(), Some("No date"));
2547 }
2548
2549 #[test]
2550 fn parse_explicit_attachment_text_plain() {
2551 let raw = b"From: a@b.com\r\n\
2553 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2554 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
2555 \r\n\
2556 --b\r\n\
2557 Content-Type: text/plain\r\n\
2558 \r\n\
2559 Body text\r\n\
2560 --b\r\n\
2561 Content-Type: text/plain\r\n\
2562 Content-Disposition: attachment; filename=\"log.txt\"\r\n\
2563 \r\n\
2564 Log file content\r\n\
2565 --b--";
2566
2567 let parsed = parse_email(raw).unwrap();
2568 assert_eq!(parsed.body_text.as_deref(), Some("Body text"));
2569 assert_eq!(parsed.attachments.len(), 1);
2570 assert_eq!(parsed.attachments[0].filename.as_deref(), Some("log.txt"));
2571 assert_eq!(parsed.attachments[0].content_type, "text/plain");
2572 }
2573
2574 #[test]
2575 fn parse_date_negative_timezone() {
2576 let raw = b"From: a@b.com\r\n\
2577 Date: Fri, 14 Feb 2025 09:15:00 -0800\r\n\
2578 \r\n";
2579
2580 let parsed = parse_email(raw).unwrap();
2581 let date = parsed.date.unwrap();
2582 assert_eq!(date.tz_offset_minutes, -480);
2583 }
2584
2585 #[test]
2586 fn parse_size_equals_input_length() {
2587 let raw = b"From: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\nBody";
2588 let parsed = parse_email(raw).unwrap();
2589 assert_eq!(parsed.size, raw.len() as u64);
2590 }
2591
2592 #[test]
2593 fn parse_binary_garbage_returns_error() {
2594 let garbage: Vec<u8> = (0..=255_u8).collect();
2596 let result = parse_email(&garbage);
2597 assert!(result.is_err());
2598 }
2599
2600 #[test]
2601 fn parse_folded_encoded_word_subject() {
2602 let raw = b"From: a@b.com\r\nSubject: =?UTF-8?B?SGVsbG8=?=\r\n =?UTF-8?B?V29ybGQ=?=\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
2604
2605 let parsed = parse_email(raw).unwrap();
2606 assert_eq!(parsed.subject.as_deref(), Some("HelloWorld"));
2607 }
2608
2609 #[test]
2614 fn parse_encoded_word_lowercase_encoding() {
2615 let raw = b"From: sender@example.com\r\n\
2617 Subject: =?utf-8?b?SGVsbG8=?= =?utf-8?q?_World?=\r\n\
2618 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2619 \r\n";
2620
2621 let parsed = parse_email(raw).unwrap();
2622 assert_eq!(parsed.subject.as_deref(), Some("Hello World"));
2623 }
2624
2625 #[test]
2626 fn parse_malformed_encoded_word_passthrough() {
2627 let raw = b"From: a@b.com\r\n\
2629 Subject: =?UTF-8?B?broken\r\n\
2630 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2631 \r\n";
2632
2633 let parsed = parse_email(raw).unwrap();
2634 assert!(parsed.subject.is_some());
2636 assert!(parsed.subject.unwrap().contains("=?"));
2637 }
2638
2639 #[test]
2640 fn parse_encoded_word_unknown_encoding_type() {
2641 let raw = b"From: a@b.com\r\n\
2643 Subject: =?UTF-8?X?data?=\r\n\
2644 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2645 \r\n";
2646
2647 let parsed = parse_email(raw).unwrap();
2648 assert!(parsed.subject.is_some());
2649 assert!(parsed.subject.unwrap().contains("=?"));
2651 }
2652
2653 #[test]
2654 fn parse_utf8_directly_in_headers_rfc6532() {
2655 let raw = "From: José <jose@example.com>\r\n\
2657 Subject: Ñoño café\r\n\
2658 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2659 \r\n\
2660 Body";
2661
2662 let parsed = parse_email(raw.as_bytes()).unwrap();
2663 assert_eq!(parsed.subject.as_deref(), Some("Ñoño café"));
2664 assert_eq!(parsed.from.name.as_deref(), Some("José"));
2665 assert_eq!(parsed.from.email, "jose@example.com");
2666 }
2667
2668 #[test]
2669 fn parse_multipart_with_preamble() {
2670 let raw = b"From: a@b.com\r\n\
2672 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2673 Content-Type: multipart/mixed; boundary=\"preamble-test\"\r\n\
2674 \r\n\
2675 This is the preamble, which should be ignored.\r\n\
2676 --preamble-test\r\n\
2677 Content-Type: text/plain\r\n\
2678 \r\n\
2679 Actual body\r\n\
2680 --preamble-test--";
2681
2682 let parsed = parse_email(raw).unwrap();
2683 assert_eq!(parsed.body_text.as_deref(), Some("Actual body"));
2684 }
2685
2686 #[test]
2687 fn parse_attachment_name_from_content_type() {
2688 let raw = b"From: a@b.com\r\n\
2690 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2691 Content-Type: multipart/mixed; boundary=\"b\"\r\n\
2692 \r\n\
2693 --b\r\n\
2694 Content-Type: text/plain\r\n\
2695 \r\n\
2696 Body\r\n\
2697 --b\r\n\
2698 Content-Type: application/pdf; name=\"report.pdf\"\r\n\
2699 Content-Disposition: attachment\r\n\
2700 \r\n\
2701 PDF\r\n\
2702 --b--";
2703
2704 let parsed = parse_email(raw).unwrap();
2705 assert_eq!(parsed.attachments.len(), 1);
2706 assert_eq!(
2707 parsed.attachments[0].filename.as_deref(),
2708 Some("report.pdf")
2709 );
2710 }
2711
2712 #[test]
2713 fn parse_qp_soft_break_lf_only() {
2714 let raw = b"From: a@b.com\r\n\
2716 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2717 Content-Type: text/plain; charset=utf-8\r\n\
2718 Content-Transfer-Encoding: quoted-printable\r\n\
2719 \r\n\
2720 Hello=\nWorld";
2721
2722 let parsed = parse_email(raw).unwrap();
2723 assert_eq!(parsed.body_text.as_deref(), Some("HelloWorld"));
2724 }
2725
2726 #[test]
2727 fn parse_subject_mixed_encoded_and_plain() {
2728 let raw = b"From: a@b.com\r\n\
2730 Subject: Re: =?UTF-8?B?SGVsbG8=?= there\r\n\
2731 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2732 \r\n";
2733
2734 let parsed = parse_email(raw).unwrap();
2735 assert_eq!(parsed.subject.as_deref(), Some("Re: Hello there"));
2736 }
2737
2738 #[test]
2739 fn parse_whitespace_only_body() {
2740 let raw = b"From: a@b.com\r\n\
2742 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2743 Content-Type: text/plain; charset=utf-8\r\n\
2744 \r\n\
2745 \r\n \r\n";
2746
2747 let parsed = parse_email(raw).unwrap();
2748 assert!(parsed.body_text.is_some());
2750 }
2751
2752 #[test]
2753 fn parse_date_missing_timezone() {
2754 let raw = b"From: a@b.com\r\n\
2756 Date: 13 Feb 2025 12:00:00\r\n\
2757 \r\n";
2758
2759 let parsed = parse_email(raw).unwrap();
2760 let date = parsed.date.unwrap();
2761 assert_eq!(date.year, 2025);
2762 assert_eq!(date.tz_offset_minutes, 0);
2763 }
2764
2765 #[test]
2766 fn parse_deeply_nested_section_dot_notation() {
2767 let raw = b"From: a@b.com\r\n\
2769 Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
2770 Content-Type: multipart/mixed; boundary=\"outer\"\r\n\
2771 \r\n\
2772 --outer\r\n\
2773 Content-Type: multipart/related; boundary=\"rel\"\r\n\
2774 \r\n\
2775 --rel\r\n\
2776 Content-Type: text/html\r\n\
2777 \r\n\
2778 <img src=\"cid:img1\">\r\n\
2779 --rel\r\n\
2780 Content-Type: image/png\r\n\
2781 Content-ID: <img1>\r\n\
2782 \r\n\
2783 PNG_DATA\r\n\
2784 --rel--\r\n\
2785 --outer\r\n\
2786 Content-Type: application/pdf\r\n\
2787 Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
2788 \r\n\
2789 PDF\r\n\
2790 --outer--";
2791
2792 let parsed = parse_email(raw).unwrap();
2793 assert!(parsed.body_html.is_some());
2795 assert!(parsed.body_html.unwrap().contains("cid:img1"));
2796 let inline_att = parsed
2798 .attachments
2799 .iter()
2800 .find(|a| a.content_type == "image/png")
2801 .unwrap();
2802 assert_eq!(inline_att.section.as_deref(), Some("1.2"));
2803 assert!(inline_att.is_inline);
2804 let pdf_att = parsed
2806 .attachments
2807 .iter()
2808 .find(|a| a.content_type == "application/pdf")
2809 .unwrap();
2810 assert_eq!(pdf_att.section.as_deref(), Some("2"));
2811 }
2812
/// Raw 0xFF/0xFE bytes in a text body must not prevent the body from being
/// produced.
#[test]
fn parse_non_ascii_bytes_in_body() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Hello \xff\xfe world";

    let body = parse_email(message).unwrap().body_text;
    assert!(body.is_some());
}
2826
/// A base64 body wrapped across several CRLF-terminated lines decodes to the
/// original text.
#[test]
fn parse_base64_body_with_line_breaks() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVs\r\nbG8g\r\nV29y\r\nbGQ=";

    let email = parse_email(message).unwrap();
    let decoded = email.body_text.as_deref();
    assert_eq!(decoded, Some("Hello World"));
}
2840
/// Surplus whitespace in the `Date` header value must not break date parsing.
#[test]
fn parse_date_extra_whitespace() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000 \r\n\
        \r\n";

    let when = parse_email(message).unwrap().date.unwrap();
    assert_eq!(when.year, 2025);
    assert_eq!(when.month, 2);
    assert_eq!(when.day, 13);
}
2854
/// In a multipart/related message the HTML part becomes the body and the
/// image part is reported as a single inline attachment with its Content-ID
/// and filename.
#[test]
fn parse_multipart_related_with_inline_images() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/related; boundary=\"rel\"\r\n\
        \r\n\
        --rel\r\n\
        Content-Type: text/html\r\n\
        \r\n\
        <html><img src=\"cid:logo\"></html>\r\n\
        --rel\r\n\
        Content-Type: image/jpeg\r\n\
        Content-ID: <logo>\r\n\
        Content-Disposition: inline; filename=\"logo.jpg\"\r\n\
        \r\n\
        JPEG_DATA\r\n\
        --rel--";

    let email = parse_email(message).unwrap();
    assert!(email.body_html.is_some());
    assert_eq!(email.attachments.len(), 1);

    let logo = &email.attachments[0];
    assert!(logo.is_inline);
    assert_eq!(logo.content_id.as_deref(), Some("logo"));
    assert_eq!(logo.filename.as_deref(), Some("logo.jpg"));
}
2881
/// A message with nothing but a `From` header parses; every optional field
/// checked here stays `None`.
#[test]
fn parse_minimal_message_from_only() {
    let message = b"From: a@b.com\r\n\r\n";

    let email = parse_email(message).unwrap();
    assert_eq!(email.from.email, "a@b.com");
    assert!(email.subject.is_none());
    assert!(email.date.is_none());
    assert!(email.body_text.is_none());
}
2893
/// When a header occurs twice, the first occurrence is the one reported.
#[test]
fn parse_multiple_same_headers() {
    let message = b"From: first@example.com\r\n\
        From: second@example.com\r\n\
        Subject: First\r\n\
        Subject: Second\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let email = parse_email(message).unwrap();
    let sender = &email.from.email;
    let subject = email.subject.as_deref();
    assert_eq!(sender, "first@example.com");
    assert_eq!(subject, Some("First"));
}
2908
/// Each named zone abbreviation in the table maps to the listed offset in
/// minutes east of UTC.
#[test]
fn parse_date_all_named_timezones() {
    let zones = [
        ("EST", -300),
        ("EDT", -240),
        ("CST", -360),
        ("CDT", -300),
        ("MST", -420),
        ("MDT", -360),
        ("PST", -480),
        ("PDT", -420),
        ("GMT", 0),
        ("UTC", 0),
        ("UT", 0),
    ];
    for &(tz_name, minutes) in &zones {
        let message = format!("From: a@b.com\r\nDate: Thu, 13 Feb 2025 12:00:00 {tz_name}\r\n\r\n");
        let when = parse_email(message.as_bytes()).unwrap().date.unwrap();
        assert_eq!(
            when.tz_offset_minutes, minutes,
            "Failed for timezone {tz_name}"
        );
    }
}
2934
/// A boundary containing `=`, `_`, `+` and leading dashes is matched literally.
#[test]
fn parse_boundary_with_special_chars() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"----=_Part_123+abc\"\r\n\
        \r\n\
        ------=_Part_123+abc\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body text\r\n\
        ------=_Part_123+abc--";

    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Body text"));
}
2951
/// A base64 body with stray padding and trailing junk must not make parsing
/// fail; some text body is still produced.
#[test]
fn parse_truncated_base64_body() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVsbG8gV29yb===invalid";

    let body = parse_email(message).unwrap().body_text;
    assert!(body.is_some());
}
2966
/// An empty group in `To` must not disturb parsing of the rest of the message.
#[test]
fn parse_address_group_syntax() {
    let message = b"From: sender@example.com\r\n\
        To: Undisclosed:;\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let sender = parse_email(message).unwrap().from;
    assert_eq!(sender.email, "sender@example.com");
}
2980
/// An ISO-2022-JP, base64 encoded-word in `Subject` decodes to the katakana
/// text it represents.
#[test]
fn parse_iso2022jp_encoded_word() {
    let message = b"From: a@b.com\r\n\
        Subject: =?ISO-2022-JP?B?GyRCJUYlOSVIGyhC?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert!(subject.is_some());
    assert_eq!(subject, Some("テスト"));
}
2995
/// A multipart message whose declared boundary never occurs still parses and
/// yields neither a text nor an HTML body.
#[test]
fn parse_multipart_missing_parts_tolerance() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"never-appears\"\r\n\
        \r\n\
        This body doesn't contain any boundaries at all.";

    let email = parse_email(message).unwrap();
    assert!(email.body_text.is_none());
    assert!(email.body_html.is_none());
}
3010
/// Encoded-words are decoded in address display names (`From`, `To`) as well
/// as in `Subject`, for both Q and B encodings.
#[test]
fn parse_encoded_word_in_multiple_header_types() {
    let message = b"From: =?UTF-8?Q?M=C3=BCller?= <mueller@example.com>\r\n\
        To: =?UTF-8?B?U21pdGg=?= <smith@example.com>\r\n\
        Subject: =?UTF-8?Q?Caf=C3=A9?=\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let email = parse_email(message).unwrap();
    let sender_name = email.from.name.as_deref();
    let recipient_name = email.to[0].name.as_deref();
    assert_eq!(sender_name, Some("Müller"));
    assert_eq!(recipient_name, Some("Smith"));
    assert_eq!(email.subject.as_deref(), Some("Café"));
}
3025
/// The reported attachment size is the length of the part body (10 bytes
/// here), excluding the line break before the closing boundary.
#[test]
fn parse_attachment_size_reflects_part_body() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"f.pdf\"\r\n\
        \r\n\
        0123456789\r\n\
        --b--";

    let attachments = parse_email(message).unwrap().attachments;
    assert_eq!(attachments.len(), 1);
    assert_eq!(attachments[0].size, Some(10));
}
3048
/// The `boundary` parameter is honoured when given without quotes.
#[test]
fn parse_unquoted_boundary() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=simple_boundary\r\n\
        \r\n\
        --simple_boundary\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Text\r\n\
        --simple_boundary--";

    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Text"));
}
3065
/// A `Message-ID` written without `<...>` is accepted verbatim.
#[test]
fn parse_message_id_without_angle_brackets() {
    let message = b"From: a@b.com\r\n\
        Message-ID: bare-id@host.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let email = parse_email(message).unwrap();
    let id = email.message_id.as_deref();
    assert_eq!(id, Some("bare-id@host.com"));
}
3078
/// A `References` header with an empty value is reported as absent.
#[test]
fn parse_empty_references_header() {
    let message = b"From: a@b.com\r\n\
        References: \r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let references = parse_email(message).unwrap().references;
    assert!(references.is_none());
}
3090
/// One text part followed by five attachments: the text becomes the body and
/// the attachments are numbered as sections "2" through "6" in order.
#[test]
fn parse_large_multipart_many_attachments() {
    let mut message = b"From: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"multi\"\r\n\r\n"
        .to_vec();

    // Section 1 is the text body.
    message.extend_from_slice(b"--multi\r\nContent-Type: text/plain\r\n\r\nBody\r\n");
    for i in 1..=5 {
        let part = format!(
            "--multi\r\nContent-Type: application/octet-stream\r\n\
             Content-Disposition: attachment; filename=\"file{i}.bin\"\r\n\r\n\
             DATA{i}\r\n"
        );
        message.extend_from_slice(part.as_bytes());
    }
    message.extend_from_slice(b"--multi--");

    let email = parse_email(&message).unwrap();
    assert_eq!(email.body_text.as_deref(), Some("Body"));
    assert_eq!(email.attachments.len(), 5);
    for (i, attachment) in email.attachments.iter().enumerate() {
        // Index 0 is the first attachment: section 2, named file1.bin.
        assert_eq!(
            attachment.section.as_deref(),
            Some((i + 2).to_string().as_str()),
            "Wrong section for attachment {i}"
        );
        assert_eq!(
            attachment.filename.as_deref(),
            Some(format!("file{}.bin", i + 1).as_str())
        );
    }
}
3126
/// A `Message-ID` of just `<>` is reported as absent.
#[test]
fn parse_message_id_empty_brackets() {
    let message = b"From: a@b.com\r\n\
        Message-ID: <>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let id = parse_email(message).unwrap().message_id;
    assert!(id.is_none());
}
3138
/// A `Message-ID` header with an empty value is reported as absent.
#[test]
fn parse_message_id_empty_value() {
    let message = b"From: a@b.com\r\n\
        Message-ID: \r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let id = parse_email(message).unwrap().message_id;
    assert!(id.is_none());
}
3150
/// A parameter whose name merely ends in the requested name (`xfilename*`)
/// must not be mistaken for `filename*`.
#[test]
fn rfc2231_param_boundary_check() {
    let header = "attachment; xfilename*=UTF-8''bad.pdf; filename*=UTF-8''good.pdf";
    let filename = extract_rfc2231_param(header, "filename");
    assert_eq!(filename.as_deref(), Some("good.pdf"));
}
3159
/// The extended parameter is found when it is the very first thing in the
/// header value.
#[test]
fn rfc2231_param_at_start() {
    let header = "filename*=UTF-8''test.pdf";
    let filename = extract_rfc2231_param(header, "filename");
    assert_eq!(filename.as_deref(), Some("test.pdf"));
}
3167
/// A comma inside a quoted display name belongs to the name and does not
/// split the address.
#[test]
fn parse_quoted_display_name_with_comma() {
    let message = b"From: \"Doe, John\" <john@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let sender = parse_email(message).unwrap().from;
    assert_eq!(sender.name.as_deref(), Some("Doe, John"));
    assert_eq!(sender.email, "john@example.com");
}
3179
/// Backslash-escaped quotes inside a quoted display name are unescaped in the
/// parsed name.
#[test]
fn parse_quoted_display_name_with_escaped_chars() {
    let message = b"From: \"John \\\"Doc\\\" Doe\" <john@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let sender = parse_email(message).unwrap().from;
    assert_eq!(sender.name.as_deref(), Some("John \"Doc\" Doe"));
    assert_eq!(sender.email, "john@example.com");
}
3191
/// Table of (escaped input, expected output) pairs for quoted-pair removal:
/// plain text is untouched, `\\` and `\"` collapse to one character, and a
/// lone trailing backslash is kept as is.
#[test]
fn unescape_quoted_string_backslash() {
    let cases = [
        ("hello", "hello"),
        ("a\\\\b", "a\\b"),
        ("a\\\"b", "a\"b"),
        ("trailing\\", "trailing\\"),
    ];
    for (escaped, plain) in cases {
        assert_eq!(unescape_quoted_string(escaped), plain);
    }
}
3199
/// An escaped quote inside a display name must not confuse the splitting of
/// the address list: both recipients are still found.
#[test]
fn parse_address_list_with_escaped_quotes_in_display_name() {
    let message = b"From: a@b.com\r\n\
        To: \"A\\\"B\" <a@x.com>, c@d.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let recipients = parse_email(message).unwrap().to;
    assert_eq!(
        recipients.len(),
        2,
        "Expected 2 To addresses but got {:?}",
        recipients
    );

    let (first, second) = (&recipients[0], &recipients[1]);
    assert_eq!(first.email, "a@x.com");
    assert_eq!(first.name.as_deref(), Some("A\"B"));
    assert_eq!(second.email, "c@d.com");
}
3220
/// `filename*0` / `filename*1` continuation segments are joined into a single
/// attachment filename.
#[test]
fn parse_rfc2231_continuation_filename() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename*0=\"very_long_\"; filename*1=\"filename.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --b--";

    let attachments = parse_email(message).unwrap().attachments;
    assert_eq!(attachments.len(), 1);

    let filename = attachments[0].filename.as_deref();
    assert_eq!(
        filename,
        Some("very_long_filename.pdf"),
        "RFC 2231 continuation filename not reassembled"
    );
}
3248
/// Percent-encoded continuation segments (`filename*0*`, `filename*1*`) are
/// joined and decoded using the charset given on the first segment.
#[test]
fn parse_rfc2231_continuation_with_charset() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename*0*=UTF-8''r%C3%A9sum; filename*1*=%C3%A9.pdf\r\n\
        \r\n\
        DATA\r\n\
        --b--";

    let attachments = parse_email(message).unwrap().attachments;
    assert_eq!(attachments.len(), 1);

    let filename = attachments[0].filename.as_deref();
    assert_eq!(
        filename,
        Some("résumé.pdf"),
        "RFC 2231 continuation with charset not reassembled"
    );
}
3276
/// A comma inside a parenthesised comment does not split the address list,
/// and the comment text is used as the display name.
#[test]
fn parse_address_comment_with_comma() {
    let message = b"From: sender@example.com\r\n\
        To: user@example.com (Doe, John), other@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let recipients = parse_email(message).unwrap().to;
    assert_eq!(
        recipients.len(),
        2,
        "Expected 2 To addresses but got {:?}",
        recipients
    );

    let (first, second) = (&recipients[0], &recipients[1]);
    assert_eq!(first.email, "user@example.com");
    assert_eq!(
        first.name.as_deref(),
        Some("Doe, John"),
        "Display name from comment should be preserved intact"
    );
    assert_eq!(second.email, "other@example.com");
}
3302
/// Unfolding removes only the CRLF: the tab that starts the continuation line
/// stays in the subject.
#[test]
fn parse_header_unfolding_preserves_wsp() {
    let message = b"From: a@b.com\r\nSubject: Hello\r\n\tWorld\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";

    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("Hello\tWorld"),
        "Tab from continuation line should be preserved per RFC 5322 Section 2.2.3"
    );
}
3317
/// 2025-02-13 15:47:33 UTC converts to 1_739_461_653, and the same instant
/// written as 21:17:33 at +05:30 converts to the same timestamp.
#[test]
fn datetime_to_unix_timestamp() {
    use crate::types::DateTime;

    let utc = DateTime {
        year: 2025,
        month: 2,
        day: 13,
        hour: 15,
        minute: 47,
        second: 33,
        tz_offset_minutes: 0,
    };
    assert_eq!(utc.to_unix_timestamp(), 1_739_461_653);

    // Same calendar day, wall clock shifted by the 330-minute offset.
    let shifted = DateTime {
        hour: 21,
        minute: 17,
        tz_offset_minutes: 330,
        ..utc
    };
    assert_eq!(shifted.to_unix_timestamp(), utc.to_unix_timestamp());
}
3347
/// Timestamp 1_739_461_653 expands to 2025-02-13 15:47:33 at offset 0, and to
/// a 21:17 wall-clock time when a 330-minute offset is requested.
#[test]
fn datetime_from_unix_timestamp() {
    use crate::types::DateTime;

    let instant = 1_739_461_653_i64;

    let utc = DateTime::from_unix_timestamp(instant, 0);
    assert_eq!(utc.year, 2025);
    assert_eq!(utc.month, 2);
    assert_eq!(utc.day, 13);
    assert_eq!(utc.hour, 15);
    assert_eq!(utc.minute, 47);
    assert_eq!(utc.second, 33);
    assert_eq!(utc.tz_offset_minutes, 0);

    let shifted = DateTime::from_unix_timestamp(instant, 330);
    assert_eq!(shifted.hour, 21);
    assert_eq!(shifted.minute, 17);
}
3367
/// Converting to a timestamp and back with the same offset reproduces the
/// original value, here for the last second of 2025 at -08:00.
#[test]
fn datetime_round_trip_timestamp() {
    use crate::types::DateTime;

    let offset = -480;
    let original = DateTime {
        year: 2025,
        month: 12,
        day: 31,
        hour: 23,
        minute: 59,
        second: 59,
        tz_offset_minutes: offset,
    };
    let restored = DateTime::from_unix_timestamp(original.to_unix_timestamp(), offset);
    assert_eq!(original, restored);
}
3385
/// Ordering compares instants: 12:00 at +00:00 equals 07:00 at -05:00, and
/// 13:00 at +00:00 sorts after 12:00 at +00:00.
#[test]
fn datetime_ord_comparison() {
    use crate::types::DateTime;

    let noon_utc = DateTime {
        year: 2025,
        month: 1,
        day: 1,
        hour: 12,
        minute: 0,
        second: 0,
        tz_offset_minutes: 0,
    };
    // Same instant expressed five hours west of UTC.
    let same_instant_est = DateTime {
        hour: 7,
        tz_offset_minutes: -300,
        ..noon_utc
    };
    assert_eq!(noon_utc.cmp(&same_instant_est), std::cmp::Ordering::Equal);

    let one_hour_later = DateTime {
        hour: 13,
        ..noon_utc
    };
    assert!(one_hour_later > noon_utc);
}
3423
/// Timestamp 0 at offset 0 is 1970-01-01 00:00:00 and converts back to 0.
#[test]
fn datetime_epoch() {
    use crate::types::DateTime;

    let origin = DateTime::from_unix_timestamp(0, 0);
    assert_eq!(origin.year, 1970);
    assert_eq!(origin.month, 1);
    assert_eq!(origin.day, 1);
    assert_eq!(origin.hour, 0);
    assert_eq!(origin.minute, 0);
    assert_eq!(origin.second, 0);
    assert_eq!(origin.to_unix_timestamp(), 0);
}
3437
/// `parse_headers_only` fills in the header-derived fields but leaves both
/// bodies and the attachment list empty, even for a multipart message.
#[test]
fn parse_headers_only_extracts_metadata() {
    let message = b"From: sender@example.com\r\n\
        To: recipient@example.com\r\n\
        Subject: Test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Message-ID: <abc123@example.com>\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        This body should NOT be parsed\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
        \r\n\
        PDF_DATA\r\n\
        --b--";

    let summary = parse_headers_only(message).unwrap();

    // Header metadata is present.
    assert_eq!(summary.from.email, "sender@example.com");
    assert_eq!(summary.to.len(), 1);
    assert_eq!(summary.subject.as_deref(), Some("Test"));
    assert_eq!(summary.message_id.as_deref(), Some("abc123@example.com"));
    assert!(summary.date.is_some());

    // Nothing from the MIME body is surfaced.
    assert!(summary.body_text.is_none());
    assert!(summary.body_html.is_none());
    assert!(summary.attachments.is_empty());
}
3472
/// Empty input is rejected with `Error::EmptyInput`.
#[test]
fn parse_headers_only_empty_input() {
    let outcome = parse_headers_only(b"");
    let is_empty_input_error = matches!(outcome, Err(Error::EmptyInput));
    assert!(is_empty_input_error);
}
3478
/// Headers without a `From` field are rejected with `Error::MissingFrom`.
#[test]
fn parse_headers_only_missing_from() {
    let message = b"Subject: No From\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let outcome = parse_headers_only(message);
    let is_missing_from_error = matches!(outcome, Err(Error::MissingFrom));
    assert!(is_missing_from_error);
}
3485
/// A doubled backslash in a quoted `filename` parameter is reported as a
/// single backslash.
#[test]
fn extract_param_unescapes_backslash_in_filename() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"path\\\\file.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --b--";

    let attachments = parse_email(message).unwrap().attachments;
    assert_eq!(attachments.len(), 1);

    let filename = attachments[0].filename.as_deref();
    assert_eq!(
        filename,
        Some("path\\file.pdf"),
        "Backslash in quoted-string filename must be unescaped per RFC 5322 Section 3.2.4"
    );
}
3519
/// An escaped double quote inside a quoted `filename` parameter neither ends
/// the value early nor keeps its backslash.
#[test]
fn extract_param_handles_escaped_quote_in_filename() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/pdf\r\n\
        Content-Disposition: attachment; filename=\"file\\\"name.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --b--";

    let attachments = parse_email(message).unwrap().attachments;
    assert_eq!(attachments.len(), 1);

    let filename = attachments[0].filename.as_deref();
    assert_eq!(
        filename,
        Some("file\"name.pdf"),
        "Escaped quote in quoted-string filename must be handled per RFC 5322 Section 3.2.4"
    );
}
3550
/// Building a message whose attachment filename contains a backslash and then
/// parsing the result must give back the same filename.
#[test]
fn build_then_parse_filename_with_backslash_round_trip() {
    let sender = Address {
        name: None,
        email: "a@b.com".into(),
    };
    let recipient = Address {
        name: None,
        email: "to@b.com".into(),
    };
    let attachment = crate::types::OutgoingAttachment {
        filename: "path\\file.pdf".into(),
        content_type: "application/pdf".into(),
        data: b"data".to_vec(),
    };
    let outgoing = crate::types::OutgoingEmail {
        from: sender,
        to: vec![recipient],
        cc: vec![],
        bcc: vec![],
        reply_to: None,
        subject: "test".into(),
        body_text: Some("Body".into()),
        body_html: None,
        in_reply_to: None,
        references: None,
        attachments: vec![attachment],
    };

    let built = crate::build_message(&outgoing).unwrap();
    let round_tripped = parse_email(&built.raw).unwrap();

    assert_eq!(round_tripped.attachments.len(), 1);
    assert_eq!(
        round_tripped.attachments[0].filename.as_deref(),
        Some("path\\file.pdf"),
        "Round-trip filename with backslash must be preserved"
    );
}
3590
/// Building a message whose attachment filename contains a double quote and
/// then parsing the result must give back the same filename.
#[test]
fn build_then_parse_filename_with_quote_round_trip() {
    let sender = Address {
        name: None,
        email: "a@b.com".into(),
    };
    let recipient = Address {
        name: None,
        email: "to@b.com".into(),
    };
    let attachment = crate::types::OutgoingAttachment {
        filename: "file\"name.pdf".into(),
        content_type: "application/pdf".into(),
        data: b"data".to_vec(),
    };
    let outgoing = crate::types::OutgoingEmail {
        from: sender,
        to: vec![recipient],
        cc: vec![],
        bcc: vec![],
        reply_to: None,
        subject: "test".into(),
        body_text: Some("Body".into()),
        body_html: None,
        in_reply_to: None,
        references: None,
        attachments: vec![attachment],
    };

    let built = crate::build_message(&outgoing).unwrap();
    let round_tripped = parse_email(&built.raw).unwrap();

    assert_eq!(round_tripped.attachments.len(), 1);
    assert_eq!(
        round_tripped.attachments[0].filename.as_deref(),
        Some("file\"name.pdf"),
        "Round-trip filename with double-quote must be preserved"
    );
}
3628
/// A non-ASCII character (U+0130) in an earlier parameter value must not
/// throw off the lookup of a later `filename` parameter.
#[test]
fn extract_param_with_non_ascii_before_param() {
    let disposition = "attachment; description=\"\u{0130}stanbul\"; filename=\"report.pdf\"";
    let filename = extract_param(disposition, "filename");
    assert_eq!(
        filename.as_deref(),
        Some("report.pdf"),
        "extract_param must work when non-ASCII chars that change byte length \
         under Unicode lowercasing appear before the target parameter (RFC 6532)"
    );
}
3653
/// An hour of 25 makes the date unparseable; the message itself still parses.
#[test]
fn parse_date_rejects_invalid_hour() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 25:00:00 +0000\r\n\
        \r\n";
    let when = parse_email(message).unwrap().date;
    assert!(
        when.is_none(),
        "Date with hour=25 should be rejected per RFC 5322 Section 3.3"
    );
}
3671
/// A minute of 60 makes the date unparseable; the message itself still parses.
#[test]
fn parse_date_rejects_invalid_minute() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 12:60:00 +0000\r\n\
        \r\n";
    let when = parse_email(message).unwrap().date;
    assert!(
        when.is_none(),
        "Date with minute=60 should be rejected per RFC 5322 Section 3.3"
    );
}
3684
/// A second of 61 makes the date unparseable; the message itself still parses.
#[test]
fn parse_date_rejects_invalid_second() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 12:00:61 +0000\r\n\
        \r\n";
    let when = parse_email(message).unwrap().date;
    assert!(
        when.is_none(),
        "Date with second=61 should be rejected per RFC 5322 Section 3.3"
    );
}
3697
/// A day-of-month of 32 makes the date unparseable; the message itself still
/// parses.
#[test]
fn parse_date_rejects_invalid_day() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 32 Feb 2025 12:00:00 +0000\r\n\
        \r\n";
    let when = parse_email(message).unwrap().date;
    assert!(
        when.is_none(),
        "Date with day=32 should be rejected per RFC 5322 Section 3.3"
    );
}
3710
/// A day-of-month of 00 makes the date unparseable; the message itself still
/// parses.
#[test]
fn parse_date_rejects_day_zero() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 00 Feb 2025 12:00:00 +0000\r\n\
        \r\n";
    let when = parse_email(message).unwrap().date;
    assert!(
        when.is_none(),
        "Date with day=0 should be rejected per RFC 5322 Section 3.3"
    );
}
3723
/// A seconds value of 60 (leap second) must be accepted and reported as 60.
#[test]
fn parse_date_allows_leap_second() {
    // 30 Jun 2015 was a Tuesday. The weekday is kept consistent with the date
    // so that the `:60` seconds field is the only unusual thing in this
    // header; a mismatching day name would make the date invalid for a reason
    // unrelated to what this test is about.
    let raw = b"From: a@b.com\r\n\
        Date: Tue, 30 Jun 2015 23:59:60 +0000\r\n\
        \r\n";
    let parsed = parse_email(raw).unwrap();
    let date = parsed.date.expect("Leap second (60) should be accepted");
    assert_eq!(date.second, 60);
}
3734
/// A parenthesised comment placed between date tokens is skipped and the
/// date parses normally.
#[test]
fn parse_date_with_comment_between_tokens() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 (February) Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let email = parse_email(message).unwrap();
    let when = email
        .date
        .expect("Date with CFWS comment must parse per RFC 5322 Section 4.3");
    assert_eq!(when.year, 2025);
    assert_eq!(when.month, 2);
    assert_eq!(when.day, 13);
}
3753
/// A comment after the zone (`+0000 (UTC)`) is ignored; the date parses with
/// offset 0.
#[test]
fn parse_date_with_trailing_comment() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000 (UTC)\r\n\
        \r\n";

    let email = parse_email(message).unwrap();
    let when = email.date.expect("Date with trailing comment must parse");
    assert_eq!(when.year, 2025);
    assert_eq!(when.tz_offset_minutes, 0);
}
3767
/// A comment containing another comment after the zone is ignored as a whole.
#[test]
fn parse_date_with_nested_comments() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000 (UTC (nested))\r\n\
        \r\n";

    let email = parse_email(message).unwrap();
    let when = email.date.expect("Date with nested comment must parse");
    assert_eq!(when.year, 2025);
}
3779
/// A quoted display name whose last character is an escaped quote is read up
/// to the real closing quote.
#[test]
fn parse_display_name_ending_with_escaped_quote() {
    let message = b"From: \"She said \\\"hello\\\"\" <she@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let sender = parse_email(message).unwrap().from;
    assert_eq!(
        sender.name.as_deref(),
        Some("She said \"hello\""),
        "Display name ending with escaped quote must be parsed correctly \
         per RFC 5322 Section 3.2.4"
    );
}
3798
/// Parsing an `Address` from a string handles a display name that ends in an
/// escaped quote.
#[test]
fn address_from_str_ending_with_escaped_quote() {
    let parsed = "\"She said \\\"hello\\\"\" <she@example.com>"
        .parse::<Address>()
        .unwrap();
    assert_eq!(
        parsed.name.as_deref(),
        Some("She said \"hello\""),
        "Address::from_str must handle display names ending with escaped quotes"
    );
}
3811
/// The boundary string appearing in the middle of a body line is ordinary
/// text, not a part delimiter.
#[test]
fn boundary_must_be_at_line_start() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"BOUND\"\r\n\
        \r\n\
        --BOUND\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        This line mentions --BOUND in the middle\r\n\
        --BOUND--";

    let email = parse_email(message).unwrap();
    // A missing body is treated as empty so the assertion below reports it.
    let text = email.body_text.as_deref().unwrap_or("");
    assert!(
        text.contains("--BOUND"),
        "Mid-line boundary must be treated as literal text per RFC 2046 Section 5.1.1, \
         but body_text was: {text:?}"
    );
}
3838
/// `text/plaintext` merely starts with `text/plain`; it must be handled as an
/// attachment, not as the text body.
#[test]
fn mime_type_exact_match_not_prefix() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plaintext\r\n\
        \r\n\
        Not really plain text\r\n\
        --b--";

    let email = parse_email(message).unwrap();
    assert!(
        email.body_text.is_none(),
        "text/plaintext must not be treated as text/plain body"
    );

    let attachment_count = email.attachments.len();
    assert_eq!(
        attachment_count,
        1,
        "text/plaintext should be treated as an attachment"
    );
}
3867
/// A single-part `image/jpeg` message has no text body; its content is
/// reported as one attachment with section "1".
#[test]
fn parse_single_part_non_text_is_attachment() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: image/jpeg\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        /9j/4AAQSkZJRg==";

    let email = parse_email(message).unwrap();
    assert!(
        email.body_text.is_none(),
        "image/jpeg single-part must not populate body_text"
    );

    let attachment_count = email.attachments.len();
    assert_eq!(
        attachment_count,
        1,
        "image/jpeg single-part must be treated as an attachment"
    );

    let image = &email.attachments[0];
    assert_eq!(image.content_type, "image/jpeg");
    assert_eq!(image.section.as_deref(), Some("1"));
}
3893
/// A single-part `application/pdf` message with an attachment disposition is
/// reported as one non-inline attachment carrying its filename.
#[test]
fn parse_single_part_application_pdf_is_attachment() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: application/pdf; name=\"doc.pdf\"\r\n\
        Content-Disposition: attachment; filename=\"doc.pdf\"\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        JVBERi0xLjQK";

    let email = parse_email(message).unwrap();
    assert!(
        email.body_text.is_none(),
        "application/pdf must not populate body_text"
    );
    assert_eq!(email.attachments.len(), 1);

    let pdf = &email.attachments[0];
    assert_eq!(pdf.content_type, "application/pdf");
    assert_eq!(pdf.filename.as_deref(), Some("doc.pdf"));
    assert!(!pdf.is_inline);
}
3915
/// `Content-Disposition: attachment` wins over a `text/plain` content type:
/// the part is an attachment and the text body stays empty.
#[test]
fn parse_single_part_text_plain_with_attachment_disposition() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Disposition: attachment; filename=\"log.txt\"\r\n\
        \r\n\
        Server log data here";

    let email = parse_email(message).unwrap();
    assert!(
        email.body_text.is_none(),
        "text/plain with disposition:attachment must not populate body_text"
    );
    assert_eq!(email.attachments.len(), 1);

    let log = &email.attachments[0];
    assert_eq!(log.content_type, "text/plain");
    assert_eq!(log.filename.as_deref(), Some("log.txt"));
}
3937
/// The empty group `undisclosed-recipients:;` contributes no addresses.
#[test]
fn parse_group_address_empty_undisclosed() {
    let message = b"From: a@b.com\r\n\
        To: undisclosed-recipients:;\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let recipients = parse_email(message).unwrap().to;
    assert!(
        recipients.is_empty(),
        "empty group undisclosed-recipients:; must produce no addresses, got {:?}",
        recipients
    );
}
3956
/// The members of a named group are returned as ordinary addresses, in order.
#[test]
fn parse_group_address_with_members() {
    let message = b"From: a@b.com\r\n\
        To: friends:one@x.com, two@x.com;\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let recipients = parse_email(message).unwrap().to;
    assert_eq!(
        recipients.len(),
        2,
        "group with 2 members must produce 2 addresses, got {:?}",
        recipients
    );
    assert_eq!(recipients[0].email, "one@x.com");
    assert_eq!(recipients[1].email, "two@x.com");
}
3976
/// Plain addresses before and after a group are returned together with the
/// group's members, in header order.
#[test]
fn parse_group_address_mixed_with_regular() {
    let message = b"From: a@b.com\r\n\
        To: solo@x.com, friends:one@x.com, two@x.com;, last@x.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let email = parse_email(message).unwrap();
    let found = email
        .to
        .iter()
        .map(|recipient| recipient.email.as_str())
        .collect::<Vec<&str>>();
    assert_eq!(
        found,
        vec!["solo@x.com", "one@x.com", "two@x.com", "last@x.com"],
        "must extract all 4 addresses from mixed regular+group syntax"
    );
}
3993
/// A lone `=` at the very end of quoted-printable input is dropped from the
/// decoded output.
#[test]
fn decode_qp_trailing_equals_is_soft_break() {
    let decoded = decode_quoted_printable(b"Hello=");
    assert_eq!(
        decoded, b"Hello",
        "trailing '=' must be treated as soft line break per RFC 2045 Section 6.7"
    );
}
4004
/// `=` followed by a bare CR at the end of quoted-printable input is dropped
/// from the decoded output.
#[test]
fn decode_qp_trailing_equals_cr_is_soft_break() {
    let decoded = decode_quoted_printable(b"Hello=\r");
    assert_eq!(
        decoded, b"Hello",
        "trailing '=\\r' must be treated as soft line break"
    );
}
4015
/// For `addr (Comment)`, the comment is removed from the email and used as
/// the display name.
#[test]
fn parse_bare_address_with_trailing_comment() {
    let message = b"From: sender@example.com\r\n\
        To: user@example.com (Display Name)\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let recipients = parse_email(message).unwrap().to;
    assert_eq!(recipients.len(), 1);

    let only = &recipients[0];
    assert_eq!(
        only.email, "user@example.com",
        "email must not contain the trailing comment"
    );
    assert_eq!(
        only.name.as_deref(),
        Some("Display Name"),
        "trailing comment should become display name per RFC 5322 Section 3.4.1"
    );
}
4039
/// For `(Comment) addr`, the comment is removed from the email and is not
/// used as a display name.
#[test]
fn parse_bare_address_with_leading_comment() {
    let message = b"From: sender@example.com\r\n\
        To: (Comment) user@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let recipients = parse_email(message).unwrap().to;
    assert_eq!(recipients.len(), 1);

    let only = &recipients[0];
    assert_eq!(
        only.email, "user@example.com",
        "email must not contain the leading comment"
    );
    assert_eq!(
        only.name, None,
        "leading comment must not become display name"
    );
}
4061
/// Text that looks like `charset=` inside another parameter's quoted value
/// must not be picked up as the charset parameter.
#[test]
fn extract_param_skips_quoted_values() {
    let content_type = "text/html; boundary=\"has charset=bad inside\"; charset=utf-8";
    let found = extract_param(content_type, "charset");
    assert_eq!(
        found.as_deref(),
        Some("utf-8"),
        "Should skip match inside quoted boundary value"
    );
}
4074
/// A `text/plain` part with no charset parameter that contains byte 0x93
/// must decode it to U+201C and produce no U+FFFD replacement characters.
#[test]
fn multipart_part_without_charset_uses_us_ascii_default() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Hello \x93World\r\n\
        --b--";

    let email = parse_email(message).unwrap();
    // `text` keeps its name because the assertion messages capture it.
    let text = email.body_text.unwrap();

    let has_left_quote = text.contains('\u{201c}');
    assert!(
        has_left_quote,
        "Part with text/plain (no charset) should use US-ASCII default per \
         RFC 2045 Section 5.2, decoding 0x93 as U+201C. Got: {text:?}"
    );

    let has_replacement = text.contains('\u{FFFD}');
    assert!(
        !has_replacement,
        "Part with text/plain (no charset) should not produce UTF-8 replacement \
         characters. Got: {text:?}"
    );
}
4110
/// Unfolding a `Subject:` header split across two lines.
#[test]
fn parse_header_unfold_preserves_trailing_whitespace() {
    let message = b"From: a@b.com\r\nSubject: Hello \r\n World\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let email = parse_email(message).unwrap();
    let subject = email.subject.as_deref();
    assert_eq!(
        subject,
        Some("Hello World"),
        "Trailing whitespace on first line must be preserved during unfolding \
         (RFC 5322 Section 2.2.3)"
    );
}
4131
/// The CRLF that terminates a single-part text body is not part of
/// `body_text`.
#[test]
fn parse_single_part_body_no_trailing_crlf() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        \r\n\
        Hello, World!\r\n";

    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello, World!"),
        "Single-part body text must not include trailing CRLF"
    );
}
4153
/// The CRLF that terminates a single-part HTML body is not part of
/// `body_html`.
#[test]
fn parse_single_part_html_no_trailing_crlf() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/html; charset=utf-8\r\n\
        \r\n\
        <p>Hello</p>\r\n";

    let email = parse_email(message).unwrap();
    let html = email.body_html.as_deref();
    assert_eq!(
        html,
        Some("<p>Hello</p>"),
        "Single-part HTML body must not include trailing CRLF"
    );
}
4170
/// Builds a text-only message with `build_message`, parses it back, and
/// checks the body text comes back unchanged.
#[test]
fn round_trip_single_part_body_text() {
    let sender = crate::types::Address {
        name: None,
        email: "a@b.com".into(),
    };
    let recipient = crate::types::Address {
        name: None,
        email: "c@d.com".into(),
    };
    let outgoing = crate::types::OutgoingEmail {
        from: sender,
        to: vec![recipient],
        cc: Vec::new(),
        bcc: Vec::new(),
        reply_to: None,
        subject: "Test".into(),
        body_text: Some("Hello, World!".into()),
        body_html: None,
        in_reply_to: None,
        references: None,
        attachments: Vec::new(),
    };

    let wire = crate::build_message(&outgoing).unwrap();
    let reparsed = parse_email(&wire.raw).unwrap();
    let body = reparsed.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello, World!"),
        "Single-part body text must round-trip without trailing CRLF"
    );
}
4202
/// An encoded-word display name in `From:` that decodes to text containing
/// a comma must come back whole, with the address intact.
#[test]
fn parse_encoded_word_display_name_with_comma() {
    let message = b"From: =?UTF-8?B?Sm9obiwgRG9l?= <john@example.com>\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let email = parse_email(message).unwrap();
    let sender = &email.from;
    assert_eq!(
        sender.name.as_deref(),
        Some("John, Doe"),
        "RFC 2047 encoded display name with comma must be preserved \
         (RFC 2047 Section 5 rule 3): decode AFTER address parsing"
    );
    assert_eq!(sender.email, "john@example.com");
}
4229
/// A base64 body sprinkled with `!`, `#` and `~` still decodes to the
/// intended text.
#[test]
fn parse_base64_body_ignores_non_alphabet_chars() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64\r\n\
        \r\n\
        SGVs!bG8#gV29~ybGQ=";

    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "RFC 2045 Section 6.8: non-alphabet characters must be ignored in base64 data"
    );
}
4253
/// In a `To:` list, a comma hidden inside an encoded-word display name
/// must not be treated as an address separator.
#[test]
fn parse_encoded_word_display_name_with_comma_in_to() {
    let message = b"From: sender@example.com\r\n\
        To: =?UTF-8?B?Sm9obiwgRG9l?= <john@example.com>, other@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        \r\n";

    let email = parse_email(message).unwrap();
    let recipients = &email.to;
    assert_eq!(
        recipients.len(),
        2,
        "Must parse exactly 2 addresses, not 3 (encoded comma is not a separator)"
    );

    let first = &recipients[0];
    assert_eq!(
        first.name.as_deref(),
        Some("John, Doe"),
        "First recipient display name must be 'John, Doe'"
    );
    assert_eq!(first.email, "john@example.com");
    assert_eq!(recipients[1].email, "other@example.com");
}
4279
/// A built message that carries only an attachment (no text, no HTML) must
/// parse back with `body_text == None`.
#[test]
fn round_trip_empty_body_text_is_none() {
    let sender = crate::types::Address {
        name: None,
        email: "a@b.com".into(),
    };
    let recipient = crate::types::Address {
        name: None,
        email: "c@d.com".into(),
    };
    let attachment = crate::types::OutgoingAttachment {
        filename: "test.txt".into(),
        content_type: "text/plain".into(),
        data: b"attachment data".to_vec(),
    };
    let outgoing = crate::types::OutgoingEmail {
        from: sender,
        to: vec![recipient],
        cc: Vec::new(),
        bcc: Vec::new(),
        reply_to: None,
        subject: "Empty body".into(),
        body_text: None,
        body_html: None,
        in_reply_to: None,
        references: None,
        attachments: vec![attachment],
    };

    let wire = crate::build_message(&outgoing).unwrap();
    let reparsed = parse_email(&wire.raw).unwrap();

    assert_eq!(
        reparsed.body_text, None,
        "Empty body_text must round-trip as None, not Some(\"\")"
    );
}
4329
/// A built message with real text and an empty-string HTML body must parse
/// back with `body_html == None` and the text preserved.
#[test]
fn round_trip_empty_body_html_in_alternative_is_none() {
    let sender = crate::types::Address {
        name: None,
        email: "a@b.com".into(),
    };
    let recipient = crate::types::Address {
        name: None,
        email: "c@d.com".into(),
    };
    let outgoing = crate::types::OutgoingEmail {
        from: sender,
        to: vec![recipient],
        cc: Vec::new(),
        bcc: Vec::new(),
        reply_to: None,
        subject: "Text only".into(),
        body_text: Some("Plain text".into()),
        body_html: Some(String::new()),
        in_reply_to: None,
        references: None,
        attachments: Vec::new(),
    };

    let wire = crate::build_message(&outgoing).unwrap();
    let reparsed = parse_email(&wire.raw).unwrap();

    assert_eq!(
        reparsed.body_html, None,
        "Empty body_html must parse as None, not Some(\"\")"
    );
    let text = reparsed.body_text.as_deref();
    assert_eq!(
        text,
        Some("Plain text"),
        "body_text must be preserved"
    );
}
4372
/// A parenthesised comment inside `Content-Type` must not stop the body
/// from being recognised as plain text (and must not create an attachment).
#[test]
fn extract_mime_type_strips_rfc5322_comments() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain (this is a comment); charset=utf-8\r\n\
        \r\n\
        Hello with comment";

    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello with comment"),
        "Body must be extracted as body_text when Content-Type has an RFC 5322 comment"
    );
    let no_attachments = email.attachments.is_empty();
    assert!(
        no_attachments,
        "No attachments expected for a plain text/plain message with a comment"
    );
}
4400
/// Inside `multipart/digest`, a part with no `Content-Type` header is
/// reported as a `message/rfc822` attachment, not as body text.
#[test]
fn multipart_digest_default_content_type_is_message_rfc822() {
    let message = b"From: sender@example.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Subject: Digest\r\n\
        Content-Type: multipart/digest; boundary=\"digestboundary\"\r\n\
        \r\n\
        --digestboundary\r\n\
        \r\n\
        From: nested@example.com\r\n\
        Subject: Nested message\r\n\
        \r\n\
        Nested body text\r\n\
        --digestboundary--\r\n";

    let email = parse_email(message).unwrap();

    let has_no_text = email.body_text.is_none();
    assert!(
        has_no_text,
        "multipart/digest parts without Content-Type should default to \
         message/rfc822, not text/plain — body_text should be None"
    );

    let attachments = &email.attachments;
    assert_eq!(
        attachments.len(),
        1,
        "multipart/digest part should be treated as message/rfc822 attachment"
    );
    assert_eq!(
        attachments[0].content_type, "message/rfc822",
        "default Content-Type in multipart/digest must be message/rfc822 \
         (RFC 2046 Section 5.1.5)"
    );
}
4441
/// Whitespace between the angle brackets of a `Content-ID` is trimmed, both
/// for a part of a multipart message and for a single-part message.
#[test]
fn content_id_whitespace_inside_brackets_trimmed() {
    // Case 1: attachment part inside multipart/mixed.
    let multipart = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: image/png\r\n\
        Content-ID: < cid@example.com >\r\n\
        \r\n\
        PNG\r\n\
        --b--";
    let from_multipart = parse_email(multipart).unwrap();
    let part_cid = from_multipart.attachments[0].content_id.as_deref();
    assert_eq!(
        part_cid,
        Some("cid@example.com"),
        "Content-ID must be trimmed after bracket stripping (RFC 2392)"
    );

    // Case 2: the whole message is a single image/png entity.
    let single = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: image/png\r\n\
        Content-ID: < cid2@example.com >\r\n\
        \r\n\
        PNG";
    let from_single = parse_email(single).unwrap();
    let single_cid = from_single.attachments[0].content_id.as_deref();
    assert_eq!(
        single_cid,
        Some("cid2@example.com"),
        "Content-ID in single-part message must be trimmed (RFC 2392)"
    );
}
4487
/// `parse_headers_only` fills in every header-derived field and leaves the
/// body fields and attachments empty even when a body is present.
#[test]
fn parse_headers_only_all_fields_verified() {
    let message = b"From: sender@example.com\r\n\
        To: to@example.com\r\n\
        Cc: cc@example.com\r\n\
        Bcc: bcc@example.com\r\n\
        Reply-To: reply@example.com\r\n\
        Subject: Full test\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Message-ID: <msg1@example.com>\r\n\
        In-Reply-To: <parent@example.com>\r\n\
        References: <ref1@example.com> <ref2@example.com>\r\n\
        \r\n\
        Body that should be ignored";

    let hdrs = parse_headers_only(message).unwrap();

    // Address fields.
    assert_eq!(hdrs.from.email, "sender@example.com");
    assert_eq!(hdrs.to.len(), 1);
    assert_eq!(hdrs.to[0].email, "to@example.com");
    assert_eq!(hdrs.cc.len(), 1);
    assert_eq!(hdrs.cc[0].email, "cc@example.com");
    assert_eq!(hdrs.bcc.len(), 1);
    assert_eq!(hdrs.bcc[0].email, "bcc@example.com");
    assert_eq!(hdrs.reply_to.len(), 1);
    assert_eq!(hdrs.reply_to[0].email, "reply@example.com");

    // Scalar and threading fields.
    assert_eq!(hdrs.subject.as_deref(), Some("Full test"));
    assert!(hdrs.date.is_some());
    assert_eq!(hdrs.message_id.as_deref(), Some("msg1@example.com"));
    assert_eq!(hdrs.in_reply_to.as_deref(), Some("parent@example.com"));
    assert_eq!(
        hdrs.references.as_deref(),
        Some("ref1@example.com ref2@example.com")
    );

    // Nothing from the body may leak through.
    assert!(hdrs.body_text.is_none());
    assert!(hdrs.body_html.is_none());
    assert!(hdrs.attachments.is_empty());
}
4530
/// With only a `From:` header present, every optional field is `None` and
/// every address list is empty.
#[test]
fn parse_missing_optional_headers_returns_none_or_empty() {
    let minimal = b"From: a@b.com\r\n\r\n";

    let email = parse_email(minimal).unwrap();

    assert_eq!(email.from.email, "a@b.com");

    // Optional scalar headers.
    assert!(email.subject.is_none());
    assert!(email.date.is_none());
    assert!(email.message_id.is_none());
    assert!(email.in_reply_to.is_none());
    assert!(email.references.is_none());

    // Address lists.
    assert!(email.to.is_empty());
    assert!(email.cc.is_empty());
    assert!(email.bcc.is_empty());
    assert!(email.reply_to.is_empty());
}
4550
/// `xfilename=` appearing before `filename=` must not be mistaken for the
/// `filename` parameter.
#[test]
fn extract_param_rejects_substring_match() {
    let disposition = "attachment; xfilename=\"bad.pdf\"; filename=\"good.pdf\"";
    let found = extract_param(disposition, "filename");
    assert_eq!(
        found.as_deref(),
        Some("good.pdf"),
        "Must not match xfilename as filename"
    );
}
4563
/// When the only candidate is `notfilename=`, looking up `filename` finds
/// nothing.
#[test]
fn extract_param_rejects_suffix_only_match() {
    let disposition = "attachment; notfilename=\"only.pdf\"";
    let found = extract_param(disposition, "filename");
    let missing = found.is_none();
    assert!(
        missing,
        "Must not match 'filename' inside 'notfilename'"
    );
}
4574
/// A group with no members produces an empty address list.
#[test]
fn parse_group_address_empty() {
    let header_value = "undisclosed-recipients:;";
    // `addrs` keeps its name because the assertion message captures it.
    let addrs = parse_address_list(header_value);
    let none_found = addrs.is_empty();
    assert!(
        none_found,
        "empty group must produce no addresses, got {addrs:?}"
    );
}
4589
/// A group with two members yields both mailboxes in order.
#[test]
fn parse_group_address_with_two_members() {
    // `addrs` keeps its name because the assertion message captures it.
    let addrs = parse_address_list("Friends: a@x.com, b@x.com;");
    assert_eq!(addrs.len(), 2, "group with 2 members: {addrs:?}");
    // The length check above guarantees the zip visits both entries.
    for (addr, expected) in addrs.iter().zip(["a@x.com", "b@x.com"]) {
        assert_eq!(addr.email, expected);
    }
}
4598
/// Two groups followed by a standalone mailbox flatten into four addresses
/// in header order.
#[test]
fn parse_multiple_groups_and_solo() {
    let header_value = "Team A: a1@x.com, a2@x.com;, Team B: b1@x.com;, solo@x.com";
    // `addrs` keeps its name because the assertion message captures it.
    let addrs = parse_address_list(header_value);
    assert_eq!(addrs.len(), 4, "2 groups + 1 solo: {addrs:?}");

    // The length check above guarantees the zip visits all four entries.
    let expected_emails = ["a1@x.com", "a2@x.com", "b1@x.com", "solo@x.com"];
    for (addr, expected) in addrs.iter().zip(expected_emails) {
        assert_eq!(addr.email, expected);
    }
}
4610
/// A comma inside a parenthesised comment is not an address separator.
#[test]
fn parse_address_comment_with_comma_audit() {
    let header_value = "user@x.com (Last, First), other@x.com";
    // `addrs` keeps its name because the assertion message captures it.
    let addrs = parse_address_list(header_value);
    assert_eq!(
        addrs.len(),
        2,
        "comma inside comment must not split: {addrs:?}"
    );
    let (first, second) = (&addrs[0], &addrs[1]);
    assert_eq!(first.email, "user@x.com");
    assert_eq!(second.email, "other@x.com");
}
4624
/// With sections `*0` and `*2` but no `*1`, only section 0 is returned.
#[test]
fn rfc2231_continuation_gap_stops() {
    let disposition = "attachment; filename*0=\"hello\"; filename*2=\"skipped\"";
    let assembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        assembled.as_deref(),
        Some("hello"),
        "continuation must stop at missing section index"
    );
}
4642
/// A lone `filename*0` section is returned as the full value.
#[test]
fn rfc2231_continuation_single_section() {
    let disposition = "attachment; filename*0=\"report.pdf\"";
    let assembled = extract_rfc2231_continuation(disposition, "filename");
    let filename = assembled.as_deref();
    assert_eq!(filename, Some("report.pdf"));
}
4650
/// Three plain `filename*N` sections on an attachment part are joined into
/// one filename by the full parser.
#[test]
fn rfc2231_continuation_no_charset_defaults_to_utf8() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"b\"\r\n\
        \r\n\
        --b\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Body\r\n\
        --b\r\n\
        Content-Type: application/octet-stream\r\n\
        Content-Disposition: attachment; filename*0=\"annual_\"; filename*1=\"report_\"; filename*2=\"2025.pdf\"\r\n\
        \r\n\
        DATA\r\n\
        --b--";

    let email = parse_email(message).unwrap();
    assert_eq!(email.attachments.len(), 1);

    let filename = email.attachments[0].filename.as_deref();
    assert_eq!(
        filename,
        Some("annual_report_2025.pdf"),
        "RFC 2231 continuation without charset should decode as UTF-8"
    );
}
4685
/// Spaces embedded in base64 input are skipped during decoding.
#[test]
fn base64_with_embedded_spaces() {
    let encoded = b"SGVs bG8g V29y bGQ=";
    let bytes = decode_transfer_encoding(encoded, "base64");
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text,
        "Hello World",
        "base64 decoder must strip non-alphabet characters (RFC 2045 Section 6.8)"
    );
}
4704
/// Tabs embedded in base64 input are skipped during decoding.
#[test]
fn base64_with_tabs() {
    let encoded = b"SGVs\tbG8g\tV29ybGQ=";
    let bytes = decode_transfer_encoding(encoded, "base64");
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text,
        "Hello World",
        "base64 decoder must strip tabs (RFC 2045 Section 6.8)"
    );
}
4716
/// Quoted-printable input ending in `=` decodes without that byte.
#[test]
fn qp_trailing_equals_stripped() {
    let encoded = b"Hello=";
    let bytes = decode_quoted_printable(encoded);
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text,
        "Hello",
        "trailing '=' is a soft break (RFC 2045 Section 6.7)"
    );
}
4733
/// `=ZZ` is not a valid hex escape, so quoted-printable decoding leaves it
/// in the output untouched.
#[test]
fn qp_malformed_hex_passthrough() {
    let encoded = b"Hello=ZZ World";
    let bytes = decode_quoted_printable(encoded);
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text,
        "Hello=ZZ World",
        "malformed =ZZ must pass through literally (Postel's law)"
    );
}
4746
/// Q-encoding leaves an invalid `=ZZ` escape in the output untouched.
#[test]
fn q_encoding_malformed_hex_passthrough() {
    let bytes = decode_q_encoding("Hello=ZZWorld");
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text,
        "Hello=ZZWorld",
        "malformed =ZZ in Q-encoding must pass through literally"
    );
}
4762
/// In Q-encoding, unlike quoted-printable, a trailing `=` is kept.
#[test]
fn q_encoding_trailing_equals() {
    let bytes = decode_q_encoding("Hello=");
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text,
        "Hello=",
        "trailing '=' in Q-encoding must pass through literally"
    );
}
4774
/// End-to-end check that a header-less digest part becomes a single
/// `message/rfc822` attachment and contributes no body text.
#[test]
fn multipart_digest_default_content_type_full_email() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/digest; boundary=\"dg\"\r\n\
        \r\n\
        --dg\r\n\
        \r\n\
        From: nested@example.com\r\n\
        Subject: Inner\r\n\
        \r\n\
        Inner body\r\n\
        --dg--";

    let email = parse_email(message).unwrap();
    let has_no_text = email.body_text.is_none();
    assert!(
        has_no_text,
        "digest part must NOT be treated as text/plain"
    );

    let attachments = &email.attachments;
    assert_eq!(attachments.len(), 1);
    assert_eq!(attachments[0].content_type, "message/rfc822");
}
4805
/// A space-indented line before the first header does not prevent the
/// following headers and body from being parsed.
#[test]
fn parse_headers_leading_space_skipped() {
    let message = b" continuation without header\r\nFrom: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\nBody";
    let email = parse_email(message).unwrap();
    let sender = &email.from.email;
    assert_eq!(sender, "a@b.com");
    let body = email.body_text.as_deref();
    assert_eq!(body, Some("Body"));
}
4822
/// A tab-indented line before the first header does not prevent `From:`
/// from being parsed.
#[test]
fn parse_headers_leading_tab_skipped() {
    let message = b"\tcontinuation without header\r\nFrom: a@b.com\r\nDate: Thu, 13 Feb 2025 15:47:33 +0000\r\n\r\n";
    let email = parse_email(message).unwrap();
    let sender = &email.from.email;
    assert_eq!(sender, "a@b.com");
}
4831
/// An encoded word whose base64 payload is invalid still shows the `=?`
/// marker in the output.
#[test]
fn encoded_word_bad_base64_passthrough() {
    // `result` keeps its name because the assertion message captures it.
    let result = decode_encoded_words("=?UTF-8?B?=====?=");
    let still_encoded = result.contains("=?");
    assert!(
        still_encoded,
        "Bad base64 encoded word should pass through literally, got: {result:?}"
    );
}
4850
/// An encoded word naming an unrecognised charset still yields the decoded
/// ASCII payload.
#[test]
fn encoded_word_unknown_charset_fallback() {
    // `result` keeps its name because the assertion message captures it.
    let result = decode_encoded_words("=?x-nonexistent-charset?B?SGVsbG8=?=");
    let decoded_payload = result.contains("Hello");
    assert!(
        decoded_payload,
        "Unknown charset should fall back to UTF-8, got: {result:?}"
    );
}
4865
/// An encoded word missing its closing `?=` still shows the `=?` marker in
/// the output.
#[test]
fn encoded_word_truncated_no_closing() {
    // `result` keeps its name because the assertion message captures it.
    let result = decode_encoded_words("Start =?UTF-8?B?SGVsbG8= End");
    let still_encoded = result.contains("=?");
    assert!(
        still_encoded,
        "Truncated encoded word should pass through, got: {result:?}"
    );
}
4879
/// A percent-encoded section 0 followed by a plain quoted section 1 is
/// reassembled into one decoded filename.
#[test]
fn rfc2231_continuation_mixed_encoded_and_plain() {
    let disposition = "attachment; filename*0*=UTF-8''r%C3%A9sum; filename*1=\"e.pdf\"";
    let assembled = extract_rfc2231_continuation(disposition, "filename");
    assert_eq!(
        assembled.as_deref(),
        Some("r\u{e9}sume.pdf"),
        "RFC 2231 mixed encoded/plain continuation should reassemble correctly"
    );
}
4898
/// Three consecutive plain sections are concatenated in index order.
#[test]
fn rfc2231_continuation_three_sections() {
    let disposition =
        "attachment; filename*0=\"part1_\"; filename*1=\"part2_\"; filename*2=\"part3.pdf\"";
    let assembled = extract_rfc2231_continuation(disposition, "filename");
    let filename = assembled.as_deref();
    assert_eq!(filename, Some("part1_part2_part3.pdf"));
}
4907
/// A quoted parameter value with no closing quote is read through to the
/// end of the header.
#[test]
fn extract_param_unterminated_quoted_value() {
    let content_type = "text/plain; charset=\"utf-8";
    let found = extract_param(content_type, "charset");
    assert_eq!(
        found.as_deref(),
        Some("utf-8"),
        "Unterminated quoted-string should extract to end of string"
    );
}
4927
/// A `\\` pair inside a quoted parameter value collapses to a single
/// backslash.
#[test]
fn extract_param_quoted_with_backslash_escape() {
    let disposition = "attachment; filename=\"file\\\\name.txt\"";
    let found = extract_param(disposition, "filename");
    assert_eq!(
        found.as_deref(),
        Some("file\\name.txt"),
        "Backslash escape in quoted param value must be unescaped"
    );
}
4939
/// An empty quoted value (`filename=""`) is reported as absent.
#[test]
fn extract_param_empty_quoted_value() {
    let disposition = "attachment; filename=\"\"";
    // `result` keeps its name because the assertion message captures it.
    let result = extract_param(disposition, "filename");
    let absent = result.is_none();
    assert!(
        absent,
        "Empty quoted-string value should return None, got: {result:?}"
    );
}
4950
/// Nested parentheses stay inside the extracted comment text.
#[test]
fn extract_comment_text_nested_parens() {
    let comment = extract_comment_text("(outer (inner) text)");
    assert_eq!(
        comment.as_deref(),
        Some("outer (inner) text"),
        "Nested parens should be included in comment text"
    );
}
4961
/// Backslash-escaped parentheses inside a comment come back without the
/// backslashes.
#[test]
fn extract_comment_text_escaped_chars() {
    let comment = extract_comment_text("(hello \\(world\\))");
    assert_eq!(
        comment.as_deref(),
        Some("hello (world)"),
        "Escaped parens inside comments should be unescaped"
    );
}
4972
/// `()` contains no text, so nothing is extracted.
#[test]
fn extract_comment_text_empty() {
    let comment = extract_comment_text("()");
    let absent = comment.is_none();
    assert!(absent, "Empty comment should return None");
}
4979
/// Input that is not wrapped in parentheses yields no comment text.
#[test]
fn extract_comment_text_no_paren() {
    let comment = extract_comment_text("not a comment");
    let absent = comment.is_none();
    assert!(
        absent,
        "Non-parenthesized input should return None"
    );
}
4989
/// Table of `strip_comments` cases: nested comments, escaped parens inside
/// a comment, and a backslash pair outside any comment.
#[test]
fn strip_comments_nested_and_escaped() {
    let cases = [
        ("Hello (outer (inner) comment) World", "Hello World"),
        ("Hello (comment with \\) escaped) World", "Hello World"),
        ("Hello \\\\ World", "Hello \\\\ World"),
        ("Before (escaped \\( paren) After", "Before After"),
    ];

    for (input, expected) in cases {
        let stripped = strip_comments(input);
        assert_eq!(stripped, expected);
    }
}
5014
/// Backslash-escaped parentheses outside any comment are left exactly as
/// written.
#[test]
fn strip_comments_escaped_outside_comment() {
    let input = "no \\(comment\\) here";
    let stripped = strip_comments(input);
    assert_eq!(
        stripped, "no \\(comment\\) here",
        "Escaped parens outside comments should not open/close comments"
    );
}
5039
/// A date string with only day and month is rejected.
#[test]
fn parse_date_too_few_parts() {
    let parsed = parse_rfc5322_date("13 Feb");
    assert!(
        parsed.is_none(),
        "Date with too few parts should return None"
    );
}
5053
/// A time field written without colons is rejected.
#[test]
fn parse_date_time_no_colon() {
    let parsed = parse_rfc5322_date("13 Feb 2025 1547 +0000");
    assert!(
        parsed.is_none(),
        "Time without colon should return None"
    );
}
5063
/// A month token that is not a known month name is rejected.
#[test]
fn parse_date_unknown_month() {
    let parsed = parse_rfc5322_date("13 Foo 2025 12:00:00 +0000");
    assert!(
        parsed.is_none(),
        "Unknown month name should return None"
    );
}
5072
/// Free text, the empty string, and blank input all fail to parse.
#[test]
fn parse_date_completely_malformed() {
    let free_text = parse_rfc5322_date("not a date at all");
    assert!(free_text.is_none());

    let empty = parse_rfc5322_date("");
    assert!(empty.is_none());

    let blank = parse_rfc5322_date(" ");
    assert!(blank.is_none());
}
5080
/// An unrecognised zone abbreviation still parses, with a zero offset.
#[test]
fn parse_date_unknown_timezone_defaults_zero() {
    let parsed = parse_rfc5322_date("13 Feb 2025 12:00:00 ZULU").unwrap();
    assert_eq!(
        parsed.tz_offset_minutes, 0,
        "Unknown timezone abbreviation should default to +0000"
    );
}
5090
/// A day field that is not a number is rejected.
#[test]
fn parse_date_non_numeric_day() {
    let parsed = parse_rfc5322_date("XX Feb 2025 12:00:00 +0000");
    assert!(
        parsed.is_none(),
        "Non-numeric day should return None"
    );
}
5099
/// A year field that is not a number is rejected.
#[test]
fn parse_date_non_numeric_year() {
    let parsed = parse_rfc5322_date("13 Feb XXXX 12:00:00 +0000");
    assert!(
        parsed.is_none(),
        "Non-numeric year should return None"
    );
}
5108
/// Boundary lines terminated by bare LF are recognised.
#[test]
fn split_mime_parts_lf_only_boundaries() {
    let multipart = b"--boundary\nContent-Type: text/plain\n\nPart 1\n--boundary\nContent-Type: text/plain\n\nPart 2\n--boundary--";
    let segments = split_mime_parts(multipart, "boundary");
    let count = segments.len();
    assert_eq!(
        count,
        2,
        "Should find 2 parts with LF-only boundaries"
    );
}
5126
/// A boundary on the very first line of the body (no preamble) is found,
/// and the single part carries its content.
#[test]
fn split_mime_parts_boundary_at_start() {
    let multipart = b"--b\r\nContent-Type: text/plain\r\n\r\nOnly part\r\n--b--";
    let segments = split_mime_parts(multipart, "b");
    assert_eq!(
        segments.len(),
        1,
        "Should find 1 part when boundary is at start"
    );
    let first = String::from_utf8_lossy(segments[0]);
    assert!(first.contains("Only part"));
}
5141
/// The boundary token appearing in the middle of a line is ordinary text.
#[test]
fn split_mime_parts_midline_boundary_ignored() {
    let multipart =
        b"--b\r\nContent-Type: text/plain\r\n\r\nText mentioning --b in the middle\r\n--b--";
    let segments = split_mime_parts(multipart, "b");
    assert_eq!(segments.len(), 1, "Mid-line boundary must not split");

    let first = String::from_utf8_lossy(segments[0]);
    let kept = first.contains("--b in the middle");
    assert!(
        kept,
        "Mid-line boundary text should be preserved"
    );
}
5155
/// A boundary line followed by a space and a tab before CRLF still counts
/// as a boundary.
#[test]
fn split_mime_parts_boundary_with_trailing_whitespace() {
    let multipart = b"--b \t\r\nContent-Type: text/plain\r\n\r\nBody text\r\n--b--";
    let segments = split_mime_parts(multipart, "b");
    let count = segments.len();
    assert_eq!(
        count,
        1,
        "Boundary with trailing whitespace should be recognized"
    );
}
5169
/// A `--bound` token embedded in body text does not split the part and is
/// kept in its content.
#[test]
fn split_mime_parts_boundary_not_at_line_start_skipped() {
    let multipart = b"--bound\r\n\r\nSome text has --bound embedded\r\n--bound--";
    let segments = split_mime_parts(multipart, "bound");
    assert_eq!(segments.len(), 1);

    let first = String::from_utf8_lossy(segments[0]);
    assert!(first.contains("--bound embedded"));
}
5181
/// A `Content-Transfer-Encoding` value wrapped in double quotes must still
/// result in some body text. Only presence is asserted, not the decoded
/// content.
#[test]
fn parse_quoted_transfer_encoding() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: \"base64\"\r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n";
    let email = parse_email(message).unwrap();
    let has_text = email.body_text.is_some();
    assert!(
        has_text,
        "Message with quoted CTE should still produce body_text"
    );
}
5208
/// Whitespace around the `Content-Transfer-Encoding` token does not stop
/// the base64 body from being decoded.
#[test]
fn parse_transfer_encoding_with_whitespace() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: text/plain; charset=utf-8\r\n\
        Content-Transfer-Encoding: base64 \r\n\
        \r\n\
        SGVsbG8gV29ybGQ=\r\n";
    let email = parse_email(message).unwrap();
    let body = email.body_text.as_deref();
    assert_eq!(
        body,
        Some("Hello World"),
        "CTE with whitespace should still decode correctly"
    );
}
5226
/// `=` followed by CRLF is removed and the text on either side is joined.
#[test]
fn qp_soft_line_break_crlf() {
    let encoded = b"Hello=\r\n World";
    let bytes = decode_quoted_printable(encoded);
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text,
        "Hello World",
        "=\\r\\n soft break should be removed (RFC 2045 Section 6.7)"
    );
}
5244
/// `=` followed by a bare LF is removed and the text on either side is
/// joined.
#[test]
fn qp_soft_line_break_lf_only() {
    let encoded = b"Hello=\nWorld";
    let bytes = decode_quoted_printable(encoded);
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text,
        "HelloWorld",
        "=\\n soft break should be removed"
    );
}
5257
/// `=` followed by LF as the last two bytes of the input is removed.
#[test]
fn qp_soft_break_lf_at_end() {
    let encoded = b"Hi=\n";
    let bytes = decode_quoted_printable(encoded);
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text,
        "Hi",
        "=\\n at end of data should be a soft break"
    );
}
5270
/// An invalid escape is copied through while the valid escapes after it
/// are still decoded.
#[test]
fn qp_invalid_hex_passthrough() {
    let encoded = b"=GG=4F=4B";
    let bytes = decode_quoted_printable(encoded);
    let text = std::str::from_utf8(&bytes).unwrap();
    assert_eq!(
        text,
        "=GGOK",
        "Invalid hex =GG should pass through, valid =4F=4B should decode"
    );
}
5284
/// Decoding zero bytes of base64 yields zero bytes.
#[test]
fn base64_empty_body() {
    let bytes = decode_transfer_encoding(b"", "base64");
    let nothing_decoded = bytes.is_empty();
    assert!(
        nothing_decoded,
        "Empty base64 input should produce empty output"
    );
}
5298
/// Base64 input made up solely of spaces and line breaks yields zero bytes.
#[test]
fn base64_whitespace_only() {
    let bytes = decode_transfer_encoding(b" \r\n \r\n", "base64");
    let nothing_decoded = bytes.is_empty();
    assert!(
        nothing_decoded,
        "Whitespace-only base64 input should produce empty output"
    );
}
5309
/// With no quote in the input, the returned index is 21, which is the
/// length of this input.
#[test]
fn find_closing_quote_unterminated() {
    let index = find_closing_quote("no closing quote here");
    assert_eq!(index, 21);
}
5315
/// A backslash-escaped quote is skipped; the index of the later unescaped
/// quote (12) is returned.
#[test]
fn find_closing_quote_skips_escaped() {
    let index = find_closing_quote("hello\\\"world\"");
    assert_eq!(index, 12);
}
5322
/// The CRLF that precedes each boundary line is not included in the text
/// or HTML part content.
#[test]
fn multipart_crlf_before_boundary() {
    let message = b"From: a@b.com\r\n\
        Date: Thu, 13 Feb 2025 15:47:33 +0000\r\n\
        Content-Type: multipart/mixed; boundary=\"mp\"\r\n\
        \r\n\
        --mp\r\n\
        Content-Type: text/plain\r\n\
        \r\n\
        Part A\r\n\
        --mp\r\n\
        Content-Type: text/html\r\n\
        \r\n\
        <b>Part B</b>\r\n\
        --mp--";
    let email = parse_email(message).unwrap();

    let text = email.body_text.as_deref();
    assert_eq!(text, Some("Part A"));

    let html = email.body_html.as_deref();
    assert_eq!(html, Some("<b>Part B</b>"));
}
5350
/// Same message as the CRLF variant but with bare LF line endings
/// throughout; both parts must come back without a trailing newline.
#[test]
fn multipart_lf_only_before_boundary() {
    let message = b"From: a@b.com\nDate: Thu, 13 Feb 2025 15:47:33 +0000\nContent-Type: multipart/mixed; boundary=\"mp\"\n\n--mp\nContent-Type: text/plain\n\nPart A\n--mp\nContent-Type: text/html\n\n<b>Part B</b>\n--mp--";
    let email = parse_email(message).unwrap();

    let text = email.body_text.as_deref();
    assert_eq!(text, Some("Part A"));

    let html = email.body_html.as_deref();
    assert_eq!(html, Some("<b>Part B</b>"));
}
5361
/// A multipart body that ends without the closing `--bnd--` line still
/// yields both parts.
#[test]
fn multipart_truncated_no_closing() {
    let multipart = b"--bnd\r\n\r\nFirst part\r\n--bnd\r\n\r\nSecond part with no closing boundary";
    let segments = split_mime_parts(multipart, "bnd");
    assert_eq!(
        segments.len(),
        2,
        "Should find 2 parts even without closing boundary"
    );
    let second = String::from_utf8_lossy(segments[1]);
    assert!(second.contains("Second part"));
}
5376
/// An escaped `)` inside a comment does not terminate the comment.
#[test]
fn strip_comments_escaped_paren_inside_comment() {
    let stripped = strip_comments("Before (escaped \\) paren) After");
    let trimmed = stripped.trim();
    assert_eq!(
        trimmed,
        "Before After",
        "Escaped close-paren inside comment must not end the comment"
    );
}
5394
/// An escaped `(` inside a comment does not open a nested comment.
#[test]
fn strip_comments_escaped_open_paren_inside_comment() {
    let stripped = strip_comments("X (comment \\( not nested) Y");
    let trimmed = stripped.trim();
    assert_eq!(
        trimmed,
        "X Y",
        "Escaped open-paren inside comment must not increase nesting depth"
    );
}
5406
/// Table of `hex_digit` cases: lowercase and uppercase letters, decimal
/// digits, and bytes that are not hex digits.
#[test]
fn hex_digit_lowercase() {
    let cases = [
        (b'a', Some(10)),
        (b'f', Some(15)),
        (b'c', Some(12)),
        (b'A', Some(10)),
        (b'F', Some(15)),
        (b'0', Some(0)),
        (b'9', Some(9)),
        (b'g', None),
        (b'G', None),
        (b' ', None),
    ];

    for (byte, expected) in cases {
        assert_eq!(hex_digit(byte), expected);
    }
}
5427
/// Lowercase hex escapes (`=c3=a9`) decode to the same bytes as uppercase
/// ones would, and the result is valid UTF-8 for "café".
#[test]
fn qp_lowercase_hex_digits() {
    let encoded = b"caf=c3=a9";
    let bytes = decode_quoted_printable(encoded);
    assert_eq!(bytes, b"caf\xc3\xa9");

    let as_text = String::from_utf8_lossy(&bytes);
    assert_eq!(
        as_text, "caf\u{e9}",
        "Lowercase hex digits in QP should decode correctly (RFC 2045 Section 6.7)"
    );
}
5441
/// Table of `decode_hex_pair` cases using lowercase hex letters.
#[test]
fn decode_hex_pair_lowercase() {
    let cases = [
        (b'f', b'f', Some(0xFF)),
        (b'a', b'0', Some(0xA0)),
        (b'0', b'a', Some(0x0A)),
    ];

    for (high, low, expected) in cases {
        assert_eq!(decode_hex_pair(high, low), expected);
    }
}
5449
/// A display name followed by `<>` does not produce an address.
#[test]
fn parse_single_address_empty_angle_brackets() {
    let parsed = parse_single_address("Display Name <>");
    let rejected = parsed.is_none();
    assert!(
        rejected,
        "Empty angle brackets should not produce an address"
    );
}
5464
/// Input whose `>` comes before its `<` still yields some address. Only
/// presence is asserted, not the parsed fields.
#[test]
fn parse_single_address_reversed_angles() {
    let parsed = parse_single_address(">bad<user@example.com");
    assert!(parsed.is_some());
}
5475
/// Free text containing neither `@` nor angle brackets is not an address.
#[test]
fn parse_single_address_no_at_no_brackets() {
    let input = "just plain text";
    assert!(
        parse_single_address(input).is_none(),
        "Text without @ or <> should not produce an address"
    );
}
5485
/// `is_inside_quotes` must ignore backslash-escaped quotes when tracking
/// whether a position lies within a quoted section.
#[test]
fn is_inside_quotes_with_escapes() {
    // The escaped quote in the middle must not close the quoted section, so
    // index 15 is still reported as inside.
    let with_escape = "\"hello \\\" world\"end";
    assert!(is_inside_quotes(with_escape, 15));

    // Index 0 is the opening quote itself: nothing has been opened before it.
    let quoted_only = "\"hello\"";
    assert!(!is_inside_quotes(quoted_only, 0));

    // An index past the closing quote is outside again.
    let quoted_then_text = "\"hello\" world";
    assert!(!is_inside_quotes(quoted_then_text, 8));
}
5500
/// Inputs shorter than two characters cannot be a quoted pair and must come
/// back unchanged.
#[test]
fn strip_outer_quotes_short_input() {
    // Empty string.
    assert_eq!(strip_outer_quotes(""), "");
    // Single non-quote character.
    assert_eq!(strip_outer_quotes("x"), "x");
    // A lone quote is not a matched pair.
    assert_eq!(strip_outer_quotes("\""), "\"");
}
5512
/// A quote on only one end of the input is not an enclosing pair, so the
/// input is returned as-is.
#[test]
fn strip_outer_quotes_one_sided() {
    let trailing_only = "hello\"";
    assert_eq!(strip_outer_quotes(trailing_only), "hello\"");

    let leading_only = "\"hello";
    assert_eq!(strip_outer_quotes(leading_only), "\"hello");
}
5519
/// When the input begins with a bare LF, the header section is empty and
/// everything after that LF is the body.
#[test]
fn split_header_body_starts_with_lf() {
    let raw: &[u8] = b"\nBody text here";
    let (header_part, body_part) = split_header_body(raw);

    assert!(
        header_part.is_empty(),
        "Headers should be empty when input starts with \\n"
    );
    // The leading separator itself must not leak into the body.
    assert_eq!(body_part, b"Body text here");
}
5534
/// When the input begins with CRLF, the header section is empty and
/// everything after that CRLF is the body.
#[test]
fn split_header_body_starts_with_crlf() {
    let raw: &[u8] = b"\r\nBody text here";
    let (header_part, body_part) = split_header_body(raw);

    assert!(
        header_part.is_empty(),
        "Headers should be empty when input starts with \\r\\n"
    );
    // Both bytes of the separator must be consumed, not just the CR.
    assert_eq!(body_part, b"Body text here");
}
5545
/// A body that ends in a bare LF (no preceding CR) has that final newline
/// removed by `decode_body`.
#[test]
fn decode_body_strips_trailing_lf_only() {
    let content_type = "text/plain; charset=utf-8";
    let decoded = decode_body(b"Hello\n", "", content_type);
    assert_eq!(decoded, "Hello", "Trailing bare LF should be stripped");
}
5556
/// A body with no trailing newline passes through `decode_body` untouched.
#[test]
fn decode_body_no_trailing_newline() {
    let content_type = "text/plain; charset=utf-8";
    let decoded = decode_body(b"Hello", "", content_type);
    assert_eq!(
        decoded, "Hello",
        "No trailing newline should leave content unchanged"
    );
}
5566
/// Percent-escapes written with lowercase hex digits decode to the
/// corresponding raw bytes.
#[test]
fn percent_decode_lowercase_hex() {
    let encoded = "%c3%a9";
    assert_eq!(percent_decode(encoded), vec![0xC3, 0xA9]);
}
5574
/// A `%` followed by two non-hex characters is not an escape; the three
/// bytes are emitted verbatim.
#[test]
fn percent_decode_invalid_hex() {
    let malformed = "%ZZ";
    assert_eq!(percent_decode(malformed), b"%ZZ");
}
5581
/// A `%` with only one character after it at end of input is an incomplete
/// escape; it is passed through unchanged rather than dropped or decoded.
#[test]
fn percent_decode_truncated() {
    let cut_short = "hello%2";
    assert_eq!(percent_decode(cut_short), b"hello%2");
}
5588
/// An address whose local part contains a colon (`user:tag@example.com`)
/// must still yield a result from `parse_address_list`.
///
/// Only non-emptiness is asserted; the parsed fields are not pinned here.
#[test]
fn parse_address_colon_with_at_sign() {
    let input = "user:tag@example.com";
    let parsed = parse_address_list(input);
    assert!(!parsed.is_empty(), "Should parse at least one address");
}
5605}