daaki_message/parser/mod.rs
1//! RFC 5322 email message parser.
2//!
3//! Parses raw email message bytes into a structured [`ParsedEmail`] representation.
4//! Handles partial messages (headers + truncated body) gracefully, extracting
5//! whatever content is available.
6//!
7//! The parser is split into two layers:
8//! - **Wire parser** ([`wire`]) — purely syntactic, splits bytes into structure
9//! (headers, body) without semantic interpretation.
10//! - **Interpreter** ([`interpret`]) — converts wire-parsed data into the
11//! public [`ParsedEmail`] type by performing RFC 2047 decoding, address
12//! parsing, date parsing, MIME tree walking, and charset conversion.
13//!
14//! # References
15//! - RFC 5322 Sections 2–3 (message syntax, header/body separation, field definitions)
16//! - RFC 2045 Sections 5–6 (Content-Type, Content-Transfer-Encoding)
17//! - RFC 2046 Sections 4–5 (discrete/composite media types, multipart boundaries)
18//! - RFC 2047 Sections 2–4 (encoded-word syntax and decoding)
19//! - RFC 2183 Section 2 (Content-Disposition field)
20//! - RFC 2231 Sections 3–4 (MIME parameter continuations, charset/language)
21//! - RFC 6532 Section 3 (internationalized UTF-8 headers)
22
23mod interpret;
24mod wire;
25
26use crate::error::Error;
27use crate::types::ParsedEmail;
28
29// Re-export pub(crate) helpers used by other modules in the crate.
30// `decode_encoded_words` is used by `crate::fuzz` behind `#[cfg(fuzzing)]`,
31// which is not active during normal compilation — suppress the warning.
32#[allow(unused_imports)]
33pub(crate) use interpret::decode_encoded_words;
34pub(crate) use interpret::find_paren_outside_quotes;
35pub(crate) use interpret::normalize_display_name_phrase;
36pub use interpret::parse_address_list;
37pub(crate) use interpret::parse_rfc5322_date;
38pub(crate) use interpret::strip_comments;
39
40/// Parses raw email message bytes into a structured representation.
41///
42/// Handles partial messages (headers + truncated body) gracefully,
43/// extracting whatever content is available. Returns [`Error::EmptyInput`]
44/// for empty input. Malformed but still usable messages that omit the
45/// required `From` header are accepted with an empty `from` list.
46///
47/// # References
48/// - RFC 5322 Sections 2.1–2.3 (header/body separation, line length)
49/// - RFC 5322 Sections 3.3–3.6 (date-time, address, field definitions)
50/// - RFC 2045 Section 6 (Content-Transfer-Encoding decoding)
51/// - RFC 2046 Sections 5.1–5.2 (multipart boundary parsing)
52/// - RFC 2047 Sections 3–4 (encoded-word decoding in headers)
53/// - RFC 2183 Section 2 (Content-Disposition parsing)
54/// - RFC 2231 Sections 3–4 (parameter continuations, charset/language)
55/// - RFC 6532 Section 3 (internationalized header fields)
56pub fn parse_email(raw: &[u8]) -> Result<ParsedEmail, Error> {
57 let wire_msg = wire::parse_wire(raw)?;
58 interpret::interpret(&wire_msg, false)
59}
60
61/// Parses only the headers of a raw email message, skipping body/MIME processing.
62///
63/// This is faster than [`parse_email`] when only metadata is needed (e.g.,
64/// building a message list). Body-related fields (`body_text`, `body_html`,
65/// `attachments`) are always empty/`None`.
66///
67/// # References
68/// - RFC 5322 Sections 2.1–2.2 (header/body separation, header folding)
69/// - RFC 5322 Sections 3.3–3.6 (date-time, address, field definitions)
70/// - RFC 2047 Sections 3–4 (encoded-word decoding in headers)
71/// - RFC 6532 Section 3 (internationalized header fields)
72pub fn parse_headers_only(raw: &[u8]) -> Result<ParsedEmail, Error> {
73 let wire_msg = wire::parse_wire(raw)?;
74 interpret::interpret(&wire_msg, true)
75}
76
77// ---------------------------------------------------------------------------
78// Tests
79// ---------------------------------------------------------------------------
80
81// Tests are placed in mod.rs because they exercise the full parse pipeline
82// (wire → interpret) through the public `parse_email`/`parse_headers_only`
83// entry points, plus direct calls to internal functions from both layers.
// The `use` imports and re-exports above make all needed functions available
// to the test module via `super::*`.
86
87#[cfg(test)]
88#[path = "tests.rs"]
89mod tests;