daaki_message/parser/interpret/address.rs
1//! RFC 5322 Section 3.4 address parsing.
2//!
3//! Parses comma-separated address lists, single addresses (name-addr and
4//! addr-spec), RFC 5322 group syntax, parenthesized comments, and
5//! display-name phrase normalization.
6//!
7//! # References
8//! - RFC 5322 Section 3.4 (address specification)
9//! - RFC 5322 Section 3.2.2 (comments)
10//! - RFC 5322 Section 3.2.4 (quoted-string)
11//! - RFC 5322 Section 3.2.5 (phrase / display-name)
12//! - RFC 2047 Section 5 (encoded-words in phrase context)
13
14use super::{encoded_words, get_header_value};
15
16use crate::types::Address;
17
18/// Extracts all `From` addresses.
19///
20/// RFC 5322 Section 3.6.2: `from = "From:" mailbox-list CRLF` — multiple
21/// originator mailboxes are valid and all must be preserved.
22///
23/// Address structure is parsed first on the raw header value, then RFC 2047
24/// encoded words are decoded in each address's display name. Decoding before
25/// parsing would break address splitting when an encoded-word display name
26/// contains address-significant characters (`,`, `<`, `>`, `:`, `;`).
27///
28/// Extracts addresses from ALL occurrences of the `From` header.
29///
30/// RFC 5322 Section 3.6 specifies that `From` SHOULD appear at most once.
31/// However, broken mailers sometimes produce duplicate headers. Per Postel's
32/// law ("be liberal in what you accept"), we concatenate addresses from every
33/// occurrence — consistent with how [`extract_address_list`] handles
34/// To/Cc/Bcc/Reply-To.
35///
36/// # References
37/// - RFC 5322 Section 3.6.2 — originator fields (from = mailbox-list)
38/// - RFC 2047 Section 5 rule (3) — encoded-words in phrase context
39/// - RFC 5322 Section 3.4 — address specification
40pub(crate) fn extract_from(headers: &[(String, String)]) -> Vec<Address> {
41 // Iterate ALL matching "from" headers, not just the first, so that
42 // duplicate From headers produced by broken mailers are concatenated
43 // rather than silently dropped.
44 headers
45 .iter()
46 .filter(|(k, _)| k == "from")
47 .flat_map(|(_, v)| decode_address_names(parse_address_list(v)))
48 .collect()
49}
50
51/// Extracts the Sender mailbox from the `Sender` header (RFC 5322 Section 3.6.2).
52///
53/// Unlike `From` (which is a `mailbox-list`), `Sender` contains exactly one
54/// `mailbox`. If the header contains multiple addresses, only the first is
55/// used (Postel's law — be liberal in what you accept).
56///
57/// Returns `None` when the `Sender` header is absent.
58///
59/// # References
60/// - RFC 5322 Section 3.6.2 (sender field)
61pub(crate) fn extract_sender(headers: &[(String, String)]) -> Option<Address> {
62 let value = get_header_value(headers, "sender")?;
63 // Reuse the same parse-then-decode pipeline as From/To/Cc.
64 // RFC 5322 Section 3.6.2: sender = "Sender:" mailbox CRLF
65 let addrs = decode_address_names(parse_address_list(&value));
66 addrs.into_iter().next()
67}
68
69/// Extracts an address list from ALL occurrences of the named header.
70///
71/// RFC 5322 Section 3.6 specifies that destination address fields (To, Cc,
72/// Bcc) and Reply-To SHOULD appear at most once. However, broken mailers
73/// sometimes produce duplicate headers. Per Postel's law ("be liberal in
74/// what you accept"), we concatenate addresses from every occurrence to
75/// avoid silently dropping recipients.
76///
77/// Parses address structure first, then decodes RFC 2047 encoded words in
78/// display names — see [`extract_from`] for rationale.
79///
80/// # References
81/// - RFC 5322 Section 3.6.3 (destination address fields)
82pub(crate) fn extract_address_list(headers: &[(String, String)], name: &str) -> Vec<Address> {
83 headers
84 .iter()
85 .filter(|(k, _)| k == name)
86 .flat_map(|(_, v)| decode_address_names(parse_address_list(v)))
87 .collect()
88}
89
90/// Returns addresses unchanged — RFC 2047 decoding is now performed inside
91/// [`parse_single_address`] where the quoted-string vs unquoted-phrase
92/// context is known.
93///
94/// RFC 2047 Section 5: encoded-words MUST NOT appear inside a quoted-string.
95/// By decoding only in the unquoted-phrase path (and comment path) within
96/// `parse_single_address`, we correctly preserve encoded-word literals that
97/// appear inside quoted-strings.
98///
99/// This function is retained as a pass-through to avoid churning callers.
100///
101/// # References
102/// - RFC 2047 Section 5 (encoded-words placement rules)
103fn decode_address_names(addrs: Vec<Address>) -> Vec<Address> {
104 addrs
105}
106
107/// Parses a comma-separated address list, respecting quoted strings, angle
108/// brackets, parenthesized comments, and RFC 5322 group syntax
109/// (RFC 5322 Section 3.4).
110///
111/// This is the **liberal** address parser used internally to interpret
112/// inbound `From`/`To`/`Cc`/`Bcc`/`Reply-To`/`Sender` headers, exposed
113/// publicly so consumers can apply the same Postel-compliant parsing to
114/// other "be liberal in what you accept" inputs — for example,
115/// user-typed recipient strings in a compose form, or addresses already
116/// extracted from an IMAP `ENVELOPE` response.
117///
118/// # Behavior
119///
120/// - Returns `Vec<Address>` with one entry per recognized mailbox. The
121/// parser never errors: malformed segments are best-effort-recovered
122/// or silently dropped (Postel's law, RFC 1122 Section 1.2.2).
123/// - Group syntax (`display-name ":" [group-list] ";"`) is unwrapped
124/// and member addresses are flattened into the result. Empty groups
125/// (e.g., `undisclosed-recipients:;`) contribute no addresses.
126/// - Parenthesized comments (RFC 5322 Section 3.2.2) may appear in
127/// addr-spec CFWS contexts and can contain commas, angle brackets,
128/// and other address-significant characters; these are not treated
129/// as separators.
130/// - Domain-literals (`[192.0.2.1]`, `[IPv6:...]`) are preserved
131/// intact per RFC 5321 Section 4.1.3.
132/// - Display names are normalized: quoted-strings are unescaped, CFWS
133/// comments are stripped, and RFC 2047 encoded-words are decoded
134/// only in unquoted phrase spans (RFC 2047 Section 5 rule (3)).
135///
136/// # No outgoing validation
137///
138/// The returned [`Address`] records are constructed via
139/// [`Address::new_unchecked`] and may contain syntax that is technically
140/// non-conformant but still meaningful — exactly what is needed when
141/// receiving from the network. **They have not been validated against
142/// the strict outgoing-mail rules in RFC 5322 Section 3.4.**
143///
144/// If you are about to send mail — or otherwise need to enforce strict
145/// validation — pass each result through [`Address::new`] or
146/// [`Address::with_name`] afterwards. Those constructors apply the same
147/// rules the message builder uses and will reject malformed input at
148/// construction time rather than at send time.
149///
150/// # Input expectations
151///
152/// The input is a single, already-decoded address-list string.
153/// This function does **not** perform RFC 5322 Section 2.2.3 header
154/// unfolding, charset detection, or transfer-encoding decoding.
155/// Feeding it raw header bytes with CRLF folds, 8-bit content from
156/// unknown charsets, or quoted-printable sequences will produce wrong
157/// results — use [`parse_email`](crate::parse_email) for raw message
158/// bytes, and use this function for text that has already crossed the
159/// wire/semantic boundary (user input in a UTF-8 terminal, a decoded
160/// header value, etc.).
161///
162/// # Example
163///
164/// ```
165/// use daaki_message::{parse_address_list, Address};
166///
167/// let raw = r#""Doe, Jane" <jane@example.com>, alice@example.com"#;
168/// let addrs = parse_address_list(raw);
169///
170/// assert_eq!(addrs.len(), 2);
171/// assert_eq!(addrs[0].name.as_deref(), Some("Doe, Jane"));
172/// assert_eq!(addrs[0].email, "jane@example.com");
173/// assert_eq!(addrs[1].name, None);
174/// assert_eq!(addrs[1].email, "alice@example.com");
175///
176/// // For outgoing mail, re-validate each result through the strict
177/// // constructors so malformed input is rejected before send time.
178/// let validated: Result<Vec<Address>, _> = addrs
179/// .into_iter()
180/// .map(|a| match a.name {
181/// Some(name) => Address::with_name(name, a.email),
182/// None => Address::new(a.email),
183/// })
184/// .collect();
185/// assert!(validated.is_ok());
186/// ```
187///
188/// # References
189/// - RFC 5322 Section 3.4 (address specification)
190/// - RFC 5322 Section 3.2.2 (comments)
191/// - RFC 5322 Section 3.2.4 (quoted-string)
192/// - RFC 5322 Section 3.2.5 (phrase / display-name)
193/// - RFC 5321 Section 4.1.3 (domain-literal)
194/// - RFC 2047 Section 5 (encoded-words in phrase context)
195/// - RFC 1122 Section 1.2.2 (robustness principle)
196pub fn parse_address_list(input: &str) -> Vec<Address> {
197 let mut addresses = Vec::new();
198 let mut current = String::new();
199 let mut in_quotes = false;
200 let mut escaped = false;
201 let mut angle_depth: i32 = 0;
202 // Track parenthesized comment depth (RFC 5322 Section 3.2.2).
203 // Commas and other structural characters inside comments must not
204 // be treated as address separators.
205 let mut paren_depth: i32 = 0;
206 // Track whether we're inside a group construct (after ':' but before ';').
207 // RFC 5322 Section 3.4: group = display-name ":" [group-list] ";"
208 let mut in_group = false;
209 // Track whether we're inside a domain-literal `[...]`
210 // (RFC 5321 Section 4.1.3: domain-literal = "[" *dtext "]").
211 // Characters inside brackets (e.g., commas in IPv6 or non-standard
212 // domain-literals) must not be treated as structural separators.
213 let mut in_brackets = false;
214
215 for ch in input.chars() {
216 // Inside a quoted-string, a backslash escapes the next character
217 // (RFC 5322 Section 3.2.4 quoted-pair).
218 if escaped {
219 current.push(ch);
220 escaped = false;
221 continue;
222 }
223 match ch {
224 '\\' if in_quotes || paren_depth > 0 => {
225 // Backslash escapes next character in quoted-strings
226 // (RFC 5322 Section 3.2.4) and inside comments
227 // (RFC 5322 Section 3.2.2 quoted-pair in ccontent).
228 escaped = true;
229 current.push(ch);
230 }
231 '"' if paren_depth == 0 => {
232 in_quotes = !in_quotes;
233 current.push(ch);
234 }
235 // RFC 5322 Section 3.2.2: parenthesized comments may be nested.
236 // Track depth so that commas inside comments are not treated as
237 // address separators.
238 '(' if !in_quotes => {
239 paren_depth += 1;
240 current.push(ch);
241 }
242 ')' if !in_quotes && paren_depth > 0 => {
243 paren_depth -= 1;
244 current.push(ch);
245 }
246 // RFC 5321 Section 4.1.3: domain-literal = "[" *dtext "]".
247 // Track bracket depth so that commas and other structural
248 // characters inside domain-literals are not misinterpreted.
249 '[' if !in_quotes && paren_depth == 0 => {
250 in_brackets = true;
251 current.push(ch);
252 }
253 ']' if !in_quotes && paren_depth == 0 && in_brackets => {
254 in_brackets = false;
255 current.push(ch);
256 }
257 '<' if !in_quotes && paren_depth == 0 => {
258 angle_depth += 1;
259 current.push(ch);
260 }
261 '>' if !in_quotes && paren_depth == 0 && angle_depth > 0 => {
262 angle_depth -= 1;
263 current.push(ch);
264 }
265 // RFC 5322 Section 3.4: ':' starts a group construct when
266 // we're not inside quotes, angle brackets, comments, or an
267 // existing group.
268 // Heuristic: only treat as group if the current token contains
269 // no '@' outside of quoted strings and parenthesized comments
270 // (i.e., it's a display-name, not a bare addr-spec). An '@'
271 // inside a quoted display-name (e.g., `"user@host":`) or a
272 // comment (e.g., `Group (user@host):`) must not prevent
273 // group detection (RFC 5322 Sections 3.2.2, 3.2.4).
274 ':' if !in_quotes
275 && angle_depth == 0
276 && paren_depth == 0
277 && !in_group
278 && !in_brackets =>
279 {
280 if contains_at_outside_quotes(current.trim()) {
281 current.push(ch);
282 } else {
283 // Enter group: discard the display-name portion
284 in_group = true;
285 current.clear();
286 }
287 }
288 // RFC 5322 Section 3.4: ';' terminates the group construct.
289 ';' if !in_quotes
290 && angle_depth == 0
291 && paren_depth == 0
292 && in_group
293 && !in_brackets =>
294 {
295 // Emit any pending address inside the group
296 if let Some(addr) = parse_single_address(¤t) {
297 addresses.push(addr);
298 }
299 current.clear();
300 in_group = false;
301 }
302 ',' if !in_quotes && angle_depth == 0 && paren_depth == 0 && !in_brackets => {
303 if let Some(addr) = parse_single_address(¤t) {
304 addresses.push(addr);
305 }
306 current.clear();
307 }
308 _ => current.push(ch),
309 }
310 }
311 if let Some(addr) = parse_single_address(¤t) {
312 addresses.push(addr);
313 }
314
315 addresses
316}
317
318/// Parses a single address: either `Display Name <email>` or bare `email`.
319///
320/// Handles RFC 5322 Section 3.2.2 comments (parenthesized text) that may
321/// appear before or after a bare addr-spec per Section 3.4.1 CFWS rules.
322/// A trailing comment like `(Display Name)` is used as the display name,
323/// following the common RFC 822 convention.
324///
325/// # References
326/// - RFC 5322 Section 3.4 (address specification)
327/// - RFC 5322 Section 3.4.1 (addr-spec)
328/// - RFC 5322 Section 3.2.2 (comments)
329pub(crate) fn parse_single_address(input: &str) -> Option<Address> {
330 let input = input.trim();
331 if input.is_empty() {
332 return None;
333 }
334
335 // Try "Display Name <email@domain>" form (RFC 5322 Section 3.4)
336 if let Some(angle_start) = input.rfind('<') {
337 if let Some(angle_end) = input.rfind('>') {
338 if angle_end > angle_start {
339 let mut email = input[angle_start + 1..angle_end].trim().to_string();
340 // RFC 5322 Section 4.4: strip obsolete source route
341 // (obs-route = obs-domain-list ":"). Example:
342 // `<@hop1,@hop2:user@domain>` → `user@domain`.
343 if email.starts_with('@') {
344 if let Some(colon) = email.find(':') {
345 email = email[colon + 1..].trim().to_string();
346 }
347 }
348 let name_part = input[..angle_start].trim();
349 let name = normalize_display_name_phrase(name_part);
350 if !email.is_empty() {
351 return Some(Address { name, email });
352 }
353 }
354 }
355 }
356
357 // Bare email address — may have RFC 5322 Section 3.2.2 comments
358 // (parenthesized text) before or after the addr-spec per Section 3.4.1.
359 //
360 // Use `contains_at_outside_quotes` instead of plain `contains('@')`
361 // so that a quoted local-part containing `@` (e.g., `"user@internal"`)
362 // is not mistaken for an addr-spec when there is no structural `@`
363 // outside the quoted-string (RFC 5322 Section 3.4.1).
364 if contains_at_outside_quotes(input) {
365 // Check for a trailing comment like "user@example.com (Display Name)".
366 // RFC 822 convention: trailing parenthesized comment is the display name.
367 //
368 // Use `find_paren_outside_quotes` instead of plain `find('(')` so
369 // that parentheses inside a quoted local-part (RFC 5322 Section 3.2.4)
370 // are not mistaken for comment delimiters.
371 if let Some(paren_start) = find_paren_outside_quotes(input) {
372 let email_part = input[..paren_start].trim();
373 let comment_and_rest = input[paren_start..].trim();
374 let name = if !email_part.is_empty() && contains_at_outside_quotes(email_part) {
375 // Trailing comment: extract text between parentheses
376 // as display name (RFC 822 convention, RFC 5322 Section 3.4.1 CFWS).
377 // Decode RFC 2047 encoded words in the comment text
378 // (RFC 2047 Section 5 rule (2): encoded-words may appear in comments).
379 extract_comment_text(comment_and_rest)
380 .map(|n| encoded_words::decode_encoded_words(&n))
381 } else if email_part.is_empty() || !contains_at_outside_quotes(email_part) {
382 // Leading comment: the comment appears before the addr-spec.
383 // RFC 5322 Section 3.2.2 allows comments in CFWS positions,
384 // and the common RFC 822 convention uses a leading comment as
385 // the display name (e.g., `(John Doe) user@example.com`).
386 // Verify the text after the comment contains an addr-spec.
387 let after_comment = strip_comments(comment_and_rest);
388 if contains_at_outside_quotes(after_comment.trim()) {
389 extract_comment_text(comment_and_rest)
390 .map(|n| encoded_words::decode_encoded_words(&n))
391 } else {
392 None
393 }
394 } else {
395 None
396 };
397 // Strip all comments to get the bare addr-spec
398 // (RFC 5322 Section 3.2.2)
399 let stripped = strip_comments(input);
400 let email = stripped.trim().to_string();
401 if !email.is_empty() && contains_at_outside_quotes(&email) {
402 return Some(Address { name, email });
403 }
404 }
405 return Some(Address {
406 name: None,
407 email: input.to_string(),
408 });
409 }
410
411 None
412}
413
/// Returns the inner text of the RFC 5322 comment that starts `s`.
///
/// For `(Display Name)` the result is `Some("Display Name")`. Leading
/// whitespace before the `(` is ignored; anything after the matching `)`
/// is ignored too. Nested parentheses are kept literally, and a backslash
/// makes the following character literal (the backslash itself is dropped).
/// An unterminated comment yields whatever text was collected.
///
/// Returns `None` when `s` does not begin with `(` or when the comment
/// text is empty after trimming.
///
/// # References
/// - RFC 5322 Section 3.2.2 (comment syntax)
pub(crate) fn extract_comment_text(s: &str) -> Option<String> {
    // Everything after the opening parenthesis; bail out if there is none.
    let body = s.trim().strip_prefix('(')?;

    // We are already inside the outermost comment.
    let mut depth: u32 = 1;
    let mut text = String::new();
    let mut chars = body.chars();
    while let Some(c) = chars.next() {
        match c {
            // quoted-pair: keep the escaped character, drop the backslash.
            '\\' => {
                if let Some(literal) = chars.next() {
                    text.push(literal);
                }
            }
            '(' => {
                depth = depth.saturating_add(1);
                text.push('(');
            }
            ')' => {
                depth -= 1;
                if depth == 0 {
                    // Matching close of the outermost comment.
                    break;
                }
                text.push(')');
            }
            other => text.push(other),
        }
    }

    let text = text.trim();
    if text.is_empty() {
        None
    } else {
        Some(text.to_string())
    }
}
472
473/// Returns `true` if `s` contains an `@` character outside of quoted strings
474/// and parenthesized comments.
475///
476/// Used by the group-address heuristic: an `@` inside a quoted display-name
477/// (e.g., `"user@host"`) or inside a parenthesized comment (e.g.,
478/// `Group (user@host):`) is not an addr-spec indicator and must not prevent
479/// recognition of group syntax (RFC 5322 Section 3.4).
480///
481/// # References
482/// - RFC 5322 Section 3.4 (group syntax)
483/// - RFC 5322 Section 3.2.2 (comment, quoted-pair inside comments)
484/// - RFC 5322 Section 3.2.4 (quoted-string, quoted-pair)
485pub(crate) fn contains_at_outside_quotes(s: &str) -> bool {
486 let mut in_quotes = false;
487 let mut paren_depth: u32 = 0;
488 let mut escaped = false;
489 for c in s.chars() {
490 if escaped {
491 escaped = false;
492 continue;
493 }
494 match c {
495 // Quoted-pair: backslash escapes the next character inside
496 // quoted-strings (RFC 5322 Section 3.2.4) and comments
497 // (RFC 5322 Section 3.2.2).
498 '\\' if in_quotes || paren_depth > 0 => escaped = true,
499 '"' if paren_depth == 0 => in_quotes = !in_quotes,
500 // RFC 5322 Section 3.2.2: comments nest and are delimited by
501 // parentheses. Only track outside of quoted strings.
502 '(' if !in_quotes => paren_depth = paren_depth.saturating_add(1),
503 ')' if !in_quotes && paren_depth > 0 => paren_depth -= 1,
504 '@' if !in_quotes && paren_depth == 0 => return true,
505 _ => {}
506 }
507 }
508 false
509}
510
/// Finds the byte offset of the first `(` that lies outside a quoted-string.
///
/// A parenthesis inside a quoted local-part (`"user(foo)"@example.com`) is
/// literal text per RFC 5322 Section 3.2.4 and must not be taken for the
/// start of a comment. Inside a quoted-string a backslash makes the next
/// character literal, so an escaped `"` does not close the string.
///
/// Returns `None` when there is no such parenthesis.
///
/// # References
/// - RFC 5322 Section 3.2.2 (comment syntax)
/// - RFC 5322 Section 3.2.4 (quoted-string)
pub(crate) fn find_paren_outside_quotes(s: &str) -> Option<usize> {
    let mut quoted = false;
    let mut cursor = s.char_indices();

    while let Some((offset, c)) = cursor.next() {
        if quoted {
            match c {
                // quoted-pair: skip whatever character is escaped.
                '\\' => {
                    cursor.next();
                }
                '"' => quoted = false,
                _ => {}
            }
        } else if c == '(' {
            return Some(offset);
        } else if c == '"' {
            quoted = true;
        }
    }
    None
}
540
/// Removes every parenthesized comment from `input`.
///
/// RFC 5322 Section 3.2.2 comments are delimited by parentheses and may
/// nest. Parentheses inside a quoted-string (Section 3.2.4) are literal and
/// neither open nor close a comment. Quoted-strings are copied through
/// verbatim, including their quotes and backslashes.
///
/// A backslash always protects the character after it, in every context.
/// Outside comments both characters are copied to the output; inside a
/// comment both are discarded. A `)` with no open comment is kept as
/// ordinary text, and an unterminated comment swallows the rest of the
/// input.
///
/// # References
/// - RFC 5322 Section 3.2.2 (comment syntax)
/// - RFC 5322 Section 3.2.4 (quoted-string: parens are literal inside quotes)
/// - RFC 5322 Section 4.3 (CFWS in obsolete date syntax)
pub(crate) fn strip_comments(input: &str) -> String {
    let mut kept = String::with_capacity(input.len());
    let mut comment_depth: u32 = 0;
    let mut quoted = false;
    let mut chars = input.chars();

    while let Some(c) = chars.next() {
        if c == '\\' {
            // Consume the escaped character together with the backslash so
            // it can never act as a delimiter.
            let protected = chars.next();
            if comment_depth == 0 {
                kept.push('\\');
                kept.extend(protected);
            }
            continue;
        }

        if comment_depth > 0 {
            // Inside a comment: only track nesting, emit nothing.
            match c {
                '(' => comment_depth = comment_depth.saturating_add(1),
                ')' => comment_depth -= 1,
                _ => {}
            }
        } else if quoted {
            // Inside a quoted-string everything is literal; only `"` ends it.
            if c == '"' {
                quoted = false;
            }
            kept.push(c);
        } else {
            match c {
                '"' => {
                    quoted = true;
                    kept.push(c);
                }
                '(' => comment_depth = 1,
                _ => kept.push(c),
            }
        }
    }
    kept
}
600
601/// Normalizes a `display-name` phrase from a `name-addr`.
602///
603/// RFC 5322 Section 3.2.5 defines `display-name = phrase`, where each `word`
604/// may be either an atom or a quoted-string. RFC 5322 Section 3.2.2 allows
605/// CFWS comments between those words, but comments are semantically
606/// invisible. RFC 2047 Section 5 additionally allows encoded-words only in
607/// the unquoted phrase context, never inside quoted-strings.
608///
609/// This helper therefore:
610/// - strips CFWS comments from the phrase,
611/// - collapses inter-word WSP to single spaces,
612/// - unquotes quoted-string words while preserving their literal contents,
613/// - decodes RFC 2047 encoded-words only in unquoted phrase spans.
614///
615/// # References
616/// - RFC 5322 Section 3.2.5 (phrase / display-name)
617/// - RFC 5322 Section 3.2.2 (comments)
618/// - RFC 2047 Section 5 (encoded-words in phrase context)
619pub(crate) fn normalize_display_name_phrase(name_part: &str) -> Option<String> {
620 let stripped = strip_comments(name_part);
621 let mut segments: Vec<String> = Vec::new();
622 let mut raw = String::new();
623 let mut quoted = String::new();
624 let mut in_quotes = false;
625 let mut escaped = false;
626
627 for c in stripped.chars() {
628 if in_quotes {
629 if escaped {
630 quoted.push(c);
631 escaped = false;
632 continue;
633 }
634
635 match c {
636 '\\' => {
637 escaped = true;
638 quoted.push(c);
639 }
640 '"' => {
641 let unescaped = unescape_quoted_string("ed);
642 if !unescaped.is_empty() {
643 segments.push(unescaped);
644 }
645 quoted.clear();
646 in_quotes = false;
647 }
648 _ => quoted.push(c),
649 }
650 } else if c == '"' {
651 push_decoded_phrase_segment(&mut segments, &raw);
652 raw.clear();
653 in_quotes = true;
654 } else {
655 raw.push(c);
656 }
657 }
658
659 // Unterminated quoted-string: fall back to treating the remainder as a
660 // raw phrase fragment rather than dropping it outright (Postel's law,
661 // RFC 1122 Section 1.2.2).
662 if in_quotes {
663 raw.push('"');
664 raw.push_str("ed);
665 }
666 push_decoded_phrase_segment(&mut segments, &raw);
667
668 if segments.is_empty() {
669 None
670 } else {
671 Some(segments.join(" "))
672 }
673}
674
/// Collapses whitespace in an unquoted phrase span.
///
/// Leading and trailing ASCII whitespace is removed and every interior run
/// of ASCII whitespace becomes one space. Non-ASCII whitespace is left
/// untouched.
///
/// # References
/// - RFC 5322 Section 3.2.5 (phrase whitespace)
fn normalize_phrase_whitespace(input: &str) -> String {
    let mut collapsed = String::with_capacity(input.len());
    for word in input.split_ascii_whitespace() {
        // Words are never empty, so an empty buffer means "first word".
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
682
683/// Decodes one unquoted `phrase` span and appends it to `segments` when it
684/// carries any semantic content.
685///
686/// # References
687/// - RFC 5322 Section 3.2.5 (phrase)
688/// - RFC 2047 Section 5 (encoded-words in phrases)
689fn push_decoded_phrase_segment(segments: &mut Vec<String>, raw: &str) {
690 let normalized = normalize_phrase_whitespace(raw);
691 if normalized.is_empty() {
692 return;
693 }
694
695 let decoded = encoded_words::decode_encoded_words(&normalized);
696 let decoded = normalize_phrase_whitespace(&decoded);
697 if !decoded.is_empty() {
698 segments.push(decoded);
699 }
700}
701
/// Resolves the quoted-pairs in the content of a quoted-string.
///
/// Each backslash is dropped and the character after it is kept literally,
/// so `\\` becomes `\` and `\"` becomes `"`. A lone backslash at the very
/// end has nothing to escape and is preserved.
///
/// Per RFC 5322 Section 3.2.4, a `quoted-pair` is `"\" (VCHAR / WSP)`.
///
/// # References
/// - RFC 5322 Section 3.2.4 (quoted-pair)
pub(crate) fn unescape_quoted_string(input: &str) -> String {
    let mut plain = String::with_capacity(input.len());
    // True while the previous character was an unconsumed backslash.
    let mut pending_escape = false;

    for c in input.chars() {
        if pending_escape {
            plain.push(c);
            pending_escape = false;
        } else if c == '\\' {
            pending_escape = true;
        } else {
            plain.push(c);
        }
    }

    // Dangling backslash: keep it rather than lose data.
    if pending_escape {
        plain.push('\\');
    }
    plain
}