daaki-message 0.2.0

RFC 5322 email message parser and builder
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
//! RFC 5322 Section 3.4 address parsing.
//!
//! Parses comma-separated address lists, single addresses (name-addr and
//! addr-spec), RFC 5322 group syntax, parenthesized comments, and
//! display-name phrase normalization.
//!
//! # References
//! - RFC 5322 Section 3.4 (address specification)
//! - RFC 5322 Section 3.2.2 (comments)
//! - RFC 5322 Section 3.2.4 (quoted-string)
//! - RFC 5322 Section 3.2.5 (phrase / display-name)
//! - RFC 2047 Section 5 (encoded-words in phrase context)

use super::{encoded_words, get_header_value};

use crate::types::Address;

/// Collects the originator mailboxes from every `From` header.
///
/// RFC 5322 Section 3.6.2 defines `from = "From:" mailbox-list CRLF`, so a
/// single header may legitimately carry several mailboxes and all of them
/// are kept.
///
/// RFC 5322 Section 3.6 says `From` SHOULD appear at most once, but broken
/// mailers sometimes emit it more than once. Following Postel's law ("be
/// liberal in what you accept"), the addresses of every occurrence are
/// concatenated in header order — the same policy [`extract_address_list`]
/// applies to To/Cc/Bcc/Reply-To.
///
/// The address structure is parsed on the raw header value first; RFC 2047
/// encoded-words in display names are handled afterwards. Decoding before
/// parsing would break address splitting whenever an encoded display name
/// contains address-significant characters (`,`, `<`, `>`, `:`, `;`).
///
/// Only header entries whose name compares equal to the literal `"from"`
/// are considered.
///
/// # References
/// - RFC 5322 Section 3.6.2 — originator fields (from = mailbox-list)
/// - RFC 2047 Section 5 rule (3) — encoded-words in phrase context
/// - RFC 5322 Section 3.4 — address specification
pub(crate) fn extract_from(headers: &[(String, String)]) -> Vec<Address> {
    let mut originators = Vec::new();
    // Walk every header entry rather than stopping at the first match, so
    // duplicate `From` headers contribute their mailboxes too.
    for (field, value) in headers {
        if field == "from" {
            originators.extend(decode_address_names(parse_address_list(value)));
        }
    }
    originators
}

/// Returns the mailbox named by the `Sender` header (RFC 5322 Section 3.6.2).
///
/// `Sender` is defined as a single `mailbox` (`sender = "Sender:" mailbox
/// CRLF`), unlike `From`, which is a `mailbox-list`. When the header value
/// nevertheless parses to several addresses, only the first one is returned
/// (Postel's law — be liberal in what you accept).
///
/// Yields `None` when no `Sender` header value is found, or when the value
/// contains no parseable address.
///
/// # References
/// - RFC 5322 Section 3.6.2 (sender field)
pub(crate) fn extract_sender(headers: &[(String, String)]) -> Option<Address> {
    // Same parse-then-post-process pipeline as From/To/Cc; keep the head only.
    get_header_value(headers, "sender").and_then(|raw| {
        decode_address_names(parse_address_list(&raw))
            .into_iter()
            .next()
    })
}

/// Gathers the addresses of every header whose name equals `name`.
///
/// RFC 5322 Section 3.6 says the destination fields (To, Cc, Bcc) and
/// Reply-To SHOULD appear at most once, yet broken mailers sometimes repeat
/// them. Per Postel's law ("be liberal in what you accept"), the addresses
/// of all occurrences are concatenated in header order so that no recipient
/// is silently lost.
///
/// Address structure is parsed before any RFC 2047 display-name handling —
/// see [`extract_from`] for the rationale.
///
/// The comparison against `name` is an exact string match on the stored
/// header name.
///
/// # References
/// - RFC 5322 Section 3.6.3 (destination address fields)
pub(crate) fn extract_address_list(headers: &[(String, String)], name: &str) -> Vec<Address> {
    let mut collected = Vec::new();
    for (field, value) in headers {
        if field == name {
            collected.extend(decode_address_names(parse_address_list(value)));
        }
    }
    collected
}

/// Returns `addrs` unchanged.
///
/// RFC 2047 decoding is now performed inside [`parse_single_address`]
/// (and the display-name normalization it calls), where the quoted-string
/// vs unquoted-phrase context is known.
///
/// RFC 2047 Section 5: encoded-words MUST NOT appear inside a quoted-string.
/// By decoding only in the unquoted-phrase path (and comment path) within
/// `parse_single_address`, encoded-word literals that appear inside
/// quoted-strings are preserved verbatim.
///
/// This function is retained as a pass-through so that existing callers
/// (`extract_from`, `extract_sender`, `extract_address_list`) do not have
/// to change.
///
/// # References
/// - RFC 2047 Section 5 (encoded-words placement rules)
fn decode_address_names(addrs: Vec<Address>) -> Vec<Address> {
    // Intentionally a no-op: the input vector is handed straight back.
    addrs
}

/// Splits a comma-separated address list into individual [`Address`]
/// records, honouring quoted strings, angle brackets, parenthesized
/// comments, domain-literals, and RFC 5322 group syntax
/// (RFC 5322 Section 3.4).
///
/// This is the **liberal** address parser used internally to interpret
/// inbound `From`/`To`/`Cc`/`Bcc`/`Reply-To`/`Sender` headers. It is public
/// so that consumers can apply the same Postel-style parsing to other
/// "be liberal in what you accept" inputs — for example, recipient strings
/// typed into a compose form, or addresses already extracted from an IMAP
/// `ENVELOPE` response.
///
/// # Behavior
///
/// - Returns a `Vec<Address>` with one entry per recognized mailbox. The
///   parser never errors: malformed segments are recovered on a
///   best-effort basis or silently dropped (Postel's law, RFC 1122
///   Section 1.2.2).
/// - Group syntax (`display-name ":" [group-list] ";"`) is unwrapped: the
///   group's display name is discarded and its members are flattened into
///   the result. Empty groups (e.g., `undisclosed-recipients:;`)
///   contribute no addresses.
/// - A `:` only opens a group when the text collected so far has no `@`
///   outside quoted strings and comments; otherwise it is kept as part of
///   the current address.
/// - Parenthesized comments (RFC 5322 Section 3.2.2) may contain commas,
///   angle brackets, and other address-significant characters; none of
///   these act as separators while inside a comment.
/// - Domain-literals (`[192.0.2.1]`, `[IPv6:...]`) are kept intact
///   (RFC 5321 Section 4.1.3); separators inside the brackets are ignored.
/// - Display names are normalized by the single-address parser:
///   quoted-strings are unescaped, CFWS comments are stripped, and
///   RFC 2047 encoded-words are decoded only in unquoted phrase spans
///   (RFC 2047 Section 5 rule (3)).
///
/// # No outgoing validation
///
/// The returned [`Address`] records are produced without validation and
/// may contain syntax that is technically non-conformant but still
/// meaningful — exactly what is needed when receiving from the network.
/// **They have not been validated against the strict outgoing-mail rules
/// in RFC 5322 Section 3.4.**
///
/// If you are about to send mail — or otherwise need strict validation —
/// pass each result through [`Address::new`] or [`Address::with_name`]
/// afterwards, so malformed input is rejected at construction time rather
/// than at send time.
///
/// # Input expectations
///
/// The input is a single, already-decoded address-list string. This
/// function does **not** perform RFC 5322 Section 2.2.3 header unfolding,
/// charset detection, or transfer-encoding decoding. Feeding it raw header
/// bytes with CRLF folds, 8-bit content from unknown charsets, or
/// quoted-printable sequences will produce wrong results — use
/// [`parse_email`](crate::parse_email) for raw message bytes, and use this
/// function for text that has already crossed the wire/semantic boundary
/// (user input in a UTF-8 terminal, a decoded header value, etc.).
///
/// # Example
///
/// ```
/// use daaki_message::{parse_address_list, Address};
///
/// let raw = r#""Doe, Jane" <jane@example.com>, alice@example.com"#;
/// let addrs = parse_address_list(raw);
///
/// assert_eq!(addrs.len(), 2);
/// assert_eq!(addrs[0].name.as_deref(), Some("Doe, Jane"));
/// assert_eq!(addrs[0].email, "jane@example.com");
/// assert_eq!(addrs[1].name, None);
/// assert_eq!(addrs[1].email, "alice@example.com");
///
/// // For outgoing mail, re-validate each result through the strict
/// // constructors so malformed input is rejected before send time.
/// let validated: Result<Vec<Address>, _> = addrs
///     .into_iter()
///     .map(|a| match a.name {
///         Some(name) => Address::with_name(name, a.email),
///         None => Address::new(a.email),
///     })
///     .collect();
/// assert!(validated.is_ok());
/// ```
///
/// # References
/// - RFC 5322 Section 3.4 (address specification)
/// - RFC 5322 Section 3.2.2 (comments)
/// - RFC 5322 Section 3.2.4 (quoted-string)
/// - RFC 5322 Section 3.2.5 (phrase / display-name)
/// - RFC 5321 Section 4.1.3 (domain-literal)
/// - RFC 2047 Section 5 (encoded-words in phrase context)
/// - RFC 1122 Section 1.2.2 (robustness principle)
pub fn parse_address_list(input: &str) -> Vec<Address> {
    let mut addresses = Vec::new();
    // Text of the address currently being accumulated.
    let mut token = String::new();
    // Scanner state. Each flag/counter only changes in the context where
    // the corresponding delimiter is structural.
    let mut quoted = false; // inside a quoted-string (RFC 5322 Section 3.2.4)
    let mut pending_escape = false; // previous char was an active backslash
    let mut angle_depth: i32 = 0; // inside `<...>`
    let mut comment_depth: i32 = 0; // nested `(...)` (RFC 5322 Section 3.2.2)
    let mut in_group = false; // after a group's ':' and before its ';'
    let mut domain_literal = false; // inside `[...]` (RFC 5321 Section 4.1.3)

    for ch in input.chars() {
        // A quoted-pair: the character after an active backslash is copied
        // verbatim and never interpreted.
        if pending_escape {
            token.push(ch);
            pending_escape = false;
            continue;
        }

        // Snapshot of the context this character is seen in.
        let outside_comment = comment_depth == 0;
        // Neither in a quoted-string nor in a comment.
        let structural = !quoted && outside_comment;
        // Additionally outside angle brackets and domain-literals: the only
        // place where ',', ':' and ';' can act as list punctuation.
        let at_separator_level = structural && angle_depth == 0 && !domain_literal;

        match ch {
            // Address separator: emit whatever was collected.
            ',' if at_separator_level => {
                addresses.extend(parse_single_address(&token));
                token.clear();
                continue;
            }
            // RFC 5322 Section 3.4: ';' terminates the group construct and
            // emits the last pending member, if any.
            ';' if at_separator_level && in_group => {
                addresses.extend(parse_single_address(&token));
                token.clear();
                in_group = false;
                continue;
            }
            // RFC 5322 Section 3.4: ':' opens a group. Heuristic: only when
            // the collected text has no '@' outside quoted strings and
            // comments, i.e. it looks like a display-name rather than an
            // addr-spec. The group's display name is discarded. When the
            // heuristic rejects, the ':' falls through and is kept.
            ':' if at_separator_level
                && !in_group
                && !contains_at_outside_quotes(token.trim()) =>
            {
                in_group = true;
                token.clear();
                continue;
            }
            // Backslash starts a quoted-pair inside quoted-strings and
            // comments only.
            '\\' if quoted || !outside_comment => pending_escape = true,
            // Quotes inside comments are plain text.
            '"' if outside_comment => quoted = !quoted,
            // Comments nest; parentheses inside quoted-strings are literal.
            '(' if !quoted => comment_depth += 1,
            ')' if !quoted && !outside_comment => comment_depth -= 1,
            '[' if structural => domain_literal = true,
            ']' if structural && domain_literal => domain_literal = false,
            '<' if structural => angle_depth += 1,
            '>' if structural && angle_depth > 0 => angle_depth -= 1,
            _ => {}
        }
        // Everything that was not consumed as list punctuation belongs to
        // the current address text.
        token.push(ch);
    }

    // Trailing address after the last separator.
    addresses.extend(parse_single_address(&token));
    addresses
}

/// Parses a single address: either `Display Name <email>` or a bare `email`.
///
/// Two forms are tried in order:
///
/// 1. **name-addr** — when the input contains a `<` followed (somewhere
///    later) by a `>`, the text between the last `<` and the last `>` is
///    the email and everything before the `<` is normalized as the display
///    name. An obsolete source route (`<@hop1,@hop2:user@domain>`) is
///    stripped down to `user@domain`. If the bracketed text is empty, this
///    form is abandoned and the bare form is tried.
/// 2. **bare addr-spec** — requires an `@` outside quoted strings and
///    comments. RFC 5322 Section 3.2.2 comments may appear before or after
///    the addr-spec (Section 3.4.1 CFWS); the first comment is used as the
///    display name, following the common RFC 822 convention (e.g.
///    `user@example.com (Display Name)` or `(John Doe) user@example.com`),
///    and all comments are removed from the email.
///
/// Returns `None` for empty input and for input that matches neither form.
///
/// # References
/// - RFC 5322 Section 3.4 (address specification)
/// - RFC 5322 Section 3.4.1 (addr-spec)
/// - RFC 5322 Section 3.2.2 (comments)
pub(crate) fn parse_single_address(input: &str) -> Option<Address> {
    let input = input.trim();
    if input.is_empty() {
        return None;
    }

    // Try "Display Name <email@domain>" form (RFC 5322 Section 3.4).
    //
    // NOTE(review): `rfind` scans the raw text, so a `<...>` pair that sits
    // inside a trailing comment or a quoted-string is matched here as well.
    if let (Some(angle_start), Some(angle_end)) = (input.rfind('<'), input.rfind('>')) {
        if angle_end > angle_start {
            let mut email = input[angle_start + 1..angle_end].trim();
            // RFC 5322 Section 4.4: strip obsolete source route
            // (obs-route = obs-domain-list ":"). Example:
            // `<@hop1,@hop2:user@domain>` → `user@domain`.
            if email.starts_with('@') {
                if let Some(colon) = email.find(':') {
                    email = email[colon + 1..].trim();
                }
            }
            if !email.is_empty() {
                let name = normalize_display_name_phrase(input[..angle_start].trim());
                return Some(Address {
                    name,
                    email: email.to_string(),
                });
            }
        }
    }

    // Bare email address. `contains_at_outside_quotes` is used instead of a
    // plain `contains('@')` so that a quoted local-part containing `@`
    // (e.g., `"user@internal"`) is not mistaken for an addr-spec when there
    // is no structural `@` outside the quoted-string (RFC 5322 Section 3.4.1).
    if !contains_at_outside_quotes(input) {
        return None;
    }

    // `find_paren_outside_quotes` is used instead of a plain `find('(')` so
    // that parentheses inside a quoted local-part (RFC 5322 Section 3.2.4)
    // are not mistaken for comment delimiters.
    if let Some(paren_start) = find_paren_outside_quotes(input) {
        let email_part = input[..paren_start].trim();
        let comment_and_rest = input[paren_start..].trim();

        // The first comment becomes the display name when either
        // - it trails a complete addr-spec (`user@example.com (Name)`), or
        // - it precedes the rest of the addr-spec, i.e. the text that
        //   remains after removing the comments still carries the `@`
        //   (`(John Doe) user@example.com`).
        // These two cases are exhaustive alternatives of one test on
        // `email_part`, so a single boolean decides the outcome.
        let trailing_comment =
            !email_part.is_empty() && contains_at_outside_quotes(email_part);
        let comment_is_name = trailing_comment
            || contains_at_outside_quotes(strip_comments(comment_and_rest).trim());

        let name = if comment_is_name {
            // RFC 2047 Section 5 rule (2): encoded-words may appear in
            // comments, so decode them in the extracted text.
            extract_comment_text(comment_and_rest)
                .map(|n| encoded_words::decode_encoded_words(&n))
        } else {
            None
        };

        // Strip all comments to get the bare addr-spec
        // (RFC 5322 Section 3.2.2).
        let stripped = strip_comments(input);
        let email = stripped.trim();
        if !email.is_empty() && contains_at_outside_quotes(email) {
            return Some(Address {
                name,
                email: email.to_string(),
            });
        }
    }

    // No usable comment structure: the whole trimmed input is the email.
    Some(Address {
        name: None,
        email: input.to_string(),
    })
}

/// Returns the text inside the first parenthesized RFC 5322 comment of `s`.
///
/// The input is trimmed and must then begin with `(`; otherwise `None` is
/// returned. For `(Display Name)` the result is `Some("Display Name")`.
///
/// - Nested parentheses are kept literally in the result.
/// - A backslash drops itself and keeps the following character verbatim
///   (quoted-pair); a backslash at the very end is discarded.
/// - Scanning stops at the parenthesis that closes the outermost comment;
///   anything after it is ignored. An unterminated comment yields
///   everything collected up to the end of the input.
/// - The collected text is trimmed; an empty result is reported as `None`.
///
/// # References
/// - RFC 5322 Section 3.2.2 (comment syntax)
pub(crate) fn extract_comment_text(s: &str) -> Option<String> {
    // Everything after the opening parenthesis; bail out if there is none.
    let body = s.trim().strip_prefix('(')?;

    // The opening parenthesis has already been consumed.
    let mut nesting: u32 = 1;
    let mut text = String::new();
    let mut chars = body.chars();
    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // Quoted-pair: keep only the escaped character.
                if let Some(literal) = chars.next() {
                    text.push(literal);
                }
            }
            '(' => {
                nesting = nesting.saturating_add(1);
                text.push(ch);
            }
            ')' => {
                nesting = nesting.saturating_sub(1);
                if nesting == 0 {
                    // Matching close of the outermost comment.
                    break;
                }
                text.push(ch);
            }
            _ => text.push(ch),
        }
    }

    Some(text.trim())
        .filter(|inner| !inner.is_empty())
        .map(str::to_string)
}

/// Returns `true` if `s` contains an `@` character outside of quoted strings
/// and parenthesized comments.
///
/// Used by the group-address heuristic: an `@` inside a quoted display-name
/// (e.g., `"user@host"`) or inside a parenthesized comment (e.g.,
/// `Group (user@host):`) is not an addr-spec indicator and must not prevent
/// recognition of group syntax (RFC 5322 Section 3.4).
///
/// # References
/// - RFC 5322 Section 3.4 (group syntax)
/// - RFC 5322 Section 3.2.2 (comment, quoted-pair inside comments)
/// - RFC 5322 Section 3.2.4 (quoted-string, quoted-pair)
pub(crate) fn contains_at_outside_quotes(s: &str) -> bool {
    let mut in_quotes = false;
    let mut paren_depth: u32 = 0;
    let mut escaped = false;
    for c in s.chars() {
        if escaped {
            escaped = false;
            continue;
        }
        match c {
            // Quoted-pair: backslash escapes the next character inside
            // quoted-strings (RFC 5322 Section 3.2.4) and comments
            // (RFC 5322 Section 3.2.2).
            '\\' if in_quotes || paren_depth > 0 => escaped = true,
            '"' if paren_depth == 0 => in_quotes = !in_quotes,
            // RFC 5322 Section 3.2.2: comments nest and are delimited by
            // parentheses.  Only track outside of quoted strings.
            '(' if !in_quotes => paren_depth = paren_depth.saturating_add(1),
            ')' if !in_quotes && paren_depth > 0 => paren_depth -= 1,
            '@' if !in_quotes && paren_depth == 0 => return true,
            _ => {}
        }
    }
    false
}

/// Locates the first `(` that lies outside every quoted-string and returns
/// its byte offset within `s`, or `None` when there is no such character.
///
/// Parentheses inside a quoted local-part (e.g., `"user(foo)"@example.com`)
/// are literal per RFC 5322 Section 3.2.4 and must not be treated as
/// comment delimiters. Inside a quoted-string a backslash skips the
/// following character, so an escaped `"` does not end the string.
///
/// # References
/// - RFC 5322 Section 3.2.2 (comment syntax)
/// - RFC 5322 Section 3.2.4 (quoted-string)
pub(crate) fn find_paren_outside_quotes(s: &str) -> Option<usize> {
    let mut quoted = false;
    let mut skip_next = false;

    s.char_indices().find_map(|(offset, ch)| {
        if skip_next {
            // Second half of a quoted-pair: never structural.
            skip_next = false;
            return None;
        }
        match (quoted, ch) {
            (true, '\\') => skip_next = true,
            (_, '"') => quoted = !quoted,
            (false, '(') => return Some(offset),
            _ => {}
        }
        None
    })
}

/// Removes parenthesized comments from `input` and returns the remainder.
///
/// RFC 5322 Section 3.2.2 defines comments as text enclosed in parentheses,
/// which may be nested. Behaviour in detail:
///
/// - Inside a comment nothing is copied; a backslash there also swallows
///   the character after it, so `\)` does not close the comment.
/// - Parentheses inside quoted-strings (RFC 5322 Section 3.2.4) are literal
///   and are copied along with the rest of the quoted-string, quotes and
///   backslashes included.
/// - Outside comments, a backslash and the character following it are
///   copied verbatim, so `\(` does not open a comment.
/// - A `)` with no open comment is copied as ordinary text.
/// - An unterminated comment removes everything to the end of the input.
///
/// # References
/// - RFC 5322 Section 3.2.2 (comment syntax)
/// - RFC 5322 Section 3.2.4 (quoted-string: parens are literal inside quotes)
/// - RFC 5322 Section 4.3 (CFWS in obsolete date syntax)
pub(crate) fn strip_comments(input: &str) -> String {
    let mut kept = String::with_capacity(input.len());
    let mut nesting: u32 = 0;
    let mut quoted = false;
    let mut chars = input.chars();

    while let Some(ch) = chars.next() {
        if nesting > 0 {
            // Inside a comment: track nesting only, copy nothing.
            match ch {
                '\\' => {
                    chars.next();
                }
                '(' => nesting = nesting.saturating_add(1),
                ')' => nesting -= 1,
                _ => {}
            }
            continue;
        }

        // Outside any comment. An unquoted '(' opens one and is dropped.
        if ch == '(' && !quoted {
            nesting = 1;
            continue;
        }

        kept.push(ch);
        match ch {
            // Keep the escaped character too, without interpreting it.
            '\\' => {
                if let Some(literal) = chars.next() {
                    kept.push(literal);
                }
            }
            '"' => quoted = !quoted,
            _ => {}
        }
    }
    kept
}

/// Turns the `display-name` phrase of a `name-addr` into its readable form.
///
/// RFC 5322 Section 3.2.5 defines `display-name = phrase`, where each
/// `word` is either an atom or a quoted-string. RFC 5322 Section 3.2.2
/// permits CFWS comments between words, but they carry no meaning.
/// RFC 2047 Section 5 allows encoded-words only in the unquoted phrase
/// context, never inside quoted-strings.
///
/// Processing steps:
/// - comments are removed from the phrase,
/// - the text is split into alternating unquoted spans and quoted-strings,
/// - unquoted spans are whitespace-collapsed and RFC 2047-decoded,
/// - quoted-strings are unescaped and kept literally (no decoding),
/// - empty pieces are dropped and the rest is joined with single spaces.
///
/// Returns `None` when nothing with content remains.
///
/// # References
/// - RFC 5322 Section 3.2.5 (phrase / display-name)
/// - RFC 5322 Section 3.2.2 (comments)
/// - RFC 2047 Section 5 (encoded-words in phrase context)
pub(crate) fn normalize_display_name_phrase(name_part: &str) -> Option<String> {
    let without_comments = strip_comments(name_part);
    let mut words: Vec<String> = Vec::new();
    let mut plain_span = String::new();
    let mut quoted_span = String::new();
    let mut inside_quotes = false;

    let mut chars = without_comments.chars();
    while let Some(ch) = chars.next() {
        match (inside_quotes, ch) {
            (true, '\\') => {
                // Keep the quoted-pair intact for now; it is resolved when
                // the quoted-string is closed.
                quoted_span.push(ch);
                if let Some(escaped) = chars.next() {
                    quoted_span.push(escaped);
                }
            }
            (true, '"') => {
                let literal = unescape_quoted_string(&quoted_span);
                if !literal.is_empty() {
                    words.push(literal);
                }
                quoted_span.clear();
                inside_quotes = false;
            }
            (true, _) => quoted_span.push(ch),
            (false, '"') => {
                // The unquoted span ends where a quoted-string begins.
                push_decoded_phrase_segment(&mut words, &plain_span);
                plain_span.clear();
                inside_quotes = true;
            }
            (false, _) => plain_span.push(ch),
        }
    }

    // Unterminated quoted-string: rather than dropping the remainder, treat
    // it (opening quote included) as an unquoted phrase fragment
    // (Postel's law, RFC 1122 Section 1.2.2).
    if inside_quotes {
        plain_span.push('"');
        plain_span.push_str(&quoted_span);
    }
    push_decoded_phrase_segment(&mut words, &plain_span);

    Some(words)
        .filter(|parts| !parts.is_empty())
        .map(|parts| parts.join(" "))
}

/// Collapses every run of ASCII whitespace in an unquoted phrase span to a
/// single space and removes leading and trailing whitespace.
///
/// Input consisting only of whitespace (or nothing) yields an empty string.
///
/// # References
/// - RFC 5322 Section 3.2.5 (phrase whitespace)
fn normalize_phrase_whitespace(input: &str) -> String {
    let mut collapsed = String::with_capacity(input.len());
    for word in input.split_ascii_whitespace() {
        // Words are never empty, so an empty buffer means "first word".
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}

/// Appends one unquoted `phrase` span to `segments`, provided it still has
/// content after normalization.
///
/// The span is whitespace-collapsed, passed through the RFC 2047
/// encoded-word decoder, and whitespace-collapsed once more. A span that is
/// empty before decoding is skipped without invoking the decoder; a span
/// that is empty after decoding is skipped as well.
///
/// # References
/// - RFC 5322 Section 3.2.5 (phrase)
/// - RFC 2047 Section 5 (encoded-words in phrases)
fn push_decoded_phrase_segment(segments: &mut Vec<String>, raw: &str) {
    let cleaned = Some(normalize_phrase_whitespace(raw))
        .filter(|span| !span.is_empty())
        .map(|span| encoded_words::decode_encoded_words(&span))
        .map(|decoded| normalize_phrase_whitespace(&decoded))
        .filter(|text| !text.is_empty());
    // Adds zero or one element.
    segments.extend(cleaned);
}

/// Resolves the quoted-pairs of a quoted-string body.
///
/// Every backslash is removed and the character after it is kept verbatim,
/// so `\\` becomes `\` and `\"` becomes `"`. A backslash that is the very
/// last character has nothing to escape and is kept as-is.
///
/// Per RFC 5322 Section 3.2.4, a `quoted-pair` is `"\" (VCHAR / WSP)`.
///
/// # References
/// - RFC 5322 Section 3.2.4 (quoted-pair)
pub(crate) fn unescape_quoted_string(input: &str) -> String {
    let mut plain = String::with_capacity(input.len());
    let mut pending_escape = false;

    for ch in input.chars() {
        if pending_escape {
            // Escaped character: taken literally, even if it is a backslash.
            plain.push(ch);
            pending_escape = false;
        } else if ch == '\\' {
            pending_escape = true;
        } else {
            plain.push(ch);
        }
    }

    // Dangling backslash at the end of the input: preserve it.
    if pending_escape {
        plain.push('\\');
    }
    plain
}