Skip to main content

structured_email_address/
lib.rs

1//! # structured-email-address
2//!
3//! RFC 5321/5322/6531 conformant email address parser, validator, and normalizer.
4//!
5//! Unlike existing Rust crates that stop at RFC validation, this crate provides:
6//! - **Subaddress extraction**: `user+tag@domain` → separate `user`, `tag`, `domain`
7//! - **Provider-aware normalization**: Gmail dot-stripping, configurable case folding
8//! - **PSL domain validation**: verify domain against the Public Suffix List
9//! - **Anti-homoglyph protection**: detect Cyrillic/Latin lookalikes via Unicode skeleton
10//! - **Configurable strictness**: Strict (5321), Standard (5322), Lax (obs-* allowed)
11//! - **Zero-copy parsing**: internal spans into the input string
12//!
13//! # Quick Start
14//!
15//! ```
16//! use structured_email_address::{EmailAddress, Config};
17//!
18//! // Simple: parse with defaults
19//! let email: EmailAddress = "user+tag@example.com".parse().unwrap();
20//! assert_eq!(email.local_part(), "user+tag");
21//! assert_eq!(email.tag(), Some("tag"));
22//! assert_eq!(email.domain(), "example.com");
23//!
24//! // Configured: Gmail normalization pipeline
25//! let config = Config::builder()
26//!     .strip_subaddress()
27//!     .dots_gmail_only()
28//!     .lowercase_all()
29//!     .build();
30//!
31//! let email = EmailAddress::parse_with("A.L.I.C.E+promo@Gmail.COM", &config).unwrap();
32//! assert_eq!(email.canonical(), "alice@gmail.com");
33//! assert_eq!(email.tag(), Some("promo"));
34//! ```
35
36#![cfg_attr(
37    not(test),
38    deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)
39)]
40
41mod config;
42mod error;
43mod normalize;
44mod parser;
45mod validate;
46
47pub use config::{
48    CasePolicy, Config, ConfigBuilder, DomainCheck, DotPolicy, Strictness, SubaddressPolicy,
49};
50pub use error::{Error, ErrorKind};
51pub use normalize::confusable_skeleton;
52
/// An email address that has been parsed, validated, and normalized.
///
/// A value is never modified once it has been built, and every accessor
/// hands out borrowed data.
#[derive(Clone, Debug)]
pub struct EmailAddress {
    /// The caller's input after trimming.
    original: String,
    /// Local part in canonical (post-normalization) form.
    local_part: String,
    /// Subaddress tag pulled out of the local part, when there is one.
    tag: Option<String>,
    /// Domain in canonical form: IDNA-encoded and lowercased.
    domain: String,
    /// Unicode rendering of the domain; only set when the domain has punycode labels.
    domain_unicode: Option<String>,
    /// Display name taken from `name-addr` input, when present.
    display_name: Option<String>,
    /// Confusable skeleton, populated only when the config asks for it.
    skeleton: Option<String>,
}
73
74impl EmailAddress {
75    /// Parse and validate with the given configuration.
76    pub fn parse_with(input: &str, config: &Config) -> Result<Self, Error> {
77        let parsed = parser::parse(
78            input,
79            config.strictness,
80            config.allow_display_name,
81            config.allow_domain_literal,
82        )?;
83
84        let normalized = normalize::normalize(&parsed, config)?;
85        validate::validate(&parsed, &normalized, config)?;
86
87        Ok(Self {
88            original: parsed.input.to_string(),
89            local_part: normalized.local_part,
90            tag: normalized.tag,
91            domain: normalized.domain,
92            domain_unicode: normalized.domain_unicode,
93            display_name: normalized.display_name,
94            skeleton: normalized.skeleton,
95        })
96    }
97
98    /// The canonical local part (after normalization).
99    ///
100    /// If subaddress stripping is enabled, this excludes the `+tag`.
101    /// If dot stripping is enabled, dots are removed.
102    pub fn local_part(&self) -> &str {
103        &self.local_part
104    }
105
106    /// The extracted subaddress tag, if present.
107    ///
108    /// For `user+promo@example.com`, returns `Some("promo")`.
109    /// Always extracted regardless of [`SubaddressPolicy`] — the policy only
110    /// affects whether it appears in [`canonical()`](Self::canonical).
111    pub fn tag(&self) -> Option<&str> {
112        self.tag.as_deref()
113    }
114
115    /// The canonical domain (IDNA-encoded, lowercased).
116    pub fn domain(&self) -> &str {
117        &self.domain
118    }
119
120    /// The canonical domain in Unicode form.
121    ///
122    /// For internationalized domains (`münchen.de` → `xn--mnchen-3ya.de`),
123    /// returns the Unicode form of the canonical domain. For ASCII-only
124    /// domains, returns the same value as [`domain()`](Self::domain).
125    ///
126    /// # Security
127    ///
128    /// The Unicode form is intended for **display only**. It may reintroduce
129    /// [IDN homograph attacks](https://en.wikipedia.org/wiki/IDN_homograph_attack)
130    /// where visually similar characters from different scripts produce
131    /// different domain names (e.g. Cyrillic `а` vs Latin `a`).
132    ///
133    /// For security-sensitive comparisons (allow-lists, deduplication, access
134    /// control), always use [`domain()`](Self::domain) which returns the
135    /// ACE/Punycode form. If you must compare Unicode domains, apply your own
136    /// confusable-detection logic (see [`confusable_skeleton()`]).
137    ///
138    /// ```
139    /// use structured_email_address::EmailAddress;
140    ///
141    /// let email: EmailAddress = "user@münchen.de".parse().unwrap();
142    /// assert_eq!(email.domain(), "xn--mnchen-3ya.de");
143    /// assert_eq!(email.domain_unicode(), "münchen.de");
144    ///
145    /// let ascii: EmailAddress = "user@example.com".parse().unwrap();
146    /// assert_eq!(ascii.domain_unicode(), "example.com");
147    /// ```
148    pub fn domain_unicode(&self) -> &str {
149        self.domain_unicode.as_deref().unwrap_or(&self.domain)
150    }
151
152    /// The display name, if parsed from `"Name" <addr>` or `Name <addr>` format.
153    pub fn display_name(&self) -> Option<&str> {
154        self.display_name.as_deref()
155    }
156
157    /// The full canonical address: `local_part@domain`.
158    ///
159    /// If the local part contains characters that require quoting (spaces,
160    /// special chars), it is wrapped in quotes for RFC compliance.
161    pub fn canonical(&self) -> String {
162        if needs_quoting(&self.local_part) {
163            let escaped = escape_local_part(&self.local_part);
164            format!("\"{}\"@{}", escaped, self.domain)
165        } else {
166            format!("{}@{}", self.local_part, self.domain)
167        }
168    }
169
170    /// The original input (trimmed).
171    pub fn original(&self) -> &str {
172        &self.original
173    }
174
175    /// The confusable skeleton of the local part (if config enabled it).
176    ///
177    /// Two addresses with the same skeleton + domain are visually confusable.
178    pub fn skeleton(&self) -> Option<&str> {
179        self.skeleton.as_deref()
180    }
181
182    /// Check if the domain is a well-known freemail provider.
183    pub fn is_freemail(&self) -> bool {
184        is_freemail_domain(&self.domain)
185    }
186
187    /// Parse a batch of email addresses with the given configuration.
188    ///
189    /// Returns one `Result` per input, in the same order. The config is
190    /// shared across all inputs, amortizing setup cost.
191    ///
192    /// # Example
193    ///
194    /// ```
195    /// use structured_email_address::{EmailAddress, Config};
196    ///
197    /// let config = Config::default();
198    /// let results = EmailAddress::parse_batch(
199    ///     &["alice@example.com", "invalid", "bob@example.org"],
200    ///     &config,
201    /// );
202    /// assert!(results[0].is_ok());
203    /// assert!(results[1].is_err());
204    /// assert!(results[2].is_ok());
205    /// ```
206    pub fn parse_batch(inputs: &[&str], config: &Config) -> Vec<Result<Self, Error>> {
207        inputs
208            .iter()
209            .map(|input| Self::parse_with(input, config))
210            .collect()
211    }
212
213    /// Parse a batch of email addresses in parallel using rayon.
214    ///
215    /// Same semantics as [`parse_batch`](Self::parse_batch), but distributes
216    /// work across rayon's thread pool. Useful for bulk import/validation of
217    /// large lists (10K+ addresses).
218    ///
219    /// Requires the `rayon` feature.
220    ///
221    /// # Example
222    ///
223    /// ```
224    /// use structured_email_address::{EmailAddress, Config};
225    ///
226    /// let config = Config::default();
227    /// let results = EmailAddress::parse_batch_par(
228    ///     &["alice@example.com", "bob@example.org"],
229    ///     &config,
230    /// );
231    /// assert!(results.iter().all(|r| r.is_ok()));
232    /// ```
233    #[cfg(feature = "rayon")]
234    pub fn parse_batch_par(inputs: &[&str], config: &Config) -> Vec<Result<Self, Error>> {
235        use rayon::prelude::*;
236
237        inputs
238            .par_iter()
239            .map(|input| Self::parse_with(input, config))
240            .collect()
241    }
242}
243
244impl std::fmt::Display for EmailAddress {
245    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
246        let local = if needs_quoting(&self.local_part) {
247            format!("\"{}\"", escape_local_part(&self.local_part))
248        } else {
249            self.local_part.clone()
250        };
251        match &self.display_name {
252            Some(name) => write!(
253                f,
254                "\"{}\" <{}@{}>",
255                escape_display_name(name),
256                local,
257                self.domain
258            ),
259            None => write!(f, "{}@{}", local, self.domain),
260        }
261    }
262}
263
/// Decides whether a local-part must be serialized as a quoted-string
/// (RFC 5321/5322).
///
/// Quoting is required when the local part
/// - is empty,
/// - starts or ends with `.`, or contains `..` (not a valid dot-atom), or
/// - holds an ASCII character that is neither alphanumeric, an `atext`
///   special, nor `.`.
///
/// Non-ASCII characters never force quoting (RFC 6531).
fn needs_quoting(local: &str) -> bool {
    /// ASCII punctuation allowed in a bare local part: the `atext` specials
    /// plus the dot separator.
    const BARE_PUNCTUATION: &str = "!#$%&'*+-/=?^_`{|}~.";

    let misplaced_dot = local.starts_with('.') || local.ends_with('.') || local.contains("..");
    if local.is_empty() || misplaced_dot {
        return true;
    }

    local
        .chars()
        .any(|ch| ch.is_ascii() && !ch.is_ascii_alphanumeric() && !BARE_PUNCTUATION.contains(ch))
}
301
/// Prepares a local-part for placement between double quotes.
///
/// `"` and `\` get a backslash in front of them. CR and LF are removed
/// outright so the serialized form cannot be used for header injection
/// (FWS is collapsed during normalization).
fn escape_local_part(local: &str) -> String {
    local
        .chars()
        // Line breaks are dropped rather than escaped.
        .filter(|ch| !matches!(ch, '\r' | '\n'))
        .fold(String::with_capacity(local.len()), |mut out, ch| {
            if ch == '"' || ch == '\\' {
                out.push('\\');
            }
            out.push(ch);
            out
        })
}
318
/// Escapes a display name for quoted output.
///
/// A backslash is placed before every `"` and `\`; bare CR/LF characters are
/// discarded to prevent header injection in serialized output.
fn escape_display_name(name: &str) -> String {
    let mut out = String::with_capacity(name.len());
    for ch in name.chars() {
        // Line breaks are dropped entirely.
        if ch == '\r' || ch == '\n' {
            continue;
        }
        if matches!(ch, '"' | '\\') {
            out.push('\\');
        }
        out.push(ch);
    }
    out
}
339
340/// Equality is based on canonical form (`local_part` + `domain`) only.
341/// Display name, tag, and skeleton are intentionally excluded —
342/// `"John" <user@example.com>` equals `"Jane" <user@example.com>`
343/// because they route to the same mailbox.
344impl PartialEq for EmailAddress {
345    fn eq(&self, other: &Self) -> bool {
346        self.local_part == other.local_part && self.domain == other.domain
347    }
348}
349
350impl Eq for EmailAddress {}
351
352impl std::hash::Hash for EmailAddress {
353    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
354        self.local_part.hash(state);
355        self.domain.hash(state);
356    }
357}
358
359impl std::str::FromStr for EmailAddress {
360    type Err = Error;
361
362    fn from_str(s: &str) -> Result<Self, Self::Err> {
363        Self::parse_with(s, &Config::default())
364    }
365}
366
/// Serializes the address as a single string: its canonical form.
#[cfg(feature = "serde")]
impl serde::Serialize for EmailAddress {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let canonical = self.canonical();
        serializer.serialize_str(&canonical)
    }
}
373
/// Deserializes from a string by parsing it via `FromStr`, i.e. with the
/// default configuration; a parse failure becomes a custom serde error.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for EmailAddress {
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        let raw = String::deserialize(deserializer)?;
        match raw.parse::<Self>() {
            Ok(address) => Ok(address),
            Err(err) => Err(serde::de::Error::custom(err)),
        }
    }
}
381
/// Reports whether `domain` is one of the well-known freemail providers.
///
/// The comparison is an exact, case-sensitive match against a fixed table.
fn is_freemail_domain(domain: &str) -> bool {
    const FREEMAIL_DOMAINS: &[&str] = &[
        "gmail.com",
        "googlemail.com",
        "yahoo.com",
        "yahoo.co.uk",
        "yahoo.co.jp",
        "outlook.com",
        "hotmail.com",
        "live.com",
        "msn.com",
        "aol.com",
        "protonmail.com",
        "proton.me",
        "icloud.com",
        "me.com",
        "mac.com",
        "mail.com",
        "zoho.com",
        "yandex.ru",
        "yandex.com",
        "mail.ru",
        "gmx.com",
        "gmx.de",
        "web.de",
        "tutanota.com",
        "tuta.io",
        "fastmail.com",
    ];

    FREEMAIL_DOMAINS.iter().any(|&known| known == domain)
}
414
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` with the default config, panicking with the error text on failure.
    #[track_caller]
    fn parsed(input: &str) -> EmailAddress {
        input.parse().unwrap_or_else(|e| panic!("{e}"))
    }

    /// Parses `input` with `config`, panicking with the error text on failure.
    #[track_caller]
    fn parsed_with(input: &str, config: &Config) -> EmailAddress {
        EmailAddress::parse_with(input, config).unwrap_or_else(|e| panic!("{e}"))
    }

    // ── FromStr (default config) ──

    #[test]
    fn parse_simple() {
        let addr = parsed("user@example.com");
        assert_eq!(addr.local_part(), "user");
        assert_eq!(addr.domain(), "example.com");
        assert_eq!(addr.tag(), None);
        assert_eq!(addr.canonical(), "user@example.com");
    }

    #[test]
    fn parse_with_tag() {
        let addr = parsed("user+newsletter@example.com");
        assert_eq!(addr.local_part(), "user+newsletter");
        assert_eq!(addr.tag(), Some("newsletter"));
    }

    #[test]
    fn display_format() {
        let addr = parsed("user@example.com");
        assert_eq!(addr.to_string(), "user@example.com");
    }

    #[test]
    fn display_name_escaping() {
        // Quotes inside the display name must come out backslash-escaped.
        let config = Config::builder().allow_display_name().build();
        let rendered = parsed_with("John \"Johnny\" Doe <user@example.com>", &config).to_string();
        assert!(
            rendered.contains("\\\"Johnny\\\""),
            "Expected escaped quotes in: {rendered}"
        );
    }

    #[test]
    fn equality_by_canonical() {
        // Default config lowercases only the domain; the local parts already
        // share the same case, so the two addresses are equal.
        let lower = parsed("user@example.com");
        let mixed = parsed("user@Example.COM");
        assert_eq!(lower, mixed);
    }

    #[test]
    fn freemail_detection() {
        assert!(parsed("user@gmail.com").is_freemail());
        assert!(!parsed("user@company.com").is_freemail());
    }

    // ── Configured parsing ──

    #[test]
    fn full_normalization_pipeline() {
        let config = Config::builder()
            .strip_subaddress()
            .dots_gmail_only()
            .lowercase_all()
            .check_confusables()
            .build();

        let addr = parsed_with("A.L.I.C.E+promo@Gmail.COM", &config);
        assert_eq!(addr.canonical(), "alice@gmail.com");
        assert_eq!(addr.tag(), Some("promo"));
        assert!(addr.skeleton().is_some());
    }

    #[test]
    fn display_name_parsing() {
        let config = Config::builder().allow_display_name().build();

        let addr = parsed_with("John Doe <user@example.com>", &config);
        assert_eq!(addr.display_name(), Some("John Doe"));
        assert_eq!(addr.local_part(), "user");
        assert_eq!(addr.domain(), "example.com");
    }

    // ── Serde ──

    #[cfg(feature = "serde")]
    #[test]
    fn serde_roundtrip() {
        let addr = parsed("user@example.com");
        let json = serde_json::to_string(&addr).unwrap_or_else(|e| panic!("{e}"));
        assert_eq!(json, "\"user@example.com\"");

        let restored: EmailAddress = serde_json::from_str(&json).unwrap_or_else(|e| panic!("{e}"));
        assert_eq!(addr, restored);
    }

    // ── Validation errors ──

    #[test]
    fn rejects_empty() {
        let outcome = "".parse::<EmailAddress>();
        assert!(outcome.is_err());
    }

    #[test]
    fn rejects_no_domain_dot() {
        let outcome = "user@localhost".parse::<EmailAddress>();
        assert!(outcome.is_err());
        assert!(matches!(
            outcome.unwrap_err().kind(),
            ErrorKind::DomainNoDot
        ));
    }

    #[test]
    fn allows_single_label_when_configured() {
        let config = Config::builder().allow_single_label_domain().build();
        let addr = parsed_with("user@localhost", &config);
        assert_eq!(addr.domain(), "localhost");
    }

    // ── Batch parsing ──

    #[test]
    fn batch_parse_mixed_results() {
        // parse_batch yields Ok for valid and Err for invalid inputs while
        // keeping the input order.
        let config = Config::default();
        let inputs = ["alice@example.com", "invalid", "bob@example.org"];
        let results = EmailAddress::parse_batch(&inputs, &config);
        assert_eq!(results.len(), 3);
        assert!(results[0].is_ok());
        assert!(results[1].is_err());
        assert!(results[2].is_ok());
        assert_eq!(results[0].as_ref().map(|e| e.domain()), Ok("example.com"));
        assert_eq!(results[2].as_ref().map(|e| e.domain()), Ok("example.org"));
    }

    #[test]
    fn batch_parse_empty_input() {
        // An empty slice produces an empty vec.
        let config = Config::default();
        let results = EmailAddress::parse_batch(&[], &config);
        assert!(results.is_empty());
    }

    #[test]
    fn batch_parse_all_valid() {
        // Every address in a batch of valid inputs succeeds.
        let config = Config::default();
        let inputs = ["a@b.com", "x@y.org", "test+tag@example.com"];
        let results = EmailAddress::parse_batch(&inputs, &config);
        assert!(results.iter().all(Result::is_ok));
    }

    #[test]
    fn batch_parse_all_invalid() {
        // Every address in a batch of invalid inputs fails.
        let config = Config::default();
        let inputs = ["", "noatsign", "@missing-local.com"];
        let results = EmailAddress::parse_batch(&inputs, &config);
        assert!(results.iter().all(Result::is_err));
    }

    #[test]
    fn batch_parse_with_config() {
        // The batch API honours the config (e.g. subaddress stripping).
        let config = Config::builder()
            .strip_subaddress()
            .dots_gmail_only()
            .lowercase_all()
            .build();
        let inputs = ["A.L.I.C.E+promo@Gmail.COM", "BOB@example.com"];
        let results = EmailAddress::parse_batch(&inputs, &config);
        assert_eq!(results.len(), 2);
        assert_eq!(
            results[0].as_ref().map(|e| e.canonical()),
            Ok("alice@gmail.com".to_string())
        );
        assert_eq!(
            results[1].as_ref().map(|e| e.canonical()),
            Ok("bob@example.com".to_string())
        );
    }

    // ── domain_unicode() accessor ──

    #[test]
    fn domain_unicode_roundtrip() {
        // IDN input: domain() gives punycode, domain_unicode() gives Unicode back.
        let addr = parsed("user@münchen.de");
        assert_eq!(addr.domain(), "xn--mnchen-3ya.de");
        assert_eq!(addr.domain_unicode(), "münchen.de");
    }

    #[test]
    fn domain_unicode_ascii_fallback() {
        // ASCII input: domain_unicode() mirrors domain().
        let addr = parsed("user@example.com");
        assert_eq!(addr.domain_unicode(), "example.com");
        assert_eq!(addr.domain_unicode(), addr.domain());
    }

    #[test]
    fn domain_unicode_mixed_labels() {
        // One IDN label next to ASCII labels.
        let addr = parsed("user@über.example.com");
        assert_eq!(addr.domain(), "xn--ber-goa.example.com");
        assert_eq!(addr.domain_unicode(), "über.example.com");
    }

    #[test]
    fn domain_unicode_japanese() {
        // Japanese domain round trip.
        let addr = parsed("user@例え.jp");
        assert!(addr.domain().contains("xn--"));
        assert_eq!(addr.domain_unicode(), "例え.jp");
    }

    #[cfg(feature = "rayon")]
    #[test]
    fn batch_par_matches_sequential() {
        // The parallel variant must agree with the sequential one item by item.
        let config = Config::builder().strip_subaddress().lowercase_all().build();
        let inputs = [
            "alice@example.com",
            "invalid",
            "BOB+tag@Example.ORG",
            "",
            "user@test.com",
        ];
        let seq = EmailAddress::parse_batch(&inputs, &config);
        let par = EmailAddress::parse_batch_par(&inputs, &config);
        assert_eq!(seq.len(), par.len());
        for (i, pair) in seq.iter().zip(par.iter()).enumerate() {
            match pair {
                (Ok(a), Ok(b)) => assert_eq!(a, b, "result {i} diverges"),
                (Err(a), Err(b)) => assert_eq!(a, b, "error {i} diverges: {a} vs {b}"),
                _ => panic!("result {i}: one Ok, one Err"),
            }
        }
    }
}