Skip to main content

structured_email_address/
lib.rs

1//! # structured-email-address
2//!
3//! RFC 5321/5322/6531 conformant email address parser, validator, and normalizer.
4//!
5//! Unlike existing Rust crates that stop at RFC validation, this crate provides:
6//! - **Subaddress extraction**: `user+tag@domain` → separate `user`, `tag`, `domain`
7//! - **Provider-aware normalization**: Gmail dot-stripping, configurable case folding
8//! - **PSL domain validation**: verify domain against the Public Suffix List
9//! - **Anti-homoglyph protection**: detect Cyrillic/Latin lookalikes via Unicode skeleton
10//! - **Configurable strictness**: Strict (5321), Standard (5322), Lax (obs-* allowed)
11//! - **Zero-copy parsing**: internal spans into the input string
12//!
13//! # Quick Start
14//!
15//! ```
16//! use structured_email_address::{EmailAddress, Config};
17//!
18//! // Simple: parse with defaults
19//! let email: EmailAddress = "user+tag@example.com".parse().unwrap();
20//! assert_eq!(email.local_part(), "user+tag");
21//! assert_eq!(email.tag(), Some("tag"));
22//! assert_eq!(email.domain(), "example.com");
23//!
24//! // Configured: Gmail normalization pipeline
25//! let config = Config::builder()
26//!     .strip_subaddress()
27//!     .dots_gmail_only()
28//!     .lowercase_all()
29//!     .build();
30//!
31//! let email = EmailAddress::parse_with("A.L.I.C.E+promo@Gmail.COM", &config).unwrap();
32//! assert_eq!(email.canonical(), "alice@gmail.com");
33//! assert_eq!(email.tag(), Some("promo"));
34//! ```
35
36#![cfg_attr(
37    not(test),
38    deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)
39)]
40
41mod config;
42mod error;
43mod normalize;
44mod parser;
45mod validate;
46
47pub use config::{
48    CasePolicy, Config, ConfigBuilder, DomainCheck, DotPolicy, Strictness, SubaddressPolicy,
49};
50pub use error::{Error, ErrorKind};
51pub use normalize::confusable_skeleton;
52
/// An email address that has been parsed, validated, and normalized.
///
/// Values never change once constructed, and every accessor hands back
/// borrowed data.
#[derive(Clone, Debug)]
pub struct EmailAddress {
    /// The input text as supplied, after trimming.
    original: String,
    /// Local part in canonical form, i.e. after normalization was applied.
    local_part: String,
    /// Subaddress tag pulled out of the local part, when one was present.
    tag: Option<String>,
    /// Domain in canonical form (IDNA-encoded and lowercased).
    domain: String,
    /// Unicode rendering of the domain; only populated when the domain
    /// contains punycode labels.
    domain_unicode: Option<String>,
    /// Display name captured from `name-addr` input, if there was one.
    display_name: Option<String>,
    /// Confusable skeleton, present only when the config asked for it.
    skeleton: Option<String>,
}
73
74impl EmailAddress {
75    /// Parse and validate with the given configuration.
76    pub fn parse_with(input: &str, config: &Config) -> Result<Self, Error> {
77        let parsed = parser::parse(
78            input,
79            config.strictness,
80            config.allow_display_name,
81            config.allow_domain_literal,
82        )?;
83
84        let normalized = normalize::normalize(&parsed, config)?;
85        validate::validate(&parsed, &normalized, config)?;
86
87        Ok(Self {
88            original: parsed.input.to_string(),
89            local_part: normalized.local_part,
90            tag: normalized.tag,
91            domain: normalized.domain,
92            domain_unicode: normalized.domain_unicode,
93            display_name: normalized.display_name,
94            skeleton: normalized.skeleton,
95        })
96    }
97
98    /// The canonical local part (after normalization).
99    ///
100    /// If subaddress stripping is enabled, this excludes the `+tag`.
101    /// If dot stripping is enabled, dots are removed.
102    pub fn local_part(&self) -> &str {
103        &self.local_part
104    }
105
106    /// The extracted subaddress tag, if present.
107    ///
108    /// For `user+promo@example.com`, returns `Some("promo")`.
109    /// Always extracted regardless of [`SubaddressPolicy`] — the policy only
110    /// affects whether it appears in [`canonical()`](Self::canonical).
111    pub fn tag(&self) -> Option<&str> {
112        self.tag.as_deref()
113    }
114
115    /// The canonical domain (IDNA-encoded, lowercased).
116    pub fn domain(&self) -> &str {
117        &self.domain
118    }
119
120    /// The canonical domain in Unicode form.
121    ///
122    /// For internationalized domains (`münchen.de` → `xn--mnchen-3ya.de`),
123    /// returns the Unicode form of the canonical domain. For ASCII-only
124    /// domains, returns the same value as [`domain()`](Self::domain).
125    ///
126    /// ```
127    /// use structured_email_address::EmailAddress;
128    ///
129    /// let email: EmailAddress = "user@münchen.de".parse().unwrap();
130    /// assert_eq!(email.domain(), "xn--mnchen-3ya.de");
131    /// assert_eq!(email.domain_unicode(), "münchen.de");
132    ///
133    /// let ascii: EmailAddress = "user@example.com".parse().unwrap();
134    /// assert_eq!(ascii.domain_unicode(), "example.com");
135    /// ```
136    pub fn domain_unicode(&self) -> &str {
137        self.domain_unicode.as_deref().unwrap_or(&self.domain)
138    }
139
140    /// The display name, if parsed from `"Name" <addr>` or `Name <addr>` format.
141    pub fn display_name(&self) -> Option<&str> {
142        self.display_name.as_deref()
143    }
144
145    /// The full canonical address: `local_part@domain`.
146    ///
147    /// If the local part contains characters that require quoting (spaces,
148    /// special chars), it is wrapped in quotes for RFC compliance.
149    pub fn canonical(&self) -> String {
150        if needs_quoting(&self.local_part) {
151            let escaped = escape_local_part(&self.local_part);
152            format!("\"{}\"@{}", escaped, self.domain)
153        } else {
154            format!("{}@{}", self.local_part, self.domain)
155        }
156    }
157
158    /// The original input (trimmed).
159    pub fn original(&self) -> &str {
160        &self.original
161    }
162
163    /// The confusable skeleton of the local part (if config enabled it).
164    ///
165    /// Two addresses with the same skeleton + domain are visually confusable.
166    pub fn skeleton(&self) -> Option<&str> {
167        self.skeleton.as_deref()
168    }
169
170    /// Check if the domain is a well-known freemail provider.
171    pub fn is_freemail(&self) -> bool {
172        is_freemail_domain(&self.domain)
173    }
174
175    /// Parse a batch of email addresses with the given configuration.
176    ///
177    /// Returns one `Result` per input, in the same order. The config is
178    /// shared across all inputs, amortizing setup cost.
179    ///
180    /// # Example
181    ///
182    /// ```
183    /// use structured_email_address::{EmailAddress, Config};
184    ///
185    /// let config = Config::default();
186    /// let results = EmailAddress::parse_batch(
187    ///     &["alice@example.com", "invalid", "bob@example.org"],
188    ///     &config,
189    /// );
190    /// assert!(results[0].is_ok());
191    /// assert!(results[1].is_err());
192    /// assert!(results[2].is_ok());
193    /// ```
194    pub fn parse_batch(inputs: &[&str], config: &Config) -> Vec<Result<Self, Error>> {
195        inputs
196            .iter()
197            .map(|input| Self::parse_with(input, config))
198            .collect()
199    }
200
201    /// Parse a batch of email addresses in parallel using rayon.
202    ///
203    /// Same semantics as [`parse_batch`](Self::parse_batch), but distributes
204    /// work across rayon's thread pool. Useful for bulk import/validation of
205    /// large lists (10K+ addresses).
206    ///
207    /// Requires the `rayon` feature.
208    ///
209    /// # Example
210    ///
211    /// ```
212    /// use structured_email_address::{EmailAddress, Config};
213    ///
214    /// let config = Config::default();
215    /// let results = EmailAddress::parse_batch_par(
216    ///     &["alice@example.com", "bob@example.org"],
217    ///     &config,
218    /// );
219    /// assert!(results.iter().all(|r| r.is_ok()));
220    /// ```
221    #[cfg(feature = "rayon")]
222    pub fn parse_batch_par(inputs: &[&str], config: &Config) -> Vec<Result<Self, Error>> {
223        use rayon::prelude::*;
224
225        inputs
226            .par_iter()
227            .map(|input| Self::parse_with(input, config))
228            .collect()
229    }
230}
231
232impl std::fmt::Display for EmailAddress {
233    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
234        let local = if needs_quoting(&self.local_part) {
235            format!("\"{}\"", escape_local_part(&self.local_part))
236        } else {
237            self.local_part.clone()
238        };
239        match &self.display_name {
240            Some(name) => write!(
241                f,
242                "\"{}\" <{}@{}>",
243                escape_display_name(name),
244                local,
245                self.domain
246            ),
247            None => write!(f, "{}@{}", local, self.domain),
248        }
249    }
250}
251
/// Decides whether a local part must be emitted as a quoted string when
/// serialized per RFC 5321/5322.
///
/// Quoting is required when the local part is empty, when its dots do not
/// form a valid dot-atom (leading, trailing, or consecutive dots), or when it
/// contains any ASCII character that is neither alphanumeric, a dot, nor one
/// of the atext specials. Non-ASCII characters never force quoting (RFC 6531).
fn needs_quoting(local: &str) -> bool {
    // The non-alphanumeric characters permitted by atext.
    const ATEXT_SPECIALS: &str = "!#$%&'*+-/=?^_`{|}~";

    let malformed_dots = local.starts_with('.') || local.ends_with('.') || local.contains("..");
    if local.is_empty() || malformed_dots {
        return true;
    }

    // Quote unless every character is acceptable in an unquoted local part.
    !local.chars().all(|ch| {
        !ch.is_ascii() || ch.is_ascii_alphanumeric() || ch == '.' || ATEXT_SPECIALS.contains(ch)
    })
}
289
/// Prepares a local part for placement between double quotes.
///
/// `"` and `\` each gain a leading backslash. CR and LF are dropped entirely
/// so the serialized form cannot be used for header injection (FWS is
/// collapsed during normalization). Every other character is copied as-is.
fn escape_local_part(local: &str) -> String {
    local
        .chars()
        // Drop CR/LF up front to prevent header injection.
        .filter(|&ch| ch != '\r' && ch != '\n')
        .fold(String::with_capacity(local.len()), |mut out, ch| {
            if matches!(ch, '"' | '\\') {
                out.push('\\');
            }
            out.push(ch);
            out
        })
}
306
/// Prepares a display name for placement between double quotes.
///
/// `"` and `\` are backslash-escaped; CR and LF are removed so serialized
/// output cannot be used for header injection. All other characters pass
/// through untouched.
fn escape_display_name(name: &str) -> String {
    let mut out = String::with_capacity(name.len());
    // CR/LF are skipped outright.
    for ch in name.chars().filter(|&c| c != '\r' && c != '\n') {
        if ch == '"' || ch == '\\' {
            out.push('\\');
        }
        out.push(ch);
    }
    out
}
327
328/// Equality is based on canonical form (`local_part` + `domain`) only.
329/// Display name, tag, and skeleton are intentionally excluded —
330/// `"John" <user@example.com>` equals `"Jane" <user@example.com>`
331/// because they route to the same mailbox.
332impl PartialEq for EmailAddress {
333    fn eq(&self, other: &Self) -> bool {
334        self.local_part == other.local_part && self.domain == other.domain
335    }
336}
337
338impl Eq for EmailAddress {}
339
340impl std::hash::Hash for EmailAddress {
341    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
342        self.local_part.hash(state);
343        self.domain.hash(state);
344    }
345}
346
347impl std::str::FromStr for EmailAddress {
348    type Err = Error;
349
350    fn from_str(s: &str) -> Result<Self, Self::Err> {
351        Self::parse_with(s, &Config::default())
352    }
353}
354
/// Serializes the address as a single string holding its canonical form.
#[cfg(feature = "serde")]
impl serde::Serialize for EmailAddress {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let canonical = self.canonical();
        serializer.serialize_str(&canonical)
    }
}
361
/// Deserializes a string and parses it through `FromStr`; a parse failure is
/// reported as a custom deserializer error.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for EmailAddress {
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        let text = <String as serde::Deserialize>::deserialize(deserializer)?;
        match text.parse::<Self>() {
            Ok(address) => Ok(address),
            Err(err) => Err(<D::Error as serde::de::Error>::custom(err)),
        }
    }
}
369
/// Reports whether `domain` exactly matches one of the well-known freemail
/// provider domains listed below. The comparison is case-sensitive.
fn is_freemail_domain(domain: &str) -> bool {
    const FREEMAIL_DOMAINS: &[&str] = &[
        "gmail.com",
        "googlemail.com",
        "yahoo.com",
        "yahoo.co.uk",
        "yahoo.co.jp",
        "outlook.com",
        "hotmail.com",
        "live.com",
        "msn.com",
        "aol.com",
        "protonmail.com",
        "proton.me",
        "icloud.com",
        "me.com",
        "mac.com",
        "mail.com",
        "zoho.com",
        "yandex.ru",
        "yandex.com",
        "mail.ru",
        "gmx.com",
        "gmx.de",
        "web.de",
        "tutanota.com",
        "tuta.io",
        "fastmail.com",
    ];

    FREEMAIL_DOMAINS.iter().any(|&known| known == domain)
}
402
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` via `FromStr` (default config); a parse error fails the
    /// calling test with the error's display text.
    #[track_caller]
    fn parse_default(input: &str) -> EmailAddress {
        match input.parse::<EmailAddress>() {
            Ok(email) => email,
            Err(e) => panic!("{e}"),
        }
    }

    /// Parses `input` with an explicit `config`; a parse error fails the
    /// calling test with the error's display text.
    #[track_caller]
    fn parse_configured(input: &str, config: &Config) -> EmailAddress {
        match EmailAddress::parse_with(input, config) {
            Ok(email) => email,
            Err(e) => panic!("{e}"),
        }
    }

    // ── FromStr (default config) ──

    #[test]
    fn parse_simple() {
        let email = parse_default("user@example.com");
        assert_eq!(email.local_part(), "user");
        assert_eq!(email.domain(), "example.com");
        assert_eq!(email.tag(), None);
        assert_eq!(email.canonical(), "user@example.com");
    }

    #[test]
    fn parse_with_tag() {
        let email = parse_default("user+newsletter@example.com");
        assert_eq!(email.local_part(), "user+newsletter");
        assert_eq!(email.tag(), Some("newsletter"));
    }

    #[test]
    fn display_format() {
        let email = parse_default("user@example.com");
        assert_eq!(email.to_string(), "user@example.com");
    }

    #[test]
    fn display_name_escaping() {
        // Quotes inside a display name must come back backslash-escaped.
        let config = Config::builder().allow_display_name().build();
        let email = parse_configured("John \"Johnny\" Doe <user@example.com>", &config);
        let formatted = email.to_string();
        assert!(
            formatted.contains("\\\"Johnny\\\""),
            "Expected escaped quotes in: {formatted}"
        );
    }

    #[test]
    fn equality_by_canonical() {
        // Default config lowercases the domain only; the local parts already
        // share the same case, so both addresses compare equal.
        let lower = parse_default("user@example.com");
        let mixed = parse_default("user@Example.COM");
        assert_eq!(lower, mixed);
    }

    #[test]
    fn freemail_detection() {
        assert!(parse_default("user@gmail.com").is_freemail());
        assert!(!parse_default("user@company.com").is_freemail());
    }

    // ── Configured parsing ──

    #[test]
    fn full_normalization_pipeline() {
        let config = Config::builder()
            .strip_subaddress()
            .dots_gmail_only()
            .lowercase_all()
            .check_confusables()
            .build();

        let email = parse_configured("A.L.I.C.E+promo@Gmail.COM", &config);
        assert_eq!(email.canonical(), "alice@gmail.com");
        assert_eq!(email.tag(), Some("promo"));
        assert!(email.skeleton().is_some());
    }

    #[test]
    fn display_name_parsing() {
        let config = Config::builder().allow_display_name().build();

        let email = parse_configured("John Doe <user@example.com>", &config);
        assert_eq!(email.display_name(), Some("John Doe"));
        assert_eq!(email.local_part(), "user");
        assert_eq!(email.domain(), "example.com");
    }

    // ── Serde ──

    #[cfg(feature = "serde")]
    #[test]
    fn serde_roundtrip() {
        let email = parse_default("user@example.com");
        let json = match serde_json::to_string(&email) {
            Ok(json) => json,
            Err(e) => panic!("{e}"),
        };
        assert_eq!(json, "\"user@example.com\"");

        let back: EmailAddress = match serde_json::from_str(&json) {
            Ok(back) => back,
            Err(e) => panic!("{e}"),
        };
        assert_eq!(email, back);
    }

    // ── Validation errors ──

    #[test]
    fn rejects_empty() {
        assert!("".parse::<EmailAddress>().is_err());
    }

    #[test]
    fn rejects_no_domain_dot() {
        let result = "user@localhost".parse::<EmailAddress>();
        assert!(result.is_err());
        assert!(matches!(result.unwrap_err().kind(), ErrorKind::DomainNoDot));
    }

    #[test]
    fn allows_single_label_when_configured() {
        let config = Config::builder().allow_single_label_domain().build();
        let email = parse_configured("user@localhost", &config);
        assert_eq!(email.domain(), "localhost");
    }

    // ── Batch parsing ──

    #[test]
    fn batch_parse_mixed_results() {
        // Valid inputs map to Ok, invalid ones to Err, and the output keeps
        // the order of the inputs.
        let config = Config::default();
        let inputs = ["alice@example.com", "invalid", "bob@example.org"];
        let results = EmailAddress::parse_batch(&inputs, &config);
        assert_eq!(results.len(), 3);
        assert!(results[0].is_ok());
        assert!(results[1].is_err());
        assert!(results[2].is_ok());
        assert_eq!(
            results[0].as_ref().map(EmailAddress::domain),
            Ok("example.com")
        );
        assert_eq!(
            results[2].as_ref().map(EmailAddress::domain),
            Ok("example.org")
        );
    }

    #[test]
    fn batch_parse_empty_input() {
        // No inputs, no outputs.
        let config = Config::default();
        let results = EmailAddress::parse_batch(&[], &config);
        assert!(results.is_empty());
    }

    #[test]
    fn batch_parse_all_valid() {
        // Every address in an all-valid batch parses successfully.
        let config = Config::default();
        let inputs = ["a@b.com", "x@y.org", "test+tag@example.com"];
        let results = EmailAddress::parse_batch(&inputs, &config);
        assert!(results.iter().all(Result::is_ok));
    }

    #[test]
    fn batch_parse_all_invalid() {
        // Every address in an all-invalid batch is rejected.
        let config = Config::default();
        let inputs = ["", "noatsign", "@missing-local.com"];
        let results = EmailAddress::parse_batch(&inputs, &config);
        assert!(results.iter().all(Result::is_err));
    }

    #[test]
    fn batch_parse_with_config() {
        // The shared config (here: subaddress stripping etc.) applies to
        // every entry of the batch.
        let config = Config::builder()
            .strip_subaddress()
            .dots_gmail_only()
            .lowercase_all()
            .build();
        let inputs = ["A.L.I.C.E+promo@Gmail.COM", "BOB@example.com"];
        let results = EmailAddress::parse_batch(&inputs, &config);
        assert_eq!(results.len(), 2);
        assert_eq!(
            results[0].as_ref().map(EmailAddress::canonical),
            Ok("alice@gmail.com".to_string())
        );
        assert_eq!(
            results[1].as_ref().map(EmailAddress::canonical),
            Ok("bob@example.com".to_string())
        );
    }

    // ── domain_unicode() accessor ──

    #[test]
    fn domain_unicode_roundtrip() {
        // Unicode input → punycode from domain() → Unicode again from
        // domain_unicode().
        let email = parse_default("user@münchen.de");
        assert_eq!(email.domain(), "xn--mnchen-3ya.de");
        assert_eq!(email.domain_unicode(), "münchen.de");
    }

    #[test]
    fn domain_unicode_ascii_fallback() {
        // For an ASCII domain both accessors agree.
        let email = parse_default("user@example.com");
        assert_eq!(email.domain_unicode(), "example.com");
        assert_eq!(email.domain_unicode(), email.domain());
    }

    #[test]
    fn domain_unicode_mixed_labels() {
        // One IDN label alongside ASCII labels.
        let email = parse_default("user@über.example.com");
        assert_eq!(email.domain(), "xn--ber-goa.example.com");
        assert_eq!(email.domain_unicode(), "über.example.com");
    }

    #[test]
    fn domain_unicode_japanese() {
        // Japanese domain survives the roundtrip.
        let email = parse_default("user@例え.jp");
        assert!(email.domain().contains("xn--"));
        assert_eq!(email.domain_unicode(), "例え.jp");
    }

    #[cfg(feature = "rayon")]
    #[test]
    fn batch_par_matches_sequential() {
        // The parallel variant must agree with the sequential one, entry by entry.
        let config = Config::builder().strip_subaddress().lowercase_all().build();
        let inputs = [
            "alice@example.com",
            "invalid",
            "BOB+tag@Example.ORG",
            "",
            "user@test.com",
        ];
        let seq = EmailAddress::parse_batch(&inputs, &config);
        let par = EmailAddress::parse_batch_par(&inputs, &config);
        assert_eq!(seq.len(), par.len());
        for (i, pair) in seq.iter().zip(&par).enumerate() {
            match pair {
                (Ok(a), Ok(b)) => assert_eq!(a, b, "result {i} diverges"),
                (Err(a), Err(b)) => assert_eq!(a, b, "error {i} diverges: {a} vs {b}"),
                _ => panic!("result {i}: one Ok, one Err"),
            }
        }
    }
}