Skip to main content

structured_email_address/
lib.rs

1//! # structured-email-address
2//!
3//! RFC 5321/5322/6531 conformant email address parser, validator, and normalizer.
4//!
5//! Unlike existing Rust crates that stop at RFC validation, this crate provides:
6//! - **Subaddress extraction**: `user+tag@domain` → separate `user`, `tag`, `domain`
7//! - **Provider-aware normalization**: Gmail dot-stripping, configurable case folding
8//! - **PSL domain validation**: verify domain against the Public Suffix List
9//! - **Anti-homoglyph protection**: detect Cyrillic/Latin lookalikes via Unicode skeleton
10//! - **Configurable strictness**: Strict (5321), Standard (5322), Lax (obs-* allowed)
11//! - **Zero-copy parsing**: internal spans into the input string
12//!
13//! # Quick Start
14//!
15//! ```
16//! use structured_email_address::{EmailAddress, Config};
17//!
18//! // Simple: parse with defaults
19//! let email: EmailAddress = "user+tag@example.com".parse().unwrap();
20//! assert_eq!(email.local_part(), "user+tag");
21//! assert_eq!(email.tag(), Some("tag"));
22//! assert_eq!(email.domain(), "example.com");
23//!
24//! // Configured: Gmail normalization pipeline
25//! let config = Config::builder()
26//!     .strip_subaddress()
27//!     .dots_gmail_only()
28//!     .lowercase_all()
29//!     .build();
30//!
31//! let email = EmailAddress::parse_with("A.L.I.C.E+promo@Gmail.COM", &config).unwrap();
32//! assert_eq!(email.canonical(), "alice@gmail.com");
33//! assert_eq!(email.tag(), Some("promo"));
34//! ```
35
36#![cfg_attr(
37    not(test),
38    deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)
39)]
40
41mod config;
42mod error;
43mod normalize;
44mod parser;
45mod provider;
46mod validate;
47
48pub use config::{
49    CasePolicy, Config, ConfigBuilder, DomainCheck, DotPolicy, Strictness, SubaddressPolicy,
50};
51pub use error::{Error, ErrorKind};
52pub use normalize::confusable_skeleton;
53pub use provider::{ProviderRegistry, ProviderRule};
54
/// An email address that has been parsed, validated, and normalized.
///
/// Every field is private; the value is only observable through the
/// read-only accessors on this type, so it cannot change after construction.
#[derive(Clone, Debug)]
pub struct EmailAddress {
    /// The caller's input string, kept verbatim.
    original: String,
    /// Local part in canonical form, i.e. after normalization has run.
    local_part: String,
    /// Subaddress tag pulled out of the local part, when there is one.
    tag: Option<String>,
    /// Domain in canonical form (IDNA-encoded, lowercased).
    domain: String,
    /// Unicode rendering of the domain; populated only when the domain
    /// contains punycode labels.
    domain_unicode: Option<String>,
    /// Display name taken from a `name-addr` style input, if present.
    display_name: Option<String>,
    /// Confusable skeleton; present only when the config asked for it.
    skeleton: Option<String>,
    /// `true` when the provider registry marks the domain as freemail.
    freemail: bool,
}
77
78impl EmailAddress {
79    /// Parse and validate with the given configuration.
80    pub fn parse_with(input: &str, config: &Config) -> Result<Self, Error> {
81        let parsed = parser::parse(
82            input,
83            config.strictness,
84            config.allow_display_name,
85            config.allow_domain_literal,
86        )?;
87
88        let normalized = normalize::normalize(&parsed, config)?;
89        validate::validate(&parsed, &normalized, config)?;
90
91        // Freemail status comes from the provider registry (built-ins + any
92        // custom rules), independent of provider-aware normalization.
93        let freemail = config
94            .providers
95            .lookup(&normalized.domain)
96            .is_some_and(|p| p.is_freemail());
97
98        Ok(Self {
99            original: parsed.input.to_string(),
100            local_part: normalized.local_part,
101            tag: normalized.tag,
102            domain: normalized.domain,
103            domain_unicode: normalized.domain_unicode,
104            display_name: normalized.display_name,
105            skeleton: normalized.skeleton,
106            freemail,
107        })
108    }
109
110    /// The canonical local part (after normalization).
111    ///
112    /// If subaddress stripping is enabled, this excludes the `+tag`.
113    /// If dot stripping is enabled, dots are removed.
114    pub fn local_part(&self) -> &str {
115        &self.local_part
116    }
117
118    /// The extracted subaddress tag, if present.
119    ///
120    /// For `user+promo@example.com`, returns `Some("promo")`.
121    /// Always extracted regardless of [`SubaddressPolicy`] — the policy only
122    /// affects whether it appears in [`canonical()`](Self::canonical).
123    pub fn tag(&self) -> Option<&str> {
124        self.tag.as_deref()
125    }
126
127    /// The canonical domain (IDNA-encoded, lowercased).
128    pub fn domain(&self) -> &str {
129        &self.domain
130    }
131
132    /// The canonical domain in Unicode form.
133    ///
134    /// For internationalized domains (`münchen.de` → `xn--mnchen-3ya.de`),
135    /// returns the Unicode form of the canonical domain. For ASCII-only
136    /// domains, returns the same value as [`domain()`](Self::domain).
137    ///
138    /// # Security
139    ///
140    /// The Unicode form is intended for **display only**. It may reintroduce
141    /// [IDN homograph attacks](https://en.wikipedia.org/wiki/IDN_homograph_attack)
142    /// where visually similar characters from different scripts produce
143    /// different domain names (e.g. Cyrillic `а` vs Latin `a`).
144    ///
145    /// For security-sensitive comparisons (allow-lists, deduplication, access
146    /// control), always use [`domain()`](Self::domain) which returns the
147    /// ACE/Punycode form. If you must compare Unicode domains, apply your own
148    /// confusable-detection logic (see [`confusable_skeleton()`]).
149    ///
150    /// ```
151    /// use structured_email_address::EmailAddress;
152    ///
153    /// let email: EmailAddress = "user@münchen.de".parse().unwrap();
154    /// assert_eq!(email.domain(), "xn--mnchen-3ya.de");
155    /// assert_eq!(email.domain_unicode(), "münchen.de");
156    ///
157    /// let ascii: EmailAddress = "user@example.com".parse().unwrap();
158    /// assert_eq!(ascii.domain_unicode(), "example.com");
159    /// ```
160    pub fn domain_unicode(&self) -> &str {
161        self.domain_unicode.as_deref().unwrap_or(&self.domain)
162    }
163
164    /// The display name, if parsed from `"Name" <addr>` or `Name <addr>` format.
165    pub fn display_name(&self) -> Option<&str> {
166        self.display_name.as_deref()
167    }
168
169    /// The full canonical address: `local_part@domain`.
170    ///
171    /// If the local part contains characters that require quoting (spaces,
172    /// special chars), it is wrapped in quotes for RFC compliance.
173    pub fn canonical(&self) -> String {
174        if needs_quoting(&self.local_part) {
175            let escaped = escape_local_part(&self.local_part);
176            format!("\"{}\"@{}", escaped, self.domain)
177        } else {
178            format!("{}@{}", self.local_part, self.domain)
179        }
180    }
181
182    /// The original input, exactly as supplied to the parser (not trimmed).
183    pub fn original(&self) -> &str {
184        &self.original
185    }
186
187    /// The confusable skeleton of the local part (if config enabled it).
188    ///
189    /// Two addresses with the same skeleton + domain are visually confusable.
190    pub fn skeleton(&self) -> Option<&str> {
191        self.skeleton.as_deref()
192    }
193
194    /// Check if the domain is a known freemail provider.
195    ///
196    /// Determined from the [`ProviderRegistry`] in the [`Config`] used to parse
197    /// (built-in providers plus any registered via
198    /// [`ConfigBuilder::add_provider`]).
199    pub fn is_freemail(&self) -> bool {
200        self.freemail
201    }
202
203    /// Parse a batch of email addresses with the given configuration.
204    ///
205    /// Returns one `Result` per input, in the same order. The config is
206    /// shared across all inputs, amortizing setup cost.
207    ///
208    /// # Example
209    ///
210    /// ```
211    /// use structured_email_address::{EmailAddress, Config};
212    ///
213    /// let config = Config::default();
214    /// let results = EmailAddress::parse_batch(
215    ///     &["alice@example.com", "invalid", "bob@example.org"],
216    ///     &config,
217    /// );
218    /// assert!(results[0].is_ok());
219    /// assert!(results[1].is_err());
220    /// assert!(results[2].is_ok());
221    /// ```
222    pub fn parse_batch(inputs: &[&str], config: &Config) -> Vec<Result<Self, Error>> {
223        inputs
224            .iter()
225            .map(|input| Self::parse_with(input, config))
226            .collect()
227    }
228
229    /// Parse a batch of email addresses in parallel using rayon.
230    ///
231    /// Same semantics as [`parse_batch`](Self::parse_batch), but distributes
232    /// work across rayon's thread pool. Useful for bulk import/validation of
233    /// large lists (10K+ addresses).
234    ///
235    /// Requires the `rayon` feature.
236    ///
237    /// # Example
238    ///
239    /// ```
240    /// use structured_email_address::{EmailAddress, Config};
241    ///
242    /// let config = Config::default();
243    /// let results = EmailAddress::parse_batch_par(
244    ///     &["alice@example.com", "bob@example.org"],
245    ///     &config,
246    /// );
247    /// assert!(results.iter().all(|r| r.is_ok()));
248    /// ```
249    #[cfg(feature = "rayon")]
250    pub fn parse_batch_par(inputs: &[&str], config: &Config) -> Vec<Result<Self, Error>> {
251        use rayon::prelude::*;
252
253        inputs
254            .par_iter()
255            .map(|input| Self::parse_with(input, config))
256            .collect()
257    }
258}
259
260impl std::fmt::Display for EmailAddress {
261    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
262        let local = if needs_quoting(&self.local_part) {
263            format!("\"{}\"", escape_local_part(&self.local_part))
264        } else {
265            self.local_part.clone()
266        };
267        match &self.display_name {
268            Some(name) => write!(
269                f,
270                "\"{}\" <{}@{}>",
271                escape_display_name(name),
272                local,
273                self.domain
274            ),
275            None => write!(f, "{}@{}", local, self.domain),
276        }
277    }
278}
279
/// Decides whether a local part must be written as a quoted string when
/// serialized per RFC 5321/5322.
///
/// Returns `true` when the local part is empty, is not in valid dot-atom
/// shape (leading, trailing, or consecutive dots), or contains an ASCII
/// character that is neither alphanumeric, an atext special, nor `.`.
/// Non-ASCII characters never force quoting (RFC 6531).
fn needs_quoting(local: &str) -> bool {
    let bad_dots = local.starts_with('.') || local.ends_with('.') || local.contains("..");
    if local.is_empty() || bad_dots {
        return true;
    }
    // Scanning bytes is equivalent to scanning chars here: every byte of a
    // multi-byte UTF-8 sequence is >= 0x80, so only real ASCII characters can
    // ever trip the check.
    local.bytes().any(|byte| {
        let permitted = byte.is_ascii_alphanumeric()
            || matches!(
                byte,
                b'!' | b'#'
                    | b'$'
                    | b'%'
                    | b'&'
                    | b'\''
                    | b'*'
                    | b'+'
                    | b'-'
                    | b'/'
                    | b'='
                    | b'?'
                    | b'^'
                    | b'_'
                    | b'`'
                    | b'{'
                    | b'|'
                    | b'}'
                    | b'~'
                    | b'.'
            );
        byte.is_ascii() && !permitted
    })
}
317
/// Prepares a local part for placement between double quotes.
///
/// `"` and `\` are prefixed with a backslash; CR and LF are dropped so the
/// serialized form cannot be used for header injection. All other characters
/// pass through unchanged.
fn escape_local_part(local: &str) -> String {
    local
        .chars()
        // CR/LF never reach the output (header-injection guard).
        .filter(|ch| !matches!(ch, '\r' | '\n'))
        .fold(String::with_capacity(local.len()), |mut out, ch| {
            if matches!(ch, '"' | '\\') {
                out.push('\\');
            }
            out.push(ch);
            out
        })
}
334
/// Prepares a display name for placement between double quotes.
///
/// CR and LF are removed (header-injection guard), `"` and `\` receive a
/// leading backslash, and every other character is copied as-is.
fn escape_display_name(name: &str) -> String {
    let mut out = String::with_capacity(name.len());
    for ch in name.chars() {
        if ch == '\r' || ch == '\n' {
            // Dropped entirely rather than escaped.
            continue;
        }
        if ch == '"' || ch == '\\' {
            out.push('\\');
        }
        out.push(ch);
    }
    out
}
355
356/// Equality is based on canonical form (`local_part` + `domain`) only.
357/// Display name, tag, and skeleton are intentionally excluded —
358/// `"John" <user@example.com>` equals `"Jane" <user@example.com>`
359/// because they route to the same mailbox.
360impl PartialEq for EmailAddress {
361    fn eq(&self, other: &Self) -> bool {
362        self.local_part == other.local_part && self.domain == other.domain
363    }
364}
365
366impl Eq for EmailAddress {}
367
368impl std::hash::Hash for EmailAddress {
369    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
370        self.local_part.hash(state);
371        self.domain.hash(state);
372    }
373}
374
375impl std::str::FromStr for EmailAddress {
376    type Err = Error;
377
378    fn from_str(s: &str) -> Result<Self, Self::Err> {
379        Self::parse_with(s, &Config::default())
380    }
381}
382
/// Serializes the address as a single string: its canonical
/// `local_part@domain` form. The display name is not part of the output.
#[cfg(feature = "serde")]
impl serde::Serialize for EmailAddress {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let canonical = self.canonical();
        serializer.serialize_str(&canonical)
    }
}
389
/// Deserializes from a string by running it through the `FromStr` impl;
/// a parse failure is surfaced as a custom serde error.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for EmailAddress {
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        <String as serde::Deserialize>::deserialize(deserializer)
            .and_then(|text| text.parse::<Self>().map_err(serde::de::Error::custom))
    }
}
397
398#[cfg(test)]
399mod tests;