structured_email_address/lib.rs
1//! # structured-email-address
2//!
3//! RFC 5321/5322/6531 conformant email address parser, validator, and normalizer.
4//!
5//! Unlike existing Rust crates that stop at RFC validation, this crate provides:
6//! - **Subaddress extraction**: `user+tag@domain` → separate `user`, `tag`, `domain`
7//! - **Provider-aware normalization**: Gmail dot-stripping, configurable case folding
8//! - **PSL domain validation**: verify domain against the Public Suffix List
9//! - **Anti-homoglyph protection**: detect Cyrillic/Latin lookalikes via Unicode skeleton
10//! - **Configurable strictness**: Strict (5321), Standard (5322), Lax (obs-* allowed)
11//! - **Zero-copy parsing**: internal spans into the input string
12//!
13//! # Quick Start
14//!
15//! ```
16//! use structured_email_address::{EmailAddress, Config};
17//!
18//! // Simple: parse with defaults
19//! let email: EmailAddress = "user+tag@example.com".parse().unwrap();
20//! assert_eq!(email.local_part(), "user+tag");
21//! assert_eq!(email.tag(), Some("tag"));
22//! assert_eq!(email.domain(), "example.com");
23//!
24//! // Configured: Gmail normalization pipeline
25//! let config = Config::builder()
26//! .strip_subaddress()
27//! .dots_gmail_only()
28//! .lowercase_all()
29//! .build();
30//!
31//! let email = EmailAddress::parse_with("A.L.I.C.E+promo@Gmail.COM", &config).unwrap();
32//! assert_eq!(email.canonical(), "alice@gmail.com");
33//! assert_eq!(email.tag(), Some("promo"));
34//! ```
35
36#![cfg_attr(
37 not(test),
38 deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)
39)]
40
41mod config;
42mod error;
43mod normalize;
44mod parser;
45mod provider;
46mod validate;
47
48pub use config::{
49 CasePolicy, Config, ConfigBuilder, DomainCheck, DotPolicy, Strictness, SubaddressPolicy,
50};
51pub use error::{Error, ErrorKind};
52pub use normalize::confusable_skeleton;
53pub use provider::{ProviderRegistry, ProviderRule};
54
/// An email address that has been parsed, validated, and normalized.
///
/// Values are immutable once constructed; the field accessors hand out
/// borrowed views of the stored data.
#[derive(Clone, Debug)]
pub struct EmailAddress {
    /// The input text exactly as it was handed to the parser.
    original: String,
    /// Local part in canonical (post-normalization) form.
    local_part: String,
    /// Subaddress tag extracted from the local part, when there is one.
    tag: Option<String>,
    /// Domain in canonical form (IDNA-encoded, lowercased).
    domain: String,
    /// Unicode rendering of the domain; set only when the domain has punycode labels.
    domain_unicode: Option<String>,
    /// Display name captured from a `name-addr` input, when present.
    display_name: Option<String>,
    /// Confusable skeleton, populated only when the config asks for it.
    skeleton: Option<String>,
    /// `true` when the provider registry marks the domain as a freemail provider.
    freemail: bool,
}
77
78impl EmailAddress {
79 /// Parse and validate with the given configuration.
80 pub fn parse_with(input: &str, config: &Config) -> Result<Self, Error> {
81 let parsed = parser::parse(
82 input,
83 config.strictness,
84 config.allow_display_name,
85 config.allow_domain_literal,
86 )?;
87
88 let normalized = normalize::normalize(&parsed, config)?;
89 validate::validate(&parsed, &normalized, config)?;
90
91 // Freemail status comes from the provider registry (built-ins + any
92 // custom rules), independent of provider-aware normalization.
93 let freemail = config
94 .providers
95 .lookup(&normalized.domain)
96 .is_some_and(|p| p.is_freemail());
97
98 Ok(Self {
99 original: parsed.input.to_string(),
100 local_part: normalized.local_part,
101 tag: normalized.tag,
102 domain: normalized.domain,
103 domain_unicode: normalized.domain_unicode,
104 display_name: normalized.display_name,
105 skeleton: normalized.skeleton,
106 freemail,
107 })
108 }
109
110 /// The canonical local part (after normalization).
111 ///
112 /// If subaddress stripping is enabled, this excludes the `+tag`.
113 /// If dot stripping is enabled, dots are removed.
114 pub fn local_part(&self) -> &str {
115 &self.local_part
116 }
117
118 /// The extracted subaddress tag, if present.
119 ///
120 /// For `user+promo@example.com`, returns `Some("promo")`.
121 /// Always extracted regardless of [`SubaddressPolicy`] — the policy only
122 /// affects whether it appears in [`canonical()`](Self::canonical).
123 pub fn tag(&self) -> Option<&str> {
124 self.tag.as_deref()
125 }
126
127 /// The canonical domain (IDNA-encoded, lowercased).
128 pub fn domain(&self) -> &str {
129 &self.domain
130 }
131
132 /// The canonical domain in Unicode form.
133 ///
134 /// For internationalized domains (`münchen.de` → `xn--mnchen-3ya.de`),
135 /// returns the Unicode form of the canonical domain. For ASCII-only
136 /// domains, returns the same value as [`domain()`](Self::domain).
137 ///
138 /// # Security
139 ///
140 /// The Unicode form is intended for **display only**. It may reintroduce
141 /// [IDN homograph attacks](https://en.wikipedia.org/wiki/IDN_homograph_attack)
142 /// where visually similar characters from different scripts produce
143 /// different domain names (e.g. Cyrillic `а` vs Latin `a`).
144 ///
145 /// For security-sensitive comparisons (allow-lists, deduplication, access
146 /// control), always use [`domain()`](Self::domain) which returns the
147 /// ACE/Punycode form. If you must compare Unicode domains, apply your own
148 /// confusable-detection logic (see [`confusable_skeleton()`]).
149 ///
150 /// ```
151 /// use structured_email_address::EmailAddress;
152 ///
153 /// let email: EmailAddress = "user@münchen.de".parse().unwrap();
154 /// assert_eq!(email.domain(), "xn--mnchen-3ya.de");
155 /// assert_eq!(email.domain_unicode(), "münchen.de");
156 ///
157 /// let ascii: EmailAddress = "user@example.com".parse().unwrap();
158 /// assert_eq!(ascii.domain_unicode(), "example.com");
159 /// ```
160 pub fn domain_unicode(&self) -> &str {
161 self.domain_unicode.as_deref().unwrap_or(&self.domain)
162 }
163
164 /// The display name, if parsed from `"Name" <addr>` or `Name <addr>` format.
165 pub fn display_name(&self) -> Option<&str> {
166 self.display_name.as_deref()
167 }
168
169 /// The full canonical address: `local_part@domain`.
170 ///
171 /// If the local part contains characters that require quoting (spaces,
172 /// special chars), it is wrapped in quotes for RFC compliance.
173 pub fn canonical(&self) -> String {
174 if needs_quoting(&self.local_part) {
175 let escaped = escape_local_part(&self.local_part);
176 format!("\"{}\"@{}", escaped, self.domain)
177 } else {
178 format!("{}@{}", self.local_part, self.domain)
179 }
180 }
181
182 /// The original input, exactly as supplied to the parser (not trimmed).
183 pub fn original(&self) -> &str {
184 &self.original
185 }
186
187 /// The confusable skeleton of the local part (if config enabled it).
188 ///
189 /// Two addresses with the same skeleton + domain are visually confusable.
190 pub fn skeleton(&self) -> Option<&str> {
191 self.skeleton.as_deref()
192 }
193
194 /// Check if the domain is a known freemail provider.
195 ///
196 /// Determined from the [`ProviderRegistry`] in the [`Config`] used to parse
197 /// (built-in providers plus any registered via
198 /// [`ConfigBuilder::add_provider`]).
199 pub fn is_freemail(&self) -> bool {
200 self.freemail
201 }
202
203 /// Parse a batch of email addresses with the given configuration.
204 ///
205 /// Returns one `Result` per input, in the same order. The config is
206 /// shared across all inputs, amortizing setup cost.
207 ///
208 /// # Example
209 ///
210 /// ```
211 /// use structured_email_address::{EmailAddress, Config};
212 ///
213 /// let config = Config::default();
214 /// let results = EmailAddress::parse_batch(
215 /// &["alice@example.com", "invalid", "bob@example.org"],
216 /// &config,
217 /// );
218 /// assert!(results[0].is_ok());
219 /// assert!(results[1].is_err());
220 /// assert!(results[2].is_ok());
221 /// ```
222 pub fn parse_batch(inputs: &[&str], config: &Config) -> Vec<Result<Self, Error>> {
223 inputs
224 .iter()
225 .map(|input| Self::parse_with(input, config))
226 .collect()
227 }
228
229 /// Parse a batch of email addresses in parallel using rayon.
230 ///
231 /// Same semantics as [`parse_batch`](Self::parse_batch), but distributes
232 /// work across rayon's thread pool. Useful for bulk import/validation of
233 /// large lists (10K+ addresses).
234 ///
235 /// Requires the `rayon` feature.
236 ///
237 /// # Example
238 ///
239 /// ```
240 /// use structured_email_address::{EmailAddress, Config};
241 ///
242 /// let config = Config::default();
243 /// let results = EmailAddress::parse_batch_par(
244 /// &["alice@example.com", "bob@example.org"],
245 /// &config,
246 /// );
247 /// assert!(results.iter().all(|r| r.is_ok()));
248 /// ```
249 #[cfg(feature = "rayon")]
250 pub fn parse_batch_par(inputs: &[&str], config: &Config) -> Vec<Result<Self, Error>> {
251 use rayon::prelude::*;
252
253 inputs
254 .par_iter()
255 .map(|input| Self::parse_with(input, config))
256 .collect()
257 }
258}
259
260impl std::fmt::Display for EmailAddress {
261 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
262 let local = if needs_quoting(&self.local_part) {
263 format!("\"{}\"", escape_local_part(&self.local_part))
264 } else {
265 self.local_part.clone()
266 };
267 match &self.display_name {
268 Some(name) => write!(
269 f,
270 "\"{}\" <{}@{}>",
271 escape_display_name(name),
272 local,
273 self.domain
274 ),
275 None => write!(f, "{}@{}", local, self.domain),
276 }
277 }
278}
279
/// Decides whether a local part must be serialized as a quoted string
/// (RFC 5321/5322) rather than as a bare dot-atom.
///
/// Quoting is required when the local part is empty, when it has a leading,
/// trailing, or doubled dot, or when it contains an ASCII character that is
/// not part of `atext`. Non-ASCII characters never force quoting (RFC 6531).
fn needs_quoting(local: &str) -> bool {
    /// `true` for characters that may appear unquoted inside one atom.
    fn is_unquoted_char(ch: char) -> bool {
        // The `atext` symbols besides letters and digits.
        const ATEXT_SYMBOLS: &str = "!#$%&'*+-/=?^_`{|}~";
        !ch.is_ascii() || ch.is_ascii_alphanumeric() || ATEXT_SYMBOLS.contains(ch)
    }

    // Splitting on '.' turns every dot-atom violation (empty input, leading,
    // trailing, or consecutive dots) into an empty atom.
    local
        .split('.')
        .any(|atom| atom.is_empty() || !atom.chars().all(is_unquoted_char))
}
317
/// Prepares a local part for placement between double quotes.
///
/// `"` and `\` each get a backslash in front of them. CR and LF are dropped
/// entirely so the serialized form cannot be used for header injection (FWS
/// is collapsed during normalization).
fn escape_local_part(local: &str) -> String {
    local
        .chars()
        // Drop CR/LF up front to prevent header injection.
        .filter(|ch| !matches!(ch, '\r' | '\n'))
        .fold(String::with_capacity(local.len()), |mut out, ch| {
            if matches!(ch, '"' | '\\') {
                out.push('\\');
            }
            out.push(ch);
            out
        })
}
334
/// Makes a display name safe to emit inside a quoted string.
///
/// A backslash is inserted before every `"` and `\`; CR and LF characters are
/// removed so the serialized output cannot carry a header injection.
fn escape_display_name(name: &str) -> String {
    let mut out = String::with_capacity(name.len());
    for ch in name.chars() {
        if ch == '\r' || ch == '\n' {
            // Line breaks are stripped, never escaped.
            continue;
        }
        if ch == '"' || ch == '\\' {
            out.push('\\');
        }
        out.push(ch);
    }
    out
}
355
356/// Equality is based on canonical form (`local_part` + `domain`) only.
357/// Display name, tag, and skeleton are intentionally excluded —
358/// `"John" <user@example.com>` equals `"Jane" <user@example.com>`
359/// because they route to the same mailbox.
360impl PartialEq for EmailAddress {
361 fn eq(&self, other: &Self) -> bool {
362 self.local_part == other.local_part && self.domain == other.domain
363 }
364}
365
366impl Eq for EmailAddress {}
367
368impl std::hash::Hash for EmailAddress {
369 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
370 self.local_part.hash(state);
371 self.domain.hash(state);
372 }
373}
374
375impl std::str::FromStr for EmailAddress {
376 type Err = Error;
377
378 fn from_str(s: &str) -> Result<Self, Self::Err> {
379 Self::parse_with(s, &Config::default())
380 }
381}
382
/// Serializes the address as a single string: its canonical form.
#[cfg(feature = "serde")]
impl serde::Serialize for EmailAddress {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let canonical = self.canonical();
        serializer.serialize_str(&canonical)
    }
}
389
/// Deserializes from a string by running it through the `FromStr` parser;
/// a parse failure is reported as a custom serde error.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for EmailAddress {
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        let raw = <String as serde::Deserialize>::deserialize(deserializer)?;
        raw.parse::<Self>().map_err(serde::de::Error::custom)
    }
}
397
398#[cfg(test)]
399mod tests;