Skip to main content

structured_email_address/
lib.rs

1//! # structured-email-address
2//!
3//! RFC 5321/5322/6531 conformant email address parser, validator, and normalizer.
4//!
5//! Unlike existing Rust crates that stop at RFC validation, this crate provides:
6//! - **Subaddress extraction**: `user+tag@domain` → separate `user`, `tag`, `domain`
7//! - **Provider-aware normalization**: Gmail dot-stripping, configurable case folding
8//! - **PSL domain validation**: verify domain against the Public Suffix List
9//! - **Anti-homoglyph protection**: detect Cyrillic/Latin lookalikes via Unicode skeleton
10//! - **Configurable strictness**: Strict (5321), Standard (5322), Lax (obs-* allowed)
11//! - **Zero-copy parsing**: internal spans into the input string
12//!
13//! # Quick Start
14//!
15//! ```
16//! use structured_email_address::{EmailAddress, Config};
17//!
18//! // Simple: parse with defaults
19//! let email: EmailAddress = "user+tag@example.com".parse().unwrap();
20//! assert_eq!(email.local_part(), "user+tag");
21//! assert_eq!(email.tag(), Some("tag"));
22//! assert_eq!(email.domain(), "example.com");
23//!
24//! // Configured: Gmail normalization pipeline
25//! let config = Config::builder()
26//!     .strip_subaddress()
27//!     .dots_gmail_only()
28//!     .lowercase_all()
29//!     .build();
30//!
31//! let email = EmailAddress::parse_with("A.L.I.C.E+promo@Gmail.COM", &config).unwrap();
32//! assert_eq!(email.canonical(), "alice@gmail.com");
33//! assert_eq!(email.tag(), Some("promo"));
34//! ```
35
36#![deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
37
38mod config;
39mod error;
40mod normalize;
41mod parser;
42mod validate;
43
44pub use config::{
45    CasePolicy, Config, ConfigBuilder, DomainCheck, DotPolicy, Strictness, SubaddressPolicy,
46};
47pub use error::{Error, ErrorKind};
48pub use normalize::confusable_skeleton;
49
/// A parsed, validated, and normalized email address.
///
/// Immutable after construction: every field is private and the type only
/// exposes `&self` accessors. Accessors hand out borrowed data, with the
/// exception of [`canonical()`](Self::canonical), which builds a new `String`.
///
/// Equality and hashing are defined by hand and look only at `local_part`
/// and `domain`; the original input, tag, display name, and skeleton do not
/// take part in comparisons.
#[derive(Debug, Clone)]
pub struct EmailAddress {
    /// Original input (trimmed), copied from the parser's view of the input.
    original: String,
    /// Canonical local part (after normalization).
    local_part: String,
    /// Extracted subaddress tag, if any.
    tag: Option<String>,
    /// Canonical domain (IDNA-encoded, lowercased).
    domain: String,
    /// Display name, if parsed from `name-addr` format.
    display_name: Option<String>,
    /// Confusable skeleton, if config enabled it.
    skeleton: Option<String>,
}
68
69impl EmailAddress {
70    /// Parse and validate with the given configuration.
71    pub fn parse_with(input: &str, config: &Config) -> Result<Self, Error> {
72        let parsed = parser::parse(
73            input,
74            config.strictness,
75            config.allow_display_name,
76            config.allow_domain_literal,
77        )?;
78
79        let normalized = normalize::normalize(&parsed, config);
80        validate::validate(&parsed, &normalized, config)?;
81
82        Ok(Self {
83            original: parsed.input.to_string(),
84            local_part: normalized.local_part,
85            tag: normalized.tag,
86            domain: normalized.domain,
87            display_name: normalized.display_name,
88            skeleton: normalized.skeleton,
89        })
90    }
91
92    /// The canonical local part (after normalization).
93    ///
94    /// If subaddress stripping is enabled, this excludes the `+tag`.
95    /// If dot stripping is enabled, dots are removed.
96    pub fn local_part(&self) -> &str {
97        &self.local_part
98    }
99
100    /// The extracted subaddress tag, if present.
101    ///
102    /// For `user+promo@example.com`, returns `Some("promo")`.
103    /// Always extracted regardless of [`SubaddressPolicy`] — the policy only
104    /// affects whether it appears in [`canonical()`](Self::canonical).
105    pub fn tag(&self) -> Option<&str> {
106        self.tag.as_deref()
107    }
108
109    /// The canonical domain (IDNA-encoded, lowercased).
110    pub fn domain(&self) -> &str {
111        &self.domain
112    }
113
114    /// The display name, if parsed from `"Name" <addr>` or `Name <addr>` format.
115    pub fn display_name(&self) -> Option<&str> {
116        self.display_name.as_deref()
117    }
118
119    /// The full canonical address: `local_part@domain`.
120    pub fn canonical(&self) -> String {
121        format!("{}@{}", self.local_part, self.domain)
122    }
123
124    /// The original input (trimmed).
125    pub fn original(&self) -> &str {
126        &self.original
127    }
128
129    /// The confusable skeleton of the local part (if config enabled it).
130    ///
131    /// Two addresses with the same skeleton + domain are visually confusable.
132    pub fn skeleton(&self) -> Option<&str> {
133        self.skeleton.as_deref()
134    }
135
136    /// Check if the domain is a well-known freemail provider.
137    pub fn is_freemail(&self) -> bool {
138        is_freemail_domain(&self.domain)
139    }
140}
141
142impl std::fmt::Display for EmailAddress {
143    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
144        match &self.display_name {
145            Some(name) => write!(f, "\"{}\" <{}@{}>", name, self.local_part, self.domain),
146            None => write!(f, "{}@{}", self.local_part, self.domain),
147        }
148    }
149}
150
151impl PartialEq for EmailAddress {
152    fn eq(&self, other: &Self) -> bool {
153        self.local_part == other.local_part && self.domain == other.domain
154    }
155}
156
// Marker impl: `eq` compares two `String` fields, which is a total
// equivalence relation, so the stronger `Eq` guarantee holds.
impl Eq for EmailAddress {}
158
159impl std::hash::Hash for EmailAddress {
160    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
161        self.local_part.hash(state);
162        self.domain.hash(state);
163    }
164}
165
166impl std::str::FromStr for EmailAddress {
167    type Err = Error;
168
169    fn from_str(s: &str) -> Result<Self, Self::Err> {
170        Self::parse_with(s, &Config::default())
171    }
172}
173
#[cfg(feature = "serde")]
impl serde::Serialize for EmailAddress {
    /// Serializes the address as a single string holding its canonical
    /// `local_part@domain` form. Only the canonical string is emitted, so a
    /// display name is not part of the serialized value.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let canonical = self.canonical();
        serializer.serialize_str(&canonical)
    }
}
180
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for EmailAddress {
    /// Reads a string from the deserializer and parses it through `FromStr`
    /// (i.e. with the default configuration). A parse failure is reported as
    /// a custom deserialization error carrying the parse error's message.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let raw = <String as serde::Deserialize<'de>>::deserialize(deserializer)?;
        <Self as std::str::FromStr>::from_str(&raw).map_err(serde::de::Error::custom)
    }
}
188
/// Reports whether `domain` is one of the well-known freemail providers.
///
/// The comparison is an exact, case-sensitive string match against a fixed
/// table, so subdomains, trailing dots, and differently-cased spellings are
/// not recognized.
fn is_freemail_domain(domain: &str) -> bool {
    const FREEMAIL_DOMAINS: &[&str] = &[
        "gmail.com",
        "googlemail.com",
        "yahoo.com",
        "yahoo.co.uk",
        "yahoo.co.jp",
        "outlook.com",
        "hotmail.com",
        "live.com",
        "msn.com",
        "aol.com",
        "protonmail.com",
        "proton.me",
        "icloud.com",
        "me.com",
        "mac.com",
        "mail.com",
        "zoho.com",
        "yandex.ru",
        "yandex.com",
        "mail.ru",
        "gmx.com",
        "gmx.de",
        "web.de",
        "tutanota.com",
        "tuta.io",
        "fastmail.com",
    ];

    FREEMAIL_DOMAINS.contains(&domain)
}
221
#[cfg(test)]
mod tests {
    use super::*;

    // ── Helpers ──

    /// Parses `input` through `FromStr` (default config); a parse error
    /// fails the calling test with the error's message.
    fn parse_default(input: &str) -> EmailAddress {
        input
            .parse::<EmailAddress>()
            .unwrap_or_else(|e| panic!("{e}"))
    }

    /// Parses `input` with an explicit `config`; a parse error fails the
    /// calling test with the error's message.
    fn parse_configured(input: &str, config: &Config) -> EmailAddress {
        EmailAddress::parse_with(input, config).unwrap_or_else(|e| panic!("{e}"))
    }

    // ── FromStr (default config) ──

    #[test]
    fn parse_simple() {
        let address = parse_default("user@example.com");
        assert_eq!(address.local_part(), "user");
        assert_eq!(address.domain(), "example.com");
        assert_eq!(address.tag(), None);
        assert_eq!(address.canonical(), "user@example.com");
    }

    #[test]
    fn parse_with_tag() {
        let address = parse_default("user+newsletter@example.com");
        assert_eq!(address.local_part(), "user+newsletter");
        assert_eq!(address.tag(), Some("newsletter"));
    }

    #[test]
    fn display_format() {
        let address = parse_default("user@example.com");
        assert_eq!(address.to_string(), "user@example.com");
    }

    #[test]
    fn equality_by_canonical() {
        // Under the default config the two inputs differ only in domain
        // case, and the assertion expects them to compare equal.
        let lower = parse_default("user@example.com");
        let mixed = parse_default("user@Example.COM");
        assert_eq!(lower, mixed);
    }

    #[test]
    fn freemail_detection() {
        assert!(parse_default("user@gmail.com").is_freemail());
        assert!(!parse_default("user@company.com").is_freemail());
    }

    // ── Configured parsing ──

    #[test]
    fn full_normalization_pipeline() {
        let config = Config::builder()
            .strip_subaddress()
            .dots_gmail_only()
            .lowercase_all()
            .check_confusables()
            .build();

        let address = parse_configured("A.L.I.C.E+promo@Gmail.COM", &config);
        assert_eq!(address.canonical(), "alice@gmail.com");
        assert_eq!(address.tag(), Some("promo"));
        assert!(address.skeleton().is_some());
    }

    #[test]
    fn display_name_parsing() {
        let config = Config::builder().allow_display_name().build();

        let address = parse_configured("John Doe <user@example.com>", &config);
        assert_eq!(address.display_name(), Some("John Doe"));
        assert_eq!(address.local_part(), "user");
        assert_eq!(address.domain(), "example.com");
    }

    // ── Serde ──

    #[cfg(feature = "serde")]
    #[test]
    fn serde_roundtrip() {
        let address = parse_default("user@example.com");

        let json = serde_json::to_string(&address).unwrap_or_else(|e| panic!("{e}"));
        assert_eq!(json, "\"user@example.com\"");

        let restored: EmailAddress =
            serde_json::from_str(&json).unwrap_or_else(|e| panic!("{e}"));
        assert_eq!(address, restored);
    }

    // ── Validation errors ──

    #[test]
    fn rejects_empty() {
        assert!("".parse::<EmailAddress>().is_err());
    }

    #[test]
    fn rejects_no_domain_dot() {
        let outcome = "user@localhost".parse::<EmailAddress>();
        assert!(outcome.is_err());
        // The assertion above already fails the test on `Ok`, so only the
        // error branch needs inspecting here.
        if let Err(err) = outcome {
            assert!(matches!(err.kind(), ErrorKind::DomainNoDot));
        }
    }

    #[test]
    fn allows_single_label_when_configured() {
        let config = Config::builder().allow_single_label_domain().build();
        let address = parse_configured("user@localhost", &config);
        assert_eq!(address.domain(), "localhost");
    }
}