structured_email_address/provider.rs
1//! Provider-aware normalization rules.
2//!
3//! Different mail providers treat the local part differently: Gmail ignores
4//! dots, most freemail providers fold case, subaddress separators vary. A
5//! [`ProviderRegistry`] maps domains to [`ProviderRule`]s so normalization can
6//! be provider-aware, and applications can register their own providers.
7//!
8//! The registry is also the source of truth for [`EmailAddress::is_freemail`],
9//! independent of whether provider-aware normalization is enabled.
10//!
11//! [`EmailAddress::is_freemail`]: crate::EmailAddress::is_freemail
12
/// Normalization rule for a single mail provider, i.e. a set of equivalent
/// domains that share the same local-part conventions.
///
/// Build one with [`ProviderRule::new`] and refine it with the builder-style
/// setters. The fields are private so the rule can gain options later without
/// a breaking change.
///
/// Equality is structural: two rules compare equal when they hold the same
/// domains in the same order and agree on every option.
///
/// # Example
///
/// ```
/// use structured_email_address::ProviderRule;
///
/// // A corporate provider that ignores dots and folds case, tag separator '+'.
/// let rule = ProviderRule::new(["mail.example.com"])
///     .strip_dots(true)
///     .lowercase_local(true)
///     .freemail(false);
/// assert!(rule.matches("MAIL.EXAMPLE.COM"));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProviderRule {
    /// Domains belonging to this provider, as canonicalized by
    /// [`ProviderRule::new`].
    domains: Vec<Box<str>>,
    /// Whether dots in the local part are insignificant.
    strip_dots: bool,
    /// Whether the local part is case-insensitive (folded to lowercase).
    lowercase_local: bool,
    /// Subaddress separator, or `None` if the provider has no subaddressing.
    subaddress_sep: Option<char>,
    /// Whether this is a free webmail provider.
    is_freemail: bool,
}
39
40impl ProviderRule {
41 /// Create a rule for the given domains.
42 ///
43 /// Domains are stored in their IDNA-ASCII (punycode) canonical form so a
44 /// rule registered as `münchen.de` and one as `xn--mnchen-3ya.de` are
45 /// equivalent, and matching agrees with the canonical domain used elsewhere.
46 ///
47 /// Defaults: no dot stripping, no case folding, `+` subaddress separator,
48 /// and `is_freemail = false` (a custom rule is treated as a private domain
49 /// unless you opt in with [`freemail(true)`](Self::freemail)).
50 pub fn new<I, S>(domains: I) -> Self
51 where
52 I: IntoIterator<Item = S>,
53 S: Into<String>,
54 {
55 Self {
56 domains: domains
57 .into_iter()
58 .map(|d| canonical_domain(&d.into()))
59 .collect(),
60 strip_dots: false,
61 lowercase_local: false,
62 subaddress_sep: Some('+'),
63 is_freemail: false,
64 }
65 }
66
67 /// Set whether dots in the local part are insignificant (e.g. Gmail).
68 #[must_use]
69 pub fn strip_dots(mut self, yes: bool) -> Self {
70 self.strip_dots = yes;
71 self
72 }
73
74 /// Set whether the local part is case-insensitive (folded to lowercase).
75 #[must_use]
76 pub fn lowercase_local(mut self, yes: bool) -> Self {
77 self.lowercase_local = yes;
78 self
79 }
80
81 /// Set the subaddress separator, or `None` if the provider has no
82 /// subaddressing.
83 #[must_use]
84 pub fn subaddress_separator(mut self, sep: Option<char>) -> Self {
85 self.subaddress_sep = sep;
86 self
87 }
88
89 /// Set whether this provider is a free webmail provider.
90 #[must_use]
91 pub fn freemail(mut self, yes: bool) -> Self {
92 self.is_freemail = yes;
93 self
94 }
95
96 /// Returns true if `domain` belongs to this provider.
97 ///
98 /// The domain is canonicalized to IDNA-ASCII before comparison, so Unicode
99 /// and punycode spellings of the same domain match.
100 pub fn matches(&self, domain: &str) -> bool {
101 self.matches_canonical(&canonical_domain(domain))
102 }
103
104 /// Match against a domain already in canonical (IDNA-ASCII) form.
105 fn matches_canonical(&self, canonical: &str) -> bool {
106 self.domains.iter().any(|d| &**d == canonical)
107 }
108
109 /// Whether the local part's dots are insignificant.
110 pub fn strips_dots(&self) -> bool {
111 self.strip_dots
112 }
113
114 /// Whether the local part is case-insensitive.
115 pub fn folds_case(&self) -> bool {
116 self.lowercase_local
117 }
118
119 /// The provider's subaddress separator, if any.
120 pub fn separator(&self) -> Option<char> {
121 self.subaddress_sep
122 }
123
124 /// Whether this is a free webmail provider.
125 pub fn is_freemail(&self) -> bool {
126 self.is_freemail
127 }
128}
129
130/// A set of [`ProviderRule`]s with domain lookup.
131///
132/// [`builtin`](Self::builtin) seeds the well-known providers; applications can
133/// extend it with [`add`](Self::add). User-added rules take precedence over
134/// built-ins, so a custom rule can redefine a built-in provider.
135#[derive(Debug, Clone)]
136pub struct ProviderRegistry {
137 rules: Vec<ProviderRule>,
138}
139
140// Process-wide built-in registry, constructed once. `builtin()` clones it and
141// the GmailOnly dot-policy borrows it, so neither pays a per-call allocation.
142// no-std: once_cell::race::OnceBox (alloc) or a caller-injected registry.
143static BUILTIN: std::sync::LazyLock<ProviderRegistry> = std::sync::LazyLock::new(|| {
144 let p = |domains: &[&str]| {
145 ProviderRule::new(domains.iter().copied())
146 .lowercase_local(true)
147 .freemail(true)
148 };
149 ProviderRegistry {
150 rules: vec![
151 p(&["gmail.com", "googlemail.com"]).strip_dots(true),
152 p(&["outlook.com", "hotmail.com", "live.com", "msn.com"]),
153 p(&["yahoo.com", "yahoo.co.uk", "yahoo.co.jp"]),
154 p(&["protonmail.com", "proton.me"]),
155 p(&["icloud.com", "me.com", "mac.com"]),
156 p(&["yandex.ru", "yandex.com"]),
157 p(&["mail.ru"]),
158 // Freemail providers without special normalization quirks.
159 p(&[
160 "aol.com",
161 "mail.com",
162 "zoho.com",
163 "gmx.com",
164 "gmx.de",
165 "web.de",
166 "tutanota.com",
167 "tuta.io",
168 "fastmail.com",
169 ]),
170 ],
171 }
172});
173
174/// Borrow the process-wide built-in registry without allocating.
175pub(crate) fn builtin_ref() -> &'static ProviderRegistry {
176 &BUILTIN
177}
178
179impl ProviderRegistry {
180 /// An empty registry.
181 pub fn empty() -> Self {
182 Self { rules: Vec::new() }
183 }
184
185 /// The built-in registry of well-known mail providers.
186 ///
187 /// Only Gmail/Googlemail ignore dots; every entry folds local-part case and
188 /// uses `+` as its subaddress separator. All built-ins are freemail.
189 ///
190 /// Returns an owned clone of a process-wide shared registry, so the rule set
191 /// is constructed (and its domains IDNA-canonicalized) only once.
192 pub fn builtin() -> Self {
193 builtin_ref().clone()
194 }
195
196 /// Add a rule. User-added rules take precedence over earlier ones.
197 pub fn add(&mut self, rule: ProviderRule) {
198 self.rules.push(rule);
199 }
200
201 /// Builder-style [`add`](Self::add).
202 #[must_use]
203 pub fn with(mut self, rule: ProviderRule) -> Self {
204 self.add(rule);
205 self
206 }
207
208 /// Look up the rule for a domain, or `None` if no provider matches.
209 ///
210 /// Most-recently-added rules win, so a custom rule overrides a built-in for
211 /// the same domain. The domain is canonicalized to IDNA-ASCII once, so
212 /// Unicode and punycode spellings resolve to the same rule.
213 pub fn lookup(&self, domain: &str) -> Option<&ProviderRule> {
214 let canonical = canonical_domain(domain);
215 self.rules
216 .iter()
217 .rev()
218 .find(|r| r.matches_canonical(&canonical))
219 }
220}
221
222impl Default for ProviderRegistry {
223 fn default() -> Self {
224 Self::builtin()
225 }
226}
227
228/// Canonicalize a domain to its IDNA-ASCII (punycode) form, lowercased.
229///
230/// Falls back to ASCII lowercasing if the input is not a valid domain, so
231/// matching never panics on arbitrary registry input.
232fn canonical_domain(domain: &str) -> Box<str> {
233 idna::domain_to_ascii(domain)
234 .unwrap_or_else(|_| domain.to_ascii_lowercase())
235 .into_boxed_str()
236}
237
238#[cfg(test)]
239mod tests;