structured_email_address/
normalize.rs1use unicode_normalization::UnicodeNormalization;
7use unicode_security::confusable_detection::skeleton;
8
9use crate::config::{CasePolicy, Config, DotPolicy, SubaddressPolicy};
10use crate::parser::Parsed;
11
/// Output of [`normalize`]: the canonical pieces of a parsed address.
#[derive(Debug, Clone)]
pub(crate) struct Normalized {
    /// Local part after unquoting, NFC normalization, and the configured
    /// case, subaddress, and dot policies have been applied.
    pub local_part: String,
    /// Text following the first subaddress separator in the local part, if
    /// any. Populated even when the subaddress policy strips it from
    /// `local_part`.
    pub tag: Option<String>,
    /// Lowercased domain; the IDNA ASCII form when conversion succeeds,
    /// otherwise the NFC-normalized input domain.
    pub domain: String,
    /// Display name copied verbatim from the input, when the parser found one.
    pub display_name: Option<String>,
    /// Confusable skeleton of `local_part`; `Some` only when the config
    /// enables confusable checking.
    pub skeleton: Option<String>,
}
26
27pub(crate) fn normalize(parsed: &Parsed<'_>, config: &Config) -> Normalized {
29 let raw_local = parsed.local_part.as_str(parsed.input);
30 let raw_domain = parsed.domain.as_str(parsed.input);
31
32 let unquoted_local = if raw_local.starts_with('"') && raw_local.ends_with('"') {
34 &raw_local[1..raw_local.len() - 1]
35 } else {
36 raw_local
37 };
38
39 let nfc_local: String = unquoted_local.nfc().collect();
41 let nfc_domain: String = raw_domain.nfc().collect();
42
43 let cased_local = match config.case_policy {
45 CasePolicy::All => nfc_local.to_lowercase(),
46 CasePolicy::Domain | CasePolicy::Preserve => nfc_local,
47 };
48
49 let sep = config.subaddress_separator;
51 let (base_local, tag) = match cased_local.split_once(sep) {
52 Some((base, tag)) => (base.to_string(), Some(tag.to_string())),
53 None => (cased_local, None),
54 };
55
56 let local_after_tag = match config.subaddress {
58 SubaddressPolicy::Strip => base_local.clone(),
59 SubaddressPolicy::Preserve => match &tag {
60 Some(t) => format!("{}{}{}", base_local, sep, t),
61 None => base_local.clone(),
62 },
63 };
64
65 let local_after_dots = apply_dot_policy(&local_after_tag, &nfc_domain, config.dot_policy);
67
68 let canonical_domain =
70 idna::domain_to_ascii(&nfc_domain).unwrap_or_else(|_| nfc_domain.to_lowercase());
71
72 let canonical_domain = canonical_domain.to_lowercase();
74
75 let skel = if config.check_confusables {
77 Some(confusable_skeleton(&local_after_dots))
78 } else {
79 None
80 };
81
82 let display_name = parsed
84 .display_name
85 .map(|span| span.as_str(parsed.input).to_string());
86
87 Normalized {
88 local_part: local_after_dots,
89 tag,
90 domain: canonical_domain,
91 display_name,
92 skeleton: skel,
93 }
94}
95
96fn apply_dot_policy(local: &str, domain: &str, policy: DotPolicy) -> String {
98 match policy {
99 DotPolicy::Preserve => local.to_string(),
100 DotPolicy::Always => local.replace('.', ""),
101 DotPolicy::GmailOnly => {
102 let domain_lower = domain.to_lowercase();
103 if is_gmail_domain(&domain_lower) {
104 local.replace('.', "")
105 } else {
106 local.to_string()
107 }
108 }
109 }
110}
111
/// Reports whether `domain` is exactly one of the Gmail domains.
///
/// The comparison is case-sensitive; callers are expected to lowercase first.
fn is_gmail_domain(domain: &str) -> bool {
    const GMAIL_DOMAINS: [&str; 2] = ["gmail.com", "googlemail.com"];
    GMAIL_DOMAINS.iter().any(|&known| known == domain)
}
116
117pub fn confusable_skeleton(input: &str) -> String {
122 let nfc: String = input.nfc().collect();
123 skeleton(&nfc).collect::<String>().to_lowercase()
124}
125
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::Config;
    use crate::parser;

    /// Parses `input` with the parser options taken from `config`, then runs
    /// `normalize`. Panics with the parser's error if the address is rejected.
    fn parse_and_normalize(input: &str, config: &Config) -> Normalized {
        match parser::parse(
            input,
            config.strictness,
            config.allow_display_name,
            config.allow_domain_literal,
        ) {
            Ok(parsed) => normalize(&parsed, config),
            Err(e) => panic!("parse failed for '{input}': {e}"),
        }
    }

    #[test]
    fn basic_normalization() {
        let normalized = parse_and_normalize("User@Example.COM", &Config::default());
        assert_eq!(normalized.local_part, "User");
        assert_eq!(normalized.domain, "example.com");
    }

    #[test]
    fn lowercase_all() {
        let cfg = Config::builder().lowercase_all().build();
        let normalized = parse_and_normalize("User@Example.COM", &cfg);
        assert_eq!(normalized.local_part, "user");
        assert_eq!(normalized.domain, "example.com");
    }

    #[test]
    fn subaddress_extraction() {
        let normalized = parse_and_normalize("user+promo@example.com", &Config::default());
        assert_eq!(normalized.tag.as_deref(), Some("promo"));
        assert_eq!(normalized.local_part, "user+promo");
    }

    #[test]
    fn subaddress_strip() {
        let cfg = Config::builder().strip_subaddress().lowercase_all().build();
        let normalized = parse_and_normalize("user+promo@example.com", &cfg);
        assert_eq!(normalized.tag.as_deref(), Some("promo"));
        assert_eq!(normalized.local_part, "user");
    }

    #[test]
    fn gmail_dot_stripping() {
        let cfg = Config::builder().dots_gmail_only().lowercase_all().build();

        // Dots are removed for Gmail and left alone for any other domain.
        let cases = [
            ("a.l.i.c.e@gmail.com", "alice"),
            ("a.l.i.c.e@example.com", "a.l.i.c.e"),
        ];
        for (input, expected_local) in cases {
            let normalized = parse_and_normalize(input, &cfg);
            assert_eq!(normalized.local_part, expected_local);
        }
    }

    #[test]
    fn idna_domain() {
        let normalized = parse_and_normalize("user@münchen.de", &Config::default());
        assert_eq!(normalized.domain, "xn--mnchen-3ya.de");
    }

    #[test]
    fn confusable_skeleton_cyrillic() {
        // U+0430 is CYRILLIC SMALL LETTER A, a lookalike of Latin 'a'.
        let from_latin = confusable_skeleton("alice");
        let from_cyrillic = confusable_skeleton("\u{0430}lice");
        assert_eq!(from_latin, from_cyrillic);
    }

    #[test]
    fn full_pipeline() {
        let cfg = Config::builder()
            .strip_subaddress()
            .dots_gmail_only()
            .lowercase_all()
            .check_confusables()
            .build();

        let normalized = parse_and_normalize("A.L.I.C.E+promo@Gmail.COM", &cfg);
        assert_eq!(normalized.local_part, "alice");
        assert_eq!(normalized.tag.as_deref(), Some("promo"));
        assert_eq!(normalized.domain, "gmail.com");
        assert!(normalized.skeleton.is_some());
    }
}