// validator_async/validation/email.rs

1use idna::domain_to_ascii;
2use regex::Regex;
3use std::{borrow::Cow, sync::LazyLock};
4
5use crate::ValidateIp;
6
7// Regex from the specs
8// https://html.spec.whatwg.org/multipage/forms.html#valid-e-mail-address
9// It will mark esoteric email addresses like quoted string as invalid
10static EMAIL_USER_RE: LazyLock<Regex> =
11    LazyLock::new(|| Regex::new(r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+\z").unwrap());
12static EMAIL_DOMAIN_RE: LazyLock<Regex> = LazyLock::new(|| {
13    Regex::new(
14        r"^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
15    ).unwrap()
16});
17// literal form, ipv4 or ipv6 address (SMTP 4.1.3)
18static EMAIL_LITERAL_RE: LazyLock<Regex> =
19    LazyLock::new(|| Regex::new(r"\[([a-fA-F0-9:\.]+)\]\z").unwrap());
20
21/// Checks if the domain is a valid domain and if not, check whether it's an IP
22#[must_use]
23fn validate_domain_part(domain_part: &str) -> bool {
24    if EMAIL_DOMAIN_RE.is_match(domain_part) {
25        return true;
26    }
27
28    // maybe we have an ip as a domain?
29    match EMAIL_LITERAL_RE.captures(domain_part) {
30        Some(caps) => match caps.get(1) {
31            Some(c) => c.as_str().validate_ip(),
32            None => false,
33        },
34        None => false,
35    }
36}
37
38/// Validates whether the given string is an email based on the [HTML5 spec](https://html.spec.whatwg.org/multipage/forms.html#valid-e-mail-address).
39/// [RFC 5322](https://tools.ietf.org/html/rfc5322) is not practical in most circumstances and allows email addresses
40/// that are unfamiliar to most users.
41pub trait ValidateEmail {
42    fn validate_email(&self) -> bool {
43        let val = if let Some(v) = self.as_email_string() {
44            v
45        } else {
46            return true;
47        };
48
49        if val.is_empty() || !val.contains('@') {
50            return false;
51        }
52
53        let parts: Vec<&str> = val.rsplitn(2, '@').collect();
54        let user_part = parts[1];
55        let domain_part = parts[0];
56
57        // validate the length of each part of the email, BEFORE doing the regex
58        // according to RFC5321 the max length of the local part is 64 characters
59        // and the max length of the domain part is 255 characters
60        // https://datatracker.ietf.org/doc/html/rfc5321#section-4.5.3.1.1
61        if user_part.chars().count() > 64 || domain_part.chars().count() > 255 {
62            return false;
63        }
64
65        if !EMAIL_USER_RE.is_match(user_part) {
66            return false;
67        }
68
69        if !validate_domain_part(domain_part) {
70            // Still the possibility of an [IDN](https://en.wikipedia.org/wiki/Internationalized_domain_name)
71            return match domain_to_ascii(domain_part) {
72                Ok(d) => validate_domain_part(&d),
73                Err(_) => false,
74            };
75        }
76
77        true
78    }
79
80    fn as_email_string(&self) -> Option<Cow<str>>;
81}
82
83impl<T> ValidateEmail for &T
84where
85    T: ValidateEmail,
86{
87    fn as_email_string(&self) -> Option<Cow<str>> {
88        T::as_email_string(self)
89    }
90}
91
92impl ValidateEmail for String {
93    fn as_email_string(&self) -> Option<Cow<str>> {
94        Some(Cow::from(self))
95    }
96}
97
98impl<T> ValidateEmail for Option<T>
99where
100    T: ValidateEmail,
101{
102    fn as_email_string(&self) -> Option<Cow<str>> {
103        let Some(u) = self else {
104            return None;
105        };
106
107        T::as_email_string(u)
108    }
109}
110
111impl ValidateEmail for &str {
112    fn as_email_string(&self) -> Option<Cow<'_, str>> {
113        Some(Cow::from(*self))
114    }
115}
116
117impl ValidateEmail for Cow<'_, str> {
118    fn as_email_string(&self) -> Option<Cow<'_, str>> {
119        Some(self.clone())
120    }
121}
122
#[cfg(test)]
mod tests {
    use std::borrow::Cow;

    use crate::ValidateEmail;

    /// Validates through a generic bound so that the `ValidateEmail` impl of the
    /// exact argument type (e.g. `&String`, `Option<&str>`) is the one exercised.
    fn is_valid<V: ValidateEmail>(value: V) -> bool {
        value.validate_email()
    }

    #[test]
    fn test_validate_email() {
        // Test cases taken from Django
        // https://github.com/django/django/blob/master/tests/validators/tests.py#L48
        let tests = [
            ("email@here.com", true),
            ("weirder-email@here.and.there.com", true),
            ("!def!xyz%abc@example.com", true),
            ("email@[127.0.0.1]", true),
            ("email@[2001:dB8::1]", true),
            ("email@[2001:dB8:0:0:0:0:0:1]", true),
            ("email@[::fffF:127.0.0.1]", true),
            ("example@valid-----hyphens.com", true),
            ("example@valid-with-hyphens.com", true),
            ("test@domain.with.idn.tld.उदाहरण.परीक्षा", true),
            (r#""test@test"@example.com"#, false),
            ("", false),
            ("abc", false),
            ("abc@", false),
            ("abc@bar", true),
            ("a @x.cz", false),
            ("abc@.com", false),
            ("something@@somewhere.com", false),
            ("email@127.0.0.1", true),
            ("email@[127.0.0.256]", false),
            ("email@[2001:db8::12345]", false),
            ("email@[2001:db8:0:0:0:0:1]", false),
            ("email@[::ffff:127.0.0.256]", false),
            ("example@invalid-.com", false),
            ("example@-invalid.com", false),
            ("example@invalid.com-", false),
            ("example@inv-.alid-.com", false),
            ("example@inv-.-alid.com", false),
            // Raw strings: `\n` below is a literal backslash followed by `n`.
            (r#"test@example.com\n\n<script src="x.js">"#, false),
            (r#""\\\011"@here.com"#, false),
            (r#""\\\012"@here.com"#, false),
            ("trailingdot@shouldfail.com.", false),
            // Trailing newlines in username or domain not allowed
            ("a@b.com\n", false),
            ("a\n@b.com", false),
            (r#""test@test"\n@example.com"#, false),
            ("a@[127.0.0.1]\n", false),
            // underscores are not allowed
            ("John.Doe@exam_ple.com", false),
        ];

        for (input, expected) in tests {
            assert_eq!(
                input.validate_email(),
                expected,
                "Email `{}` was not classified correctly",
                input
            );
        }
    }

    #[test]
    fn test_validate_email_label_length() {
        // max length for domain name labels is 63 characters per RFC 1034
        let max_label = "a".repeat(63);
        assert!(format!("a@atm.{max_label}").validate_email());
        assert!(format!("a@{max_label}.atm").validate_email());
        assert!(format!("a@{max_label}.bbbbbbbbbb.atm").validate_email());

        // 64 * a
        let too_long_label = "a".repeat(64);
        assert!(!format!("a@atm.{too_long_label}").validate_email());
    }

    #[test]
    fn test_validate_email_cow() {
        let test: Cow<'static, str> = "email@here.com".into();
        assert!(test.validate_email());
        let test: Cow<'static, str> = String::from("email@here.com").into();
        assert!(test.validate_email());
        let test: Cow<'static, str> = "a@[127.0.0.1]\n".into();
        assert!(!test.validate_email());
        let test: Cow<'static, str> = String::from("a@[127.0.0.1]\n").into();
        assert!(!test.validate_email());
    }

    #[test]
    fn test_validate_email_wrappers() {
        let owned = String::from("email@here.com");
        assert!(is_valid(owned.clone()));
        assert!(is_valid(&owned));
        assert!(is_valid(Some(owned.as_str())));
        assert!(is_valid(Some(&owned)));
        // An absent value is reported as valid.
        assert!(is_valid(None::<String>));

        assert!(!is_valid(String::from("abc")));
        assert!(!is_valid(Some("abc")));
    }

    #[test]
    fn test_validate_email_rfc5321() {
        // 64 character local part is still accepted
        assert!(format!("{}@mail.com", "a".repeat(64)).validate_email());
        // 65 character local part
        assert!(!format!("{}@mail.com", "a".repeat(65)).validate_email());

        // 256 character domain part (a single oversized label plus `.com`)
        assert!(!format!("a@{}.com", "a".repeat(252)).validate_email());

        // Domains built only from valid 63 character labels, so that the total
        // length is the only thing that can make them invalid.
        let label = "a".repeat(63);
        // 3 * 63 + 2 dots = 191 characters
        let within_limit = [label.as_str(); 3].join(".");
        assert!(format!("a@{within_limit}").validate_email());
        // 4 * 63 + 3 dots + ".a" = 257 characters
        let over_limit = [label.as_str(); 4].join(".") + ".a";
        assert!(!format!("a@{over_limit}").validate_email());
    }
}