use itertools::Itertools;
use super::FoundToken;
use super::hostname::lex_hostname;
use crate::TokenKind;
pub(crate) const MAX_EMAIL_LEN: usize = 254;
pub fn lex_email_address(source: &[char]) -> Option<FoundToken> {
let source = &source[..usize::min(source.len(), MAX_EMAIL_LEN)];
let at_loc = {
let mut iter = source.iter().enumerate();
let mut in_quote = false;
loop {
match iter.next()? {
(_, '"') => in_quote = !in_quote,
(_, ' ') if !in_quote => return None,
(i, '@') if !in_quote => break i,
_ => {}
}
}
};
let local_part = &source[0..at_loc];
if !validate_local_part(local_part) {
return None;
}
let domain_part_len = lex_hostname(&source[at_loc + 1..])?;
if domain_part_len == 0 {
return None;
}
Some(FoundToken {
next_index: at_loc + 1 + domain_part_len,
token: TokenKind::EmailAddress,
})
}
fn validate_local_part(mut local_part: &[char]) -> bool {
if local_part.len() > 64 || local_part.is_empty() {
return false;
}
let is_quoted =
local_part.first().cloned() == Some('"') && local_part.last().cloned() == Some('"');
if is_quoted && local_part.len() < 2 {
return false;
}
if is_quoted {
local_part = &local_part[1..local_part.len() - 1];
}
if !is_quoted {
if !local_part.iter().cloned().all(valid_unquoted_character) {
return false;
}
if local_part.first().cloned().unwrap() == '.' || local_part.last().cloned().unwrap() == '.'
{
return false;
}
for (c, n) in local_part.iter().tuple_windows() {
if *c == '.' && *n == '.' {
return false;
}
}
} else {
let mut iter = local_part.iter().cloned();
while let Some(c) = iter.next() {
if c == '\\' {
iter.next();
continue;
}
let also_valid = ['(', ')', ',', ':', ';', '<', '>', '@', '[', ']', ' '];
if !valid_unquoted_character(c) && !also_valid.contains(&c) {
return false;
}
}
}
true
}
fn valid_unquoted_character(c: char) -> bool {
if matches!(c,
'A'..='Z' |
'a'..='z' |
'0'..='9'
) {
return true;
}
if c > '\u{007F}' {
return true;
}
let others = [
'!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}',
'~', '.',
];
if others.contains(&c) {
return true;
}
false
}
#[cfg(test)]
mod tests {
use rand::RngExt;
use super::super::hostname::tests::example_domain_parts;
use super::{lex_email_address, validate_local_part};
fn example_local_parts() -> impl Iterator<Item = Vec<char>> {
[
r"simple",
r"very.common",
r"x",
r"long.email-address-with-hyphens",
r"user.name+tag+sorting",
r"name/surname",
r"admin",
r"example",
r#"" ""#,
r#""john..doe""#,
r"mailhost!username",
r#""very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual""#,
r"user%example.com",
r"user-",
r"postmaster",
r"postmaster",
r"_test",
]
.into_iter()
.map(|s| s.chars().collect())
}
#[test]
fn example_local_parts_pass_validation() {
for local in example_local_parts() {
dbg!(local.iter().collect::<String>());
assert!(validate_local_part(&local));
}
}
#[test]
fn test_many_example_email_addresses() {
for local in example_local_parts() {
for mut domain in example_domain_parts() {
let mut address = local.clone();
address.push('@');
address.append(&mut domain);
dbg!(address.iter().collect::<String>());
let found = lex_email_address(&address).unwrap();
assert_eq!(found.next_index, address.len());
}
}
}
#[test]
fn does_not_allow_empty_domain() {
for local in example_local_parts() {
let mut address = local.clone();
address.push('@');
address.push(' ');
assert!(lex_email_address(&address).is_none());
}
}
#[test]
fn survives_random_chars() {
let mut rng = rand::rng();
let mut buf = [' '; 128];
for _ in 0..1 << 16 {
buf.fill_with(|| rng.random());
lex_email_address(&buf);
}
}
}