pub mod domains;
pub mod sift3;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// A proposed correction for an email address, stored as its two halves.
///
/// The halves are kept separate so callers can inspect the suggested domain
/// on its own; [`Suggestion::full`] joins them back into one address.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Suggestion {
    /// Text placed before the `@` when the address is rendered.
    address: String,
    /// Text placed after the `@` when the address is rendered.
    domain: String,
}

impl Suggestion {
    /// Builds a suggestion from the part before the `@` (`address`) and the
    /// part after it (`domain`). No validation is performed on either part.
    pub fn new(address: String, domain: String) -> Self {
        Suggestion { address, domain }
    }

    /// Returns the part that goes before the `@`.
    pub fn address(&self) -> &str {
        self.address.as_str()
    }

    /// Returns the part that goes after the `@`.
    pub fn domain(&self) -> &str {
        self.domain.as_str()
    }

    /// Returns the complete address in `address@domain` form.
    pub fn full(&self) -> String {
        [self.address.as_str(), "@", self.domain.as_str()].concat()
    }
}
/// Candidate lists and matching threshold used by [`Mailidator`].
#[derive(Debug, Clone)]
pub struct Config {
    /// Complete domains. An input domain found in this list is accepted
    /// without a suggestion; otherwise the entries serve as correction
    /// candidates, with earlier entries favoured over later ones.
    pub domains: Vec<String>,
    /// Candidates for the part of a domain before its first `.`, used when no
    /// entry of `domains` is close enough.
    pub second_level_domains: Vec<String>,
    /// Candidates for the part of a domain after its first `.`, used when no
    /// entry of `domains` is close enough.
    pub top_level_domains: Vec<String>,
    /// Largest `sift3::distance` value at which a candidate still counts as a
    /// match.
    pub threshold: f64,
}

impl Default for Config {
    /// Builds a configuration from the lists in the `domains` module and a
    /// threshold of `0.6`.
    fn default() -> Self {
        const DEFAULT_THRESHOLD: f64 = 0.6;

        let popular: Vec<String> = domains::POPULAR_DOMAINS
            .iter()
            .copied()
            .map(Into::into)
            .collect();
        let second_level: Vec<String> = domains::SECOND_LEVEL_DOMAINS
            .iter()
            .copied()
            .map(Into::into)
            .collect();
        let top_level: Vec<String> = domains::TOP_LEVEL_DOMAINS
            .iter()
            .copied()
            .map(Into::into)
            .collect();

        Config {
            domains: popular,
            second_level_domains: second_level,
            top_level_domains: top_level,
            threshold: DEFAULT_THRESHOLD,
        }
    }
}
/// Email checker that proposes corrections for mistyped domains.
#[derive(Debug, Clone)]
pub struct Mailidator {
    /// Candidate lists and threshold consulted on every check.
    config: Config,
}

impl Default for Mailidator {
    /// Creates a checker backed by [`Config::default`].
    fn default() -> Self {
        let config = Config::default();
        Mailidator::new(config)
    }
}
impl Mailidator {
    /// Creates a checker that uses `config` for its candidate lists and
    /// threshold.
    pub fn new(config: Config) -> Self {
        Self { config }
    }

    /// Checks `email` for a likely domain typo and proposes a correction.
    ///
    /// Returns `None` when:
    /// - `email` cannot be split into a non-empty local part and a usable
    ///   domain (see `parse_email`),
    /// - the lower-cased domain is already listed in `Config::domains`,
    /// - no candidate within `Config::threshold` is found, or
    /// - the best candidate is exactly the domain that was typed.
    ///
    /// Otherwise returns a [`Suggestion`] holding the original local part and
    /// the suggested domain.
    pub fn check(&self, email: &str) -> Option<Suggestion> {
        let (address, domain) = self.parse_email(email)?;
        if self.config.domains.iter().any(|known| *known == domain) {
            return None;
        }

        let suggested = self.suggest_domain(&domain)?;
        // A domain absent from `Config::domains` can still be rebuilt verbatim
        // from the second-level and top-level lists; proposing what the user
        // already typed would be a false positive, so it is dropped here.
        if suggested == domain {
            return None;
        }
        Some(Suggestion::new(address, suggested))
    }

    /// Splits `email` at its `@` into `(local part, lower-cased domain)`.
    ///
    /// Returns `None` if there is not exactly one `@`, if either side of it is
    /// empty, or if the domain starts or ends with `.`. The local part is
    /// returned unchanged.
    fn parse_email(&self, email: &str) -> Option<(String, String)> {
        let (address, domain_part) = email.split_once('@')?;
        if address.is_empty() || domain_part.is_empty() || domain_part.contains('@') {
            return None;
        }
        if domain_part.starts_with('.') || domain_part.ends_with('.') {
            return None;
        }
        Some((address.to_string(), domain_part.to_lowercase()))
    }

    /// Picks the best replacement for `domain`, or `None` if nothing is close
    /// enough.
    ///
    /// Entries of `Config::domains` are tried first. Each entry within the
    /// threshold is scored as `distance + index * POPULARITY_BIAS_FACTOR`, and
    /// the lowest score wins, so earlier entries are favoured. Only when no
    /// entry qualifies is a domain assembled from the second-level and
    /// top-level lists, and it is accepted only if it, too, is within the
    /// threshold of `domain`.
    fn suggest_domain(&self, domain: &str) -> Option<String> {
        // Score penalty per list position.
        const POPULARITY_BIAS_FACTOR: f64 = 0.05;

        let mut best_match: Option<&str> = None;
        let mut best_score = f64::INFINITY;
        for (index, candidate) in self.config.domains.iter().enumerate() {
            let distance = sift3::distance(domain, candidate);
            // A zero distance is treated as an exact hit: stop searching.
            if distance == 0.0 {
                return Some(candidate.clone());
            }
            if distance <= self.config.threshold {
                let score = distance + (index as f64) * POPULARITY_BIAS_FACTOR;
                if score < best_score {
                    best_score = score;
                    best_match = Some(candidate.as_str());
                }
            }
        }
        if let Some(found) = best_match {
            return Some(found.to_string());
        }

        let constructed = self.suggest_constructed_domain(domain)?;
        if sift3::distance(domain, &constructed) <= self.config.threshold {
            Some(constructed)
        } else {
            None
        }
    }

    /// Assembles `"<sld>.<tld>"` from the closest second-level and top-level
    /// candidates for the two halves of `domain`, split at its first `.`.
    ///
    /// Returns `None` if `domain` has no `.`, if either half is shorter than
    /// two bytes, or if either half has no candidate within the threshold.
    fn suggest_constructed_domain(&self, domain: &str) -> Option<String> {
        let (sld_part, tld_part) = domain.split_once('.')?;
        if sld_part.len() < 2 || tld_part.len() < 2 {
            return None;
        }

        let sld = self.closest_within_threshold(sld_part, &self.config.second_level_domains)?;
        let tld = self.closest_within_threshold(tld_part, &self.config.top_level_domains)?;
        Some(format!("{sld}.{tld}"))
    }

    /// Returns the entry of `candidates` with the smallest `sift3::distance`
    /// to `part`, considering only entries within `Config::threshold`.
    ///
    /// On equal distances the earliest entry is kept.
    fn closest_within_threshold<'a>(
        &self,
        part: &str,
        candidates: &'a [String],
    ) -> Option<&'a str> {
        let mut best: Option<&'a str> = None;
        let mut best_distance = f64::INFINITY;
        for candidate in candidates {
            let distance = sift3::distance(part, candidate);
            if distance < best_distance && distance <= self.config.threshold {
                best_distance = distance;
                best = Some(candidate.as_str());
            }
        }
        best
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed address splits into its two parts; inputs lacking an
    /// `@`, a local part, or a domain are rejected.
    #[test]
    fn test_email_parsing() {
        let mailidator = Mailidator::default();

        let expected = ("test".to_string(), "example.com".to_string());
        assert_eq!(mailidator.parse_email("test@example.com"), Some(expected));

        assert_eq!(mailidator.parse_email("invalid"), None);
        assert_eq!(mailidator.parse_email("@example.com"), None);
        assert_eq!(mailidator.parse_email("test@"), None);
    }

    /// A one-letter typo of `gmail.com` is corrected.
    #[test]
    fn test_gmail_suggestion() {
        let mailidator = Mailidator::default();

        let outcome = mailidator.check("test@gmaik.com");
        assert!(outcome.is_some());

        let hint = outcome.unwrap();
        assert_eq!(hint.address(), "test");
        assert_eq!(hint.domain(), "gmail.com");
        assert_eq!(hint.full(), "test@gmail.com");
    }

    /// A correctly spelled, listed domain yields no suggestion.
    #[test]
    fn test_no_suggestion_for_valid_domain() {
        let outcome = Mailidator::default().check("test@gmail.com");
        assert!(outcome.is_none());
    }

    /// A missing letter in `yahoo.com` is corrected.
    #[test]
    fn test_yahoo_suggestion() {
        let mailidator = Mailidator::default();

        let outcome = mailidator.check("test@yaho.com");
        assert!(outcome.is_some());

        let hint = outcome.unwrap();
        assert_eq!(hint.domain(), "yahoo.com");
    }

    /// Accessors and `full` reflect the values given to the constructor.
    #[test]
    fn test_email_suggestion_methods() {
        let hint = Suggestion::new("test".to_string(), "gmail.com".to_string());

        assert_eq!(hint.address(), "test");
        assert_eq!(hint.domain(), "gmail.com");
        assert_eq!(hint.full(), "test@gmail.com");
    }
}