// mailidator 0.1.0
//
// A lightweight Rust library for checking email address misspellings
// Documentation
//! # Email Spell Checker
//!
//! A lightweight Rust library for checking email address misspellings and suggesting corrections.
//!
//! ## Features
//!
//! - ⚡️ Lightning fast: Uses the Sift3 algorithm for fast and accurate string distance calculation
//! - 🔋 Updated: Built-in popular domains and modern TLDs
//! - 🚀 Lightweight: Zero external dependencies (except optional serde)
//! - 💙 Type-safe: Written in Rust with strong type safety
//! - ⚙️ Extensible: Allows custom domains and configuration
//! - 🔨 Easy to use: Simple and intuitive API
//!
//! ## Basic Usage
//!
//! ```rust
//! use mailidator::Mailidator;
//!
//! let checker = Mailidator::default();
//! let suggestion = checker.check("jorge@gmaik.co");
//!
//! if let Some(suggestion) = suggestion {
//!     println!("Did you mean: {}?", suggestion.full());
//!     println!("Address: {}", suggestion.address());
//!     println!("Domain: {}", suggestion.domain());
//! }
//! ```

pub mod domains;
pub mod sift3;

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

/// A proposed correction for an email address: a local part paired with a suggested domain.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Suggestion {
    /// Local part of the address (the text before `@`)
    address: String,
    /// Domain offered as the correction
    domain: String,
}

impl Suggestion {
    /// Builds a suggestion from a local part and a suggested domain.
    ///
    /// Both strings are stored as given; no validation is performed here.
    pub fn new(address: String, domain: String) -> Self {
        Suggestion { address, domain }
    }

    /// Returns the local part of the address (the text before `@`).
    pub fn address(&self) -> &str {
        self.address.as_str()
    }

    /// Returns the suggested domain.
    pub fn domain(&self) -> &str {
        self.domain.as_str()
    }

    /// Returns the complete corrected address in the form `address@domain`.
    ///
    /// Allocates a new `String` on every call.
    pub fn full(&self) -> String {
        [self.address.as_str(), self.domain.as_str()].join("@")
    }
}

/// Settings for the email spell checker: the candidate lists and the match threshold.
#[derive(Debug, Clone)]
pub struct Config {
    /// Full domains that may be suggested
    pub domains: Vec<String>,
    /// Second-level domain names
    pub second_level_domains: Vec<String>,
    /// Top-level domain names
    pub top_level_domains: Vec<String>,
    /// Maximum string distance accepted as a match (lower means stricter matching)
    pub threshold: f64,
}

impl Default for Config {
    /// Builds a configuration from the lists in the `domains` module with a threshold of `0.6`.
    ///
    /// Every entry of the built-in lists is copied into an owned `String`.
    fn default() -> Self {
        let popular: Vec<String> = domains::POPULAR_DOMAINS
            .iter()
            .copied()
            .map(Into::into)
            .collect();
        let second_level: Vec<String> = domains::SECOND_LEVEL_DOMAINS
            .iter()
            .copied()
            .map(Into::into)
            .collect();
        let top_level: Vec<String> = domains::TOP_LEVEL_DOMAINS
            .iter()
            .copied()
            .map(Into::into)
            .collect();

        Config {
            domains: popular,
            second_level_domains: second_level,
            top_level_domains: top_level,
            threshold: 0.6,
        }
    }
}

/// The main email spell checker
#[derive(Debug, Clone)]
pub struct Mailidator {
    config: Config,
}

impl Default for Mailidator {
    fn default() -> Self {
        Self::new(Config::default())
    }
}

impl Mailidator {
    /// Creates a checker that uses `config` for its candidate lists and threshold.
    pub fn new(config: Config) -> Self {
        Self { config }
    }

    /// Checks `email` and returns a correction when its domain looks like a typo.
    ///
    /// Returns `None` when:
    /// - the address cannot be split into a local part and a domain (see `parse_email`),
    /// - the domain already matches a configured domain (ASCII case-insensitively),
    /// - no candidate within the configured threshold is found, or
    /// - the best candidate is the domain that was entered, so there is nothing to correct.
    ///
    /// The local part is returned unchanged; the domain is lowercased before matching.
    pub fn check(&self, email: &str) -> Option<Suggestion> {
        let (address, domain) = self.parse_email(email)?;

        // A domain we already know needs no correction.
        if self
            .config
            .domains
            .iter()
            .any(|known| known.eq_ignore_ascii_case(&domain))
        {
            return None;
        }

        let suggested_domain = self.suggest_domain(&domain)?;

        // A "correction" identical to the input is not a suggestion. This happens when the
        // domain is rebuilt from a second-level and a top-level entry that both already match
        // exactly while the full domain is absent from `config.domains`.
        if suggested_domain.eq_ignore_ascii_case(&domain) {
            return None;
        }

        Some(Suggestion::new(address, suggested_domain))
    }

    /// Splits `email` into `(local part, lowercased domain)`.
    ///
    /// Returns `None` unless the input contains exactly one `@` with non-empty text on both
    /// sides and the domain neither starts nor ends with a dot. No further validation is done.
    fn parse_email(&self, email: &str) -> Option<(String, String)> {
        let (local, domain) = email.split_once('@')?;

        if local.is_empty() || domain.is_empty() || domain.contains('@') {
            return None;
        }
        if domain.starts_with('.') || domain.ends_with('.') {
            return None;
        }

        Some((local.to_string(), domain.to_lowercase()))
    }

    /// Finds the best replacement for `domain`.
    ///
    /// Configured full domains are tried first. A candidate at distance `0.0` is returned
    /// immediately; otherwise the candidate with the lowest score (distance plus a positional
    /// penalty) among those within the threshold wins. If no full domain qualifies, a domain
    /// assembled by `suggest_constructed_domain` is returned, provided it is itself within the
    /// threshold of `domain`.
    fn suggest_domain(&self, domain: &str) -> Option<String> {
        // Each list position adds this much to the score, so entries listed earlier win
        // against later entries at a similar distance.
        const POPULARITY_BIAS_FACTOR: f64 = 0.05;

        let mut best_match: Option<&str> = None;
        let mut best_score = f64::INFINITY;

        for (index, candidate) in self.config.domains.iter().enumerate() {
            let distance = sift3::distance(domain, candidate);

            if distance == 0.0 {
                return Some(candidate.clone());
            }

            if distance <= self.config.threshold {
                let score = distance + (index as f64) * POPULARITY_BIAS_FACTOR;
                if score < best_score {
                    best_score = score;
                    best_match = Some(candidate.as_str());
                }
            }
        }

        if let Some(found) = best_match {
            return Some(found.to_string());
        }

        // Fall back to a domain built from parts, accepted only if the whole domain is close.
        self.suggest_constructed_domain(domain)
            .filter(|constructed| sift3::distance(domain, constructed) <= self.config.threshold)
    }

    /// Builds `"{sld}.{tld}"` from the closest configured second-level and top-level entries.
    ///
    /// `domain` is split at its first dot. Returns `None` if there is no dot, if either side
    /// is shorter than two bytes, or if either side has no configured entry within the
    /// threshold.
    fn suggest_constructed_domain(&self, domain: &str) -> Option<String> {
        let (sld_part, tld_part) = domain.split_once('.')?;

        if sld_part.len() < 2 || tld_part.len() < 2 {
            return None;
        }

        let sld = self.closest_within_threshold(sld_part, &self.config.second_level_domains)?;
        let tld = self.closest_within_threshold(tld_part, &self.config.top_level_domains)?;

        Some(format!("{sld}.{tld}"))
    }

    /// Returns the entry of `candidates` with the smallest distance to `needle`, considering
    /// only entries whose distance does not exceed the threshold. On ties the earlier entry
    /// is kept.
    fn closest_within_threshold<'a>(
        &self,
        needle: &str,
        candidates: &'a [String],
    ) -> Option<&'a str> {
        let mut best: Option<&'a str> = None;
        let mut best_distance = f64::INFINITY;

        for candidate in candidates {
            let distance = sift3::distance(needle, candidate);
            if distance <= self.config.threshold && distance < best_distance {
                best_distance = distance;
                best = Some(candidate.as_str());
            }
        }

        best
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `parse_email` splits a well-formed address and rejects input with a missing part.
    #[test]
    fn test_email_parsing() {
        let checker = Mailidator::default();

        let parsed = checker.parse_email("test@example.com");
        assert_eq!(
            parsed,
            Some((String::from("test"), String::from("example.com")))
        );

        // No `@`, empty local part, empty domain.
        assert!(checker.parse_email("invalid").is_none());
        assert!(checker.parse_email("@example.com").is_none());
        assert!(checker.parse_email("test@").is_none());
    }

    /// A one-letter typo of `gmail.com` is corrected and the local part is kept.
    #[test]
    fn test_gmail_suggestion() {
        let suggestion = Mailidator::default()
            .check("test@gmaik.com")
            .expect("a typo of gmail.com should produce a suggestion");

        assert_eq!(suggestion.address(), "test");
        assert_eq!(suggestion.domain(), "gmail.com");
        assert_eq!(suggestion.full(), "test@gmail.com");
    }

    /// A correctly spelled popular domain yields no suggestion.
    #[test]
    fn test_no_suggestion_for_valid_domain() {
        let checker = Mailidator::default();

        assert_eq!(checker.check("test@gmail.com"), None);
    }

    /// A missing letter in `yahoo.com` is corrected.
    #[test]
    fn test_yahoo_suggestion() {
        let suggestion = Mailidator::default()
            .check("test@yaho.com")
            .expect("a typo of yahoo.com should produce a suggestion");

        assert_eq!(suggestion.domain(), "yahoo.com");
    }

    /// The accessors of `Suggestion` return what it was constructed with.
    #[test]
    fn test_email_suggestion_methods() {
        let suggestion = Suggestion::new(String::from("test"), String::from("gmail.com"));

        assert_eq!(suggestion.address(), "test");
        assert_eq!(suggestion.domain(), "gmail.com");
        assert_eq!(suggestion.full(), "test@gmail.com");
    }
}