// structured-email-address 0.0.5
//
// RFC 5321/5322/6531 email address parser, validator, and normalizer. Subaddress extraction, provider-aware normalization, PSL domain validation, anti-homoglyph protection.
// Documentation
//! Post-parse validation: length limits, domain checks, TLD validation.

use crate::config::{Config, DomainCheck};
use crate::error::{Error, ErrorKind};
use crate::normalize::Normalized;
use crate::parser::Parsed;

/// Maximum local-part length in octets (RFC 5321 §4.5.3.1.1).
///
/// The limit is inclusive: `validate` rejects a local part only when its
/// length is strictly greater than this value.
const MAX_LOCAL_PART_LEN: usize = 64;

/// Maximum total address length in octets (RFC 5321 §4.5.3.1.3).
/// This is the addr-spec (without display name), so local + "@" + domain.
///
/// The limit is inclusive: `validate` rejects an address only when
/// `local.len() + 1 + domain.len()` is strictly greater than this value.
const MAX_ADDRESS_LEN: usize = 254;

/// Maximum domain label length in octets (RFC 1035 §2.3.4).
///
/// Applied by `validate` to each `.`-separated label of the normalized
/// domain; domain literals are exempt.
const MAX_LABEL_LEN: usize = 63;

/// Validate a parsed and normalized email address.
pub(crate) fn validate(
    parsed: &Parsed<'_>,
    normalized: &Normalized,
    config: &Config,
) -> Result<(), Error> {
    let domain = &normalized.domain;

    // Length limits apply to the semantic addr-spec content (RFC 5321 §4.5.3.1),
    // with CFWS stripped (obs-forms), but before normalization (tag/dot stripping).
    let local = parsed.local_part_str();
    let domain_str = parsed.domain_str();

    // Length: local part (max 64 octets).
    if local.len() > MAX_LOCAL_PART_LEN {
        return Err(Error::new(
            ErrorKind::LocalPartTooLong { len: local.len() },
            parsed.local_part.start,
        ));
    }

    // Length: total address (max 254 octets).
    let total = local.len() + 1 + domain_str.len();
    if total > MAX_ADDRESS_LEN {
        return Err(Error::new(
            ErrorKind::AddressTooLong { len: total },
            parsed.local_part.start,
        ));
    }

    // Domain literals like [192.168.1.1] get special handling below.
    let is_domain_literal = domain_str.starts_with('[');

    // Domain must have at least one dot (unless configured otherwise).
    if config.require_tld_dot && !domain.contains('.') && !is_domain_literal {
        return Err(Error::new(ErrorKind::DomainNoDot, parsed.domain.start));
    }
    if !is_domain_literal {
        // Domain label length check only applies to hostnames, not domain literals
        // like [192.168.1.1] where splitting on '.' produces invalid labels.
        for label in domain.split('.') {
            if label.len() > MAX_LABEL_LEN {
                return Err(Error::new(
                    ErrorKind::DomainLabelTooLong {
                        label: label.to_string(),
                        len: label.len(),
                    },
                    parsed.domain.start,
                ));
            }
        }

        match config.domain_check {
            DomainCheck::Syntax => {}
            DomainCheck::Tld => validate_tld(domain, parsed.domain.start)?,
            DomainCheck::Psl => validate_psl(domain, parsed.domain.start)?,
        }
    }

    Ok(())
}

/// Basic TLD validation: check the last label is at least 2 chars and all-alpha.
fn validate_tld(domain: &str, pos: usize) -> Result<(), Error> {
    let tld = domain.rsplit('.').next().unwrap_or(domain);
    // Punycode TLDs start with xn-- and must have content after the prefix.
    if tld.starts_with("xn--") && tld.len() > 4 {
        return Ok(());
    }
    // TLD should be all-alpha and at least 2 chars.
    if tld.len() < 2 || !tld.chars().all(|c| c.is_ascii_alphabetic()) {
        return Err(Error::new(ErrorKind::UnknownTld(tld.to_string()), pos));
    }
    Ok(())
}

/// Validate `domain` against the Public Suffix List (compiled only when the
/// `psl` feature is enabled).
///
/// The domain passes when `psl::suffix` yields a suffix that reports itself
/// as known.
///
/// # Errors
///
/// Returns `ErrorKind::UnknownTld` carrying the last `.`-separated label of
/// `domain`, positioned at `pos`, when no known suffix is found.
#[cfg(feature = "psl")]
fn validate_psl(domain: &str, pos: usize) -> Result<(), Error> {
    let has_known_suffix = psl::suffix(domain.as_bytes()).map_or(false, |found| found.is_known());
    if has_known_suffix {
        return Ok(());
    }

    let last_label = domain.rsplit_once('.').map_or(domain, |(_, tail)| tail);
    Err(Error::new(ErrorKind::UnknownTld(last_label.to_string()), pos))
}

/// Stand-in for PSL validation, compiled when the `psl` feature is disabled.
///
/// Keeps the `validate_psl` name available to callers regardless of the
/// feature set; it simply forwards `domain` and `pos` to `validate_tld` and
/// returns its result unchanged.
#[cfg(not(feature = "psl"))]
fn validate_psl(domain: &str, pos: usize) -> Result<(), Error> {
    // Fallback to basic TLD check when PSL feature is disabled.
    validate_tld(domain, pos)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input` through `EmailAddress`'s `FromStr` impl and return the
    /// kind of the resulting error. Panics (failing the test) if parsing
    /// unexpectedly succeeds.
    fn rejection_kind(input: &str) -> crate::ErrorKind {
        input
            .parse::<crate::EmailAddress>()
            .unwrap_err()
            .kind()
            .clone()
    }

    // ── validate_tld() in isolation ──

    #[test]
    fn tld_valid() {
        assert!(validate_tld("example.com", 0).is_ok());
        assert!(validate_tld("example.co.uk", 0).is_ok());
        // Punycode form of .рф
        assert!(validate_tld("example.xn--p1ai", 0).is_ok());
    }

    #[test]
    fn tld_invalid() {
        // A one-character TLD is too short.
        assert!(validate_tld("example.x", 0).is_err());
        // A purely numeric TLD is not alphabetic.
        assert!(validate_tld("example.123", 0).is_err());
    }

    // ── Full validate() tests, driven through the public parsing API ──

    #[test]
    fn rejects_local_part_too_long() {
        // 65 octets: one past the local-part limit.
        let input = format!("{}@example.com", "a".repeat(65));
        assert!(matches!(
            rejection_kind(&input),
            crate::ErrorKind::LocalPartTooLong { .. }
        ));
    }

    #[test]
    fn rejects_address_too_long() {
        let input = format!("u@{}.com", "a".repeat(250));
        let kind = rejection_kind(&input);
        // The single 250-octet label may be rejected by an earlier stage, so
        // any of these outcomes counts as a correct rejection.
        let acceptable = matches!(
            kind,
            crate::ErrorKind::AddressTooLong { .. }
                | crate::ErrorKind::DomainLabelTooLong { .. }
                | crate::ErrorKind::IdnaError(_)
        );
        assert!(acceptable, "expected length or IDNA error, got {kind:?}");
    }

    #[test]
    fn rejects_domain_label_too_long() {
        // 64 octets: one past the label limit.
        let input = format!("user@{}.com", "a".repeat(64));
        let kind = rejection_kind(&input);
        let acceptable = matches!(
            kind,
            crate::ErrorKind::DomainLabelTooLong { .. } | crate::ErrorKind::IdnaError(_)
        );
        assert!(
            acceptable,
            "expected label-too-long or IDNA error, got {kind:?}"
        );
    }

    #[test]
    fn domain_literal_skips_label_check() {
        let config = crate::Config::builder()
            .allow_domain_literal()
            .allow_single_label_domain()
            .build();
        assert!(crate::EmailAddress::parse_with("user@[192.168.1.1]", &config).is_ok());
    }
}