whoxydse 0.1.1

Discover related top-level domains using the Whoxy API: historical WHOIS, reverse WHOIS, and DNS verification
use crate::api::WhoxyClient;
use crate::models::{DomainResult, PivotAttributes, PivotConfig};
use anyhow::Result;
use std::collections::HashSet;

// --- Fidelity (attribute vs domain match) ---

/// Country-code TLDs under which registrations commonly sit one level deeper
/// (e.g. `co.uk`, `com.au`), so the main label is the third one from the right.
const MULTI_PART_TLDS: &[&str] = &["uk", "au", "nz", "jp", "br", "za", "in", "kr", "sg", "my"];

/// Extract the main label from a domain (e.g. "string.com" -> "string", "example.co.uk" -> "example").
///
/// The input is trimmed, stripped of any trailing root dot (`"example.com."`) and lowercased
/// before it is split into labels. A name without any dot is returned as-is, and an empty
/// input yields an empty string.
///
/// NOTE: this is a heuristic, not a public-suffix lookup. Any name with three or more labels
/// under a TLD listed in `MULTI_PART_TLDS` is assumed to use a two-label suffix, so
/// "www.example.uk" yields "www".
fn domain_base(domain: &str) -> String {
    // A fully-qualified name ends with the root dot; left in place it produces an empty last
    // label and the real TLD would be mistaken for the main label.
    let domain = domain.trim().trim_end_matches('.').to_lowercase();

    // Walk the labels right-to-left: the TLD first, then at most the next two labels.
    let mut labels = domain.rsplit('.');
    let tld = labels.next().unwrap_or("");
    let base = match (labels.next(), labels.next()) {
        (Some(_second_level), Some(third)) if MULTI_PART_TLDS.contains(&tld) => third,
        (Some(second), _) => second,
        // Single-label input (including the empty string): the only label is the base.
        (None, _) => tld,
    };
    base.to_string()
}

/// Reports whether the host part of `email` is exactly `target_domain`
/// (e.g. bob@string.com for string.com).
///
/// Both inputs are trimmed and lowercased first. The host is everything after the
/// last `@`; an address without any `@` never matches. Subdomains of the target
/// (e.g. bob@mail.string.com) do not count as a match.
fn email_matches_domain(email: &str, target_domain: &str) -> bool {
    let address = email.trim().to_lowercase();
    let wanted = target_domain.trim().to_lowercase();

    match address.rsplit_once('@') {
        Some((_local_part, host)) => host.trim() == wanted,
        None => false,
    }
}

/// True if the company name is tied to the target domain (e.g. "String Inc" for string.com).
///
/// The company name and the domain's main label (from `domain_base`) are normalized the
/// same way: lowercased, with every run of non-alphanumeric characters collapsed into a
/// single space. The company matches when its normalized form contains the normalized label.
/// Normalizing both sides lets hyphenated domains match ("My-Company Inc" for
/// my-company.com). Returns `false` when the label is empty or has no alphanumeric characters.
fn company_matches_domain(company: &str, target_domain: &str) -> bool {
    // Lowercase and reduce to alphanumeric tokens joined by single spaces.
    fn normalize(text: &str) -> String {
        text.to_lowercase()
            .split(|c: char| !c.is_alphanumeric())
            .filter(|token| !token.is_empty())
            .collect::<Vec<_>>()
            .join(" ")
    }

    let base = normalize(&domain_base(target_domain));
    // An empty needle would match every company, so bail out explicitly.
    if base.is_empty() {
        return false;
    }
    normalize(company).contains(&base)
}

/// True if the name is tied to the target domain (e.g. "String" or "String LLC" for string.com).
///
/// The name and the domain's main label (from `domain_base`) are normalized the same way:
/// lowercased, with every run of non-alphanumeric characters collapsed into a single space.
/// The name matches when its normalized form contains the normalized label (an exact match
/// is a special case of containment). Normalizing both sides lets hyphenated domains match
/// ("My-Company Admin" for my-company.com). Returns `false` when the label is empty or has
/// no alphanumeric characters.
fn name_matches_domain(name: &str, target_domain: &str) -> bool {
    // Lowercase and reduce to alphanumeric tokens joined by single spaces.
    fn normalize(text: &str) -> String {
        text.to_lowercase()
            .split(|c: char| !c.is_alphanumeric())
            .filter(|token| !token.is_empty())
            .collect::<Vec<_>>()
            .join(" ")
    }

    let base = normalize(&domain_base(target_domain));
    // An empty needle would match every name, so bail out explicitly.
    if base.is_empty() {
        return false;
    }
    normalize(name).contains(&base)
}

/// Reports whether any enabled pivot attribute fails to match the target domain.
///
/// Only the attribute kinds switched on in `config` (`use_name`, `use_email`,
/// `use_company`) are inspected, in that order, and the check stops at the first
/// mismatch. An empty `domain` always yields `false`.
/// A `true` result means the pivot results may have lower fidelity and the user
/// should validate ownership.
pub fn has_low_fidelity_pivots(
    domain: &str,
    attributes: &PivotAttributes,
    config: &PivotConfig,
) -> bool {
    if domain.is_empty() {
        return false;
    }

    // `&&` / `||` short-circuit, so disabled kinds are never scanned and the
    // evaluation stops as soon as one mismatch is found.
    (config.use_name
        && attributes
            .names
            .iter()
            .any(|name| !name_matches_domain(name, domain)))
        || (config.use_email
            && attributes
                .emails
                .iter()
                .any(|email| !email_matches_domain(email, domain)))
        || (config.use_company
            && attributes
                .companies
                .iter()
                .any(|company| !company_matches_domain(company, domain)))
}

// --- Pivot extraction and discovery ---

/// Collect the pivotable attributes of every historical WHOIS record into one set.
///
/// Starts from `PivotAttributes::default()` and merges in the result of
/// `extract_attributes()` for each record, in slice order. An empty slice
/// returns the default value.
pub fn extract_pivot_attributes(records: &[crate::models::WhoisRecord]) -> PivotAttributes {
    records
        .iter()
        .fold(PivotAttributes::default(), |mut combined, record| {
            combined.merge(record.extract_attributes());
            combined
        })
}

/// Run reverse WHOIS lookups for every enabled pivot attribute and gather the hits.
///
/// Names, emails and companies are queried in that order, each only when its
/// `config.use_*` flag is set, through `WhoxyClient::reverse_whois_all_pages`
/// (which, going by its name, fetches every result page). Progress is written to
/// stderr. A failed lookup is reported as a warning and skipped, so this function
/// itself always returns `Ok`. The returned list is not de-duplicated.
pub async fn discover_domains(
    client: &WhoxyClient,
    attributes: &PivotAttributes,
    config: &PivotConfig,
) -> Result<Vec<DomainResult>> {
    let mut discovered = Vec::new();

    // Pivot on registrant names.
    if config.use_name {
        for name in &attributes.names {
            eprintln!("Searching reverse WHOIS for name: {} (fetching all pages)...", name);
            let hits = match client.reverse_whois_all_pages("name", name).await {
                Ok(hits) => hits,
                Err(e) => {
                    // Best effort: one failed pivot must not abort the others.
                    eprintln!("Warning: Failed to reverse WHOIS by name '{}': {}", name, e);
                    continue;
                }
            };
            eprintln!("  Found {} domain(s) for name '{}'", hits.len(), name);
            discovered.extend(hits);
        }
    }

    // Pivot on registrant emails.
    if config.use_email {
        for email in &attributes.emails {
            eprintln!("Searching reverse WHOIS for email: {} (fetching all pages)...", email);
            let hits = match client.reverse_whois_all_pages("email", email).await {
                Ok(hits) => hits,
                Err(e) => {
                    eprintln!("Warning: Failed to reverse WHOIS by email '{}': {}", email, e);
                    continue;
                }
            };
            eprintln!("  Found {} domain(s) for email '{}'", hits.len(), email);
            discovered.extend(hits);
        }
    }

    // Pivot on registrant companies.
    if config.use_company {
        for company in &attributes.companies {
            eprintln!("Searching reverse WHOIS for company: {} (fetching all pages)...", company);
            let hits = match client.reverse_whois_all_pages("company", company).await {
                Ok(hits) => hits,
                Err(e) => {
                    eprintln!(
                        "Warning: Failed to reverse WHOIS by company '{}': {}",
                        company, e
                    );
                    continue;
                }
            };
            eprintln!("  Found {} domain(s) for company '{}'", hits.len(), company);
            discovered.extend(hits);
        }
    }

    Ok(discovered)
}

/// Drop repeated domains, comparing `domain_name` case-insensitively.
///
/// The first occurrence of each name is kept with its original casing, and the
/// relative order of the surviving entries is preserved.
pub fn deduplicate_domains(domains: Vec<DomainResult>) -> Vec<DomainResult> {
    let mut seen_names = HashSet::new();

    domains
        .into_iter()
        // `insert` returns false when the lowercased name was already recorded.
        .filter(|entry| seen_names.insert(entry.domain_name.to_lowercase()))
        .collect()
}