whoxydse 0.1.1

Discover related top-level domains using the Whoxy API: historical WHOIS, reverse WHOIS, and DNS verification
use serde::{Deserialize, Serialize};
use std::collections::HashSet;

/// Contact information from WHOIS records.
///
/// Every field is optional; a key that is missing from the deserialized input
/// leaves the field as `None` (`#[serde(default)]`).
// NOTE(review): no `#[serde(rename = ...)]` is present, so input keys must be
// spelled exactly like the field names — confirm against real API output.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Contact {
    /// Name of the contact; trimmed by `normalized_name`.
    #[serde(default)]
    pub name: Option<String>,
    /// Email address of the contact; trimmed and lowercased by `normalized_email`.
    #[serde(default)]
    pub email: Option<String>,
    /// Organization of the contact; consulted first by `normalized_company`.
    #[serde(default)]
    pub organization: Option<String>,
    /// Company of the contact; used by `normalized_company` when `organization` is absent.
    #[serde(default)]
    pub company: Option<String>,
}

/// Historical WHOIS record.
///
/// Holds up to three contacts plus registrar and date information. Every field
/// is optional and stays `None` when its key is missing from the input.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WhoisRecord {
    /// Registrant contact, if the record carries one.
    #[serde(default)]
    pub registrant: Option<Contact>,
    /// Administrative contact, if the record carries one.
    #[serde(default)]
    pub admin: Option<Contact>,
    /// Technical contact, if the record carries one.
    #[serde(default)]
    pub tech: Option<Contact>,
    /// Registrar, kept as a plain string.
    #[serde(default)]
    pub registrar: Option<String>,
    /// Creation date, kept as the raw string (no parsing is done here).
    #[serde(default)]
    pub created_date: Option<String>,
    /// Last-update date, kept as the raw string.
    #[serde(default)]
    pub updated_date: Option<String>,
    /// Expiry date, kept as the raw string.
    #[serde(default)]
    pub expiry_date: Option<String>,
}

/// Response from WHOIS History API.
///
/// All fields use `#[serde(default)]`, so a missing key yields `0`, `None` or an
/// empty vector rather than a deserialization error.
#[derive(Debug, Deserialize)]
pub struct HistoryResponse {
    /// Numeric status reported by the API; `0` when the key is missing.
    // NOTE(review): the meaning of individual status values is not visible here.
    #[serde(default)]
    pub status: i32,
    /// Optional explanation accompanying `status`.
    #[serde(default)]
    pub status_reason: Option<String>,
    /// Record count as reported by the API.
    // NOTE(review): `dead_code` is silenced although the field is `pub`; it is not
    // read anywhere in the visible code — confirm whether the allow is still needed.
    #[serde(default)]
    #[allow(dead_code)]
    pub total_records: Option<i32>,
    /// Historical records; empty when the key is missing.
    #[serde(default)]
    pub history: Vec<WhoisRecord>,
    /// Error message, if the API returned one.
    #[serde(default)]
    pub error: Option<String>,
    /// Numeric error code, if the API returned one.
    #[serde(default)]
    pub error_code: Option<i32>,
}

/// Response from current WHOIS Lookup API.
///
/// The API returns the WHOIS record fields directly at the top level, so this
/// struct repeats the fields of `WhoisRecord` next to the status/error fields.
/// Use `to_record` to obtain just the record portion.
#[derive(Debug, Deserialize)]
pub struct WhoisResponse {
    /// Numeric status reported by the API; `0` when the key is missing.
    #[serde(default)]
    pub status: i32,
    // WHOIS record fields (flattened from WhoisRecord)
    /// Registrant contact, if present.
    #[serde(default)]
    pub registrant: Option<Contact>,
    /// Administrative contact, if present.
    #[serde(default)]
    pub admin: Option<Contact>,
    /// Technical contact, if present.
    #[serde(default)]
    pub tech: Option<Contact>,
    /// Registrar, kept as a plain string.
    #[serde(default)]
    pub registrar: Option<String>,
    /// Creation date, kept as the raw string.
    #[serde(default)]
    pub created_date: Option<String>,
    /// Last-update date, kept as the raw string.
    #[serde(default)]
    pub updated_date: Option<String>,
    /// Expiry date, kept as the raw string.
    #[serde(default)]
    pub expiry_date: Option<String>,
    /// Error message, if the API returned one.
    #[serde(default)]
    pub error: Option<String>,
    /// Numeric error code, if the API returned one.
    #[serde(default)]
    pub error_code: Option<i32>,
}

impl WhoisResponse {
    /// Build a standalone `WhoisRecord` from the record-shaped fields of this response.
    ///
    /// The three contacts, the registrar and the three date strings are cloned;
    /// `status`, `error` and `error_code` are not carried over. The response
    /// itself is left untouched.
    pub fn to_record(&self) -> WhoisRecord {
        // Pull out exactly the fields a record needs and ignore the rest.
        let Self {
            registrant,
            admin,
            tech,
            registrar,
            created_date,
            updated_date,
            expiry_date,
            ..
        } = self;

        WhoisRecord {
            registrant: registrant.clone(),
            admin: admin.clone(),
            tech: tech.clone(),
            registrar: registrar.clone(),
            created_date: created_date.clone(),
            updated_date: updated_date.clone(),
            expiry_date: expiry_date.clone(),
        }
    }
}

/// Domain result from Reverse WHOIS API.
///
/// Also the element type that `ReverseWhoisResponse::post_process` produces when
/// it converts `search_result` entries.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DomainResult {
    /// Domain name; an empty string when the key is missing from the input.
    #[serde(default)]
    pub domain_name: String,
    /// Creation date, kept as the raw string.
    #[serde(default)]
    pub created_date: Option<String>,
    /// Expiry date, kept as the raw string.
    #[serde(default)]
    pub expiry_date: Option<String>,
}

/// Full domain object from search_result array (internal structure).
///
/// The struct is `pub` but its fields are private, so they are only readable
/// from within this module (e.g. by `ReverseWhoisResponse::post_process`).
#[derive(Debug, Deserialize)]
pub struct SearchResultDomain {
    /// Domain name; an empty string when the key is missing from the input.
    #[serde(default)]
    domain_name: String,
    /// Creation date, kept as the raw string. Note the key is `create_date`
    /// here, whereas `DomainResult` uses `created_date`.
    #[serde(default)]
    create_date: Option<String>,
    /// Expiry date, kept as the raw string.
    #[serde(default)]
    expiry_date: Option<String>,
}

/// Response from Reverse WHOIS API.
///
/// Domains may arrive either in `domains` or in `search_result`; call
/// `post_process` after deserialization to fill `domains` from `search_result`
/// when `domains` is empty.
#[derive(Debug, Deserialize)]
pub struct ReverseWhoisResponse {
    /// Numeric status reported by the API; `0` when the key is missing.
    #[serde(default)]
    pub status: i32,
    /// Optional explanation accompanying `status`.
    #[serde(default)]
    pub status_reason: Option<String>,
    /// Domain count as reported by the API.
    #[serde(default)]
    pub total_domains: Option<i32>,
    /// Matching domains; empty when the key is missing (see `post_process`).
    #[serde(default)]
    pub domains: Vec<DomainResult>,
    /// Alternative result array used as the fallback source for `domains`.
    #[serde(default)]
    pub search_result: Option<Vec<SearchResultDomain>>,
    /// Error message, if the API returned one.
    #[serde(default)]
    pub error: Option<String>,
    /// Numeric error code, if the API returned one.
    #[serde(default)]
    pub error_code: Option<i32>,
    // NOTE(review): the three fields below look like pagination metadata; how
    // `page` and `current_page` differ is not visible here — confirm against the API.
    /// `page` value from the response, if any.
    #[serde(default)]
    pub page: Option<i32>,
    /// `current_page` value from the response, if any.
    #[serde(default)]
    pub current_page: Option<i32>,
    /// `per_page` value from the response, if any.
    #[serde(default)]
    pub per_page: Option<i32>,
}

impl ReverseWhoisResponse {
    /// Post-process the response to extract domains from search_result if domains is empty
    pub fn post_process(&mut self) {
        // If domains is empty but search_result has data, extract domains from search_result
        if self.domains.is_empty() {
            if let Some(ref search_results) = self.search_result {
                self.domains = search_results
                    .iter()
                    .map(|sr| DomainResult {
                        domain_name: sr.domain_name.clone(),
                        created_date: sr.create_date.clone(),
                        expiry_date: sr.expiry_date.clone(),
                    })
                    .collect();
            }
        }
    }
}

/// Pivotable attributes extracted from WHOIS records.
///
/// Three de-duplicating sets, one per attribute kind. The `add_*` and `merge`
/// methods screen values before inserting; the fields are `pub`, so direct
/// insertion bypasses that screening.
#[derive(Debug, Clone, Default)]
pub struct PivotAttributes {
    /// Distinct contact names.
    pub names: HashSet<String>,
    /// Distinct email addresses.
    pub emails: HashSet<String>,
    /// Distinct company/organization values.
    pub companies: HashSet<String>,
}

/// Configuration for pivot attribute usage.
///
/// `Default` turns every flag on.
// NOTE(review): the code consuming these flags is not visible here; presumably
// each flag enables pivoting on the matching `PivotAttributes` set — confirm.
#[derive(Debug, Clone)]
pub struct PivotConfig {
    /// Flag for the name attribute.
    pub use_name: bool,
    /// Flag for the email attribute.
    pub use_email: bool,
    /// Flag for the company attribute.
    pub use_company: bool,
}

impl Default for PivotConfig {
    fn default() -> Self {
        Self {
            use_name: true,
            use_email: true,
            use_company: true,
        }
    }
}

/// Statistics for the discovery process.
///
/// Plain counters; `Default` starts every counter at zero with an empty map.
// NOTE(review): the code that updates these counters is not visible here; the
// per-field descriptions below follow the field names — confirm with the caller.
#[derive(Debug, Default)]
pub struct DiscoveryStats {
    /// Number of domains found in total.
    pub total_domains_found: usize,
    /// Number of domains left after deduplication.
    pub after_deduplication: usize,
    /// Number of domains left after DNS verification.
    pub after_dns_verification: usize,
    /// Domain counts keyed by a string (presumably the attribute that found them).
    pub domains_by_attribute: std::collections::HashMap<String, usize>,
}

impl Contact {
    /// Return the contact name with surrounding whitespace removed.
    ///
    /// Yields `None` only when no name is set; a blank name yields `Some("")`.
    pub fn normalized_name(&self) -> Option<String> {
        self.name.as_deref().map(|name| name.trim().to_string())
    }

    /// Return the email address trimmed and lowercased.
    ///
    /// Yields `None` only when no email is set; a blank email yields `Some("")`.
    pub fn normalized_email(&self) -> Option<String> {
        self.email.as_deref().map(|email| email.trim().to_lowercase())
    }

    /// Return the trimmed organization, falling back to the trimmed company.
    ///
    /// `organization` is preferred. `company` is used when `organization` is
    /// unset, and also when `organization` is blank (empty or whitespace only),
    /// so that an empty placeholder does not hide a usable company value.
    /// Yields `None` only when both fields are unset.
    pub fn normalized_company(&self) -> Option<String> {
        let organization = self.organization.as_deref().map(str::trim);
        let company = self.company.as_deref().map(str::trim);

        let chosen = match (organization, company) {
            // A blank organization must not mask the company field.
            (Some(org), Some(alt)) if org.is_empty() => alt,
            (Some(org), _) => org,
            (None, Some(alt)) => alt,
            (None, None) => return None,
        };
        Some(chosen.to_string())
    }
}

impl WhoisRecord {
    /// Extract all pivotable attributes from this record
    pub fn extract_attributes(&self) -> PivotAttributes {
        let mut attrs = PivotAttributes::default();

        // Extract from registrant
        if let Some(ref reg) = self.registrant {
            if let Some(name) = reg.normalized_name() {
                attrs.add_name(name);
            }
            if let Some(email) = reg.normalized_email() {
                attrs.add_email(email);
            }
            if let Some(company) = reg.normalized_company() {
                attrs.add_company(company);
            }
        }

        // Extract from admin contact
        if let Some(ref admin) = self.admin {
            if let Some(name) = admin.normalized_name() {
                attrs.add_name(name);
            }
            if let Some(email) = admin.normalized_email() {
                attrs.add_email(email);
            }
            if let Some(company) = admin.normalized_company() {
                attrs.add_company(company);
            }
        }

        // Extract from tech contact
        if let Some(ref tech) = self.tech {
            if let Some(name) = tech.normalized_name() {
                attrs.add_name(name);
            }
            if let Some(email) = tech.normalized_email() {
                attrs.add_email(email);
            }
            if let Some(company) = tech.normalized_company() {
                attrs.add_company(company);
            }
        }

        attrs
    }
}

impl PivotAttributes {
    /// Decide whether a value may be stored: it must be non-empty and must not
    /// look like a redaction placeholder.
    ///
    /// Shared by `merge` and the `add_*` methods so that both entry points
    /// apply the same screening.
    fn is_pivotable(value: &str) -> bool {
        !value.is_empty() && !is_redacted(value)
    }

    /// Merge another set of attributes into this one.
    ///
    /// Consumes `other`. Values that are empty or redacted are dropped, exactly
    /// as the `add_*` methods would drop them; the fields of `other` are `pub`
    /// and may therefore hold values that never went through `add_*`.
    pub fn merge(&mut self, other: PivotAttributes) {
        self.names
            .extend(other.names.into_iter().filter(|n| Self::is_pivotable(n)));
        self.emails
            .extend(other.emails.into_iter().filter(|e| Self::is_pivotable(e)));
        self.companies
            .extend(other.companies.into_iter().filter(|c| Self::is_pivotable(c)));
    }

    /// Add a name attribute unless it is empty or redacted.
    pub fn add_name(&mut self, name: String) {
        if Self::is_pivotable(&name) {
            self.names.insert(name);
        }
    }

    /// Add an email attribute unless it is empty or redacted.
    pub fn add_email(&mut self, email: String) {
        if Self::is_pivotable(&email) {
            self.emails.insert(email);
        }
    }

    /// Add a company attribute unless it is empty or redacted.
    pub fn add_company(&mut self, company: String) {
        if Self::is_pivotable(&company) {
            self.companies.insert(company);
        }
    }
}

/// Report whether `s` contains the text "redacted", ignoring letter case.
///
/// The whole input is lowercased first, so "REDACTED", "Redacted" and
/// "redacted" all match wherever they occur in the string.
fn is_redacted(s: &str) -> bool {
    const MARKER: &str = "redacted";

    let lowered = s.to_lowercase();
    lowered.contains(MARKER)
}