// rsubdomain 1.2.14
//
// A high-performance subdomain brute-force tool written in Rust.
// Documentation
use std::collections::HashMap;
use std::net::IpAddr;

#[cfg(feature = "dns-resolver")]
use crate::dns_resolver::{DnsRecord, DnsResolveResult};
use crate::handle::{AggregatedDiscoveredDomain, CdnConfidence, CdnEvidence};

use super::cdn::match_cdn_provider_by_candidate;
use super::ip_range::match_ip_to_cdn_provider;

/// CDN attribution produced by [`analyze_cdn_metadata`] for a single domain entry.
#[derive(Debug, Clone)]
pub(crate) struct CdnMetadata {
    /// Name of the provider selected as the best-supported candidate.
    pub provider: String,
    /// Confidence level computed from the evidence retained for `provider`.
    pub confidence: CdnConfidence,
    /// Evidence items that point at `provider`; items for other providers are dropped.
    pub evidence: Vec<CdnEvidence>,
}

/// One piece of evidence tagged with the provider it points to.
///
/// Used internally while evidence for several providers is still mixed together,
/// before a single provider has been chosen.
#[derive(Debug, Clone)]
struct ProviderEvidence {
    /// Provider name reported by the matcher that produced this evidence.
    provider: String,
    /// The evidence record itself (source, matched value and a detail string).
    evidence: CdnEvidence,
}

pub(crate) fn analyze_cdn_metadata(
    entry: &AggregatedDiscoveredDomain,
    #[cfg(feature = "dns-resolver")] dns_records: Option<&DnsResolveResult>,
) -> Option<CdnMetadata> {
    let mut evidence = Vec::new();

    for (source, value) in collect_name_candidates(entry) {
        if let Some(matched) = match_cdn_provider_by_candidate(value.as_str()) {
            evidence.push(ProviderEvidence {
                provider: matched.provider,
                evidence: CdnEvidence {
                    source,
                    value,
                    detail: format!("{} match {}", matched.match_kind.as_str(), matched.pattern),
                },
            });
        }
    }

    #[cfg(feature = "dns-resolver")]
    if let Some(records) = dns_records {
        for (source, value) in collect_dns_name_candidates(records) {
            if let Some(matched) = match_cdn_provider_by_candidate(value.as_str()) {
                evidence.push(ProviderEvidence {
                    provider: matched.provider,
                    evidence: CdnEvidence {
                        source,
                        value,
                        detail: format!(
                            "{} match {}",
                            matched.match_kind.as_str(),
                            matched.pattern
                        ),
                    },
                });
            }
        }
    }

    for ip in collect_ip_candidates(entry) {
        if let Some(matched) = match_ip_to_cdn_provider(ip) {
            evidence.push(ProviderEvidence {
                provider: matched.provider,
                evidence: CdnEvidence {
                    source: "IP_RANGE".to_string(),
                    value: ip.to_string(),
                    detail: format!("matched {}", matched.cidr),
                },
            });
        }
    }

    if evidence.is_empty() {
        return None;
    }

    let provider = choose_provider(&evidence)?;
    let evidence = evidence
        .into_iter()
        .filter(|item| item.provider == provider)
        .map(|item| item.evidence)
        .collect::<Vec<_>>();
    let confidence = calculate_confidence(&evidence);

    Some(CdnMetadata {
        provider,
        confidence,
        evidence,
    })
}

/// Picks the provider with the largest total evidence weight.
///
/// Each evidence item contributes `confidence_weight` of its source to its
/// provider's total. When totals are equal, the lexicographically greatest
/// provider name wins, which keeps the result independent of hash-map
/// iteration order. Returns `None` for an empty slice.
fn choose_provider(evidence: &[ProviderEvidence]) -> Option<String> {
    let mut totals: HashMap<&str, usize> = HashMap::new();

    for item in evidence {
        *totals.entry(item.provider.as_str()).or_default() += confidence_weight(&item.evidence);
    }

    // Compare by weight first, then by name as the tie-breaker.
    totals
        .into_iter()
        .max_by_key(|&(name, weight)| (weight, name))
        .map(|(name, _)| name.to_owned())
}

/// Derives a confidence level from the evidence kept for the chosen provider.
///
/// * `High`   – a CNAME/NS item whose detail starts with `"suffix match"`, or
///   both a PTR item and an IP-range item are present.
/// * `Medium` – a PTR item or an IP-range item is present (but not both).
/// * `Low`    – anything else.
fn calculate_confidence(evidence: &[CdnEvidence]) -> CdnConfidence {
    let mut strong_name_signal = false;
    let mut saw_ptr = false;
    let mut saw_ip_range = false;

    for item in evidence {
        match item.source.as_str() {
            "CNAME" | "NS" => {
                if item.detail.starts_with("suffix match") {
                    strong_name_signal = true;
                }
            }
            "PTR" => saw_ptr = true,
            "IP_RANGE" => saw_ip_range = true,
            _ => {}
        }
    }

    match (strong_name_signal, saw_ptr, saw_ip_range) {
        (true, _, _) | (_, true, true) => CdnConfidence::High,
        (_, true, _) | (_, _, true) => CdnConfidence::Medium,
        _ => CdnConfidence::Low,
    }
}

/// Lists every value of the entry's CNAME and NS records as
/// `(record_type, value)` pairs, in record order.
fn collect_name_candidates(entry: &AggregatedDiscoveredDomain) -> Vec<(String, String)> {
    let mut candidates = Vec::new();

    for record in entry.records.iter() {
        if !matches!(record.record_type.as_str(), "CNAME" | "NS") {
            continue;
        }
        for value in record.values.iter() {
            candidates.push((record.record_type.clone(), value.clone()));
        }
    }

    candidates
}

/// Lists the targets of CNAME, NS and PTR records from a DNS resolution result
/// as `(record_type, target)` pairs.
///
/// The first element of each pair is the key under which the record was stored
/// in `records.records`, not a label derived from the record variant.
#[cfg(feature = "dns-resolver")]
fn collect_dns_name_candidates(records: &DnsResolveResult) -> Vec<(String, String)> {
    let mut candidates = Vec::new();

    for (record_type, values) in records.records.iter() {
        for record in values.iter() {
            match record {
                DnsRecord::CNAME(target) | DnsRecord::NS(target) | DnsRecord::PTR(target) => {
                    candidates.push((record_type.clone(), target.clone()));
                }
                // Other record kinds carry no host name to match against.
                _ => {}
            }
        }
    }

    candidates
}

/// Parses every value of the entry's A and AAAA records into an `IpAddr`.
///
/// Values that fail to parse as an IP address are silently skipped.
fn collect_ip_candidates(entry: &AggregatedDiscoveredDomain) -> Vec<IpAddr> {
    entry
        .records
        .iter()
        .filter(|record| matches!(record.record_type.as_str(), "A" | "AAAA"))
        .flat_map(|record| record.values.iter())
        .filter_map(|raw| raw.parse::<IpAddr>().ok())
        .collect()
}

/// Weight an evidence item contributes when providers are ranked:
/// 3 for CNAME/NS, 2 for PTR or IP-range matches, 1 for any other source.
fn confidence_weight(evidence: &CdnEvidence) -> usize {
    let source = evidence.source.as_str();

    if matches!(source, "CNAME" | "NS") {
        3
    } else if matches!(source, "PTR" | "IP_RANGE") {
        2
    } else {
        1
    }
}

#[cfg(test)]
mod tests {
    use super::analyze_cdn_metadata;
    use crate::handle::{AggregatedDiscoveredDomain, AggregatedRecordValues, CdnConfidence};

    /// Builds an aggregated entry for `domain` holding a single record of
    /// `record_type` with one `value`, and no CDN information filled in yet.
    fn single_record_entry(
        domain: &str,
        record_type: &str,
        value: &str,
    ) -> AggregatedDiscoveredDomain {
        let record = AggregatedRecordValues {
            record_type: record_type.to_string(),
            values: vec![value.to_string()],
        };

        AggregatedDiscoveredDomain {
            domain: domain.to_string(),
            records: vec![record],
            has_cdn: false,
            possible_cdn: false,
            cdn_provider: None,
            cdn_confidence: None,
            cdn_evidence: Vec::new(),
            cdn_signals: Vec::new(),
            raw_record_count: 1,
            first_seen: 1,
            last_seen: 1,
        }
    }

    /// A CNAME pointing at `foo.cloudflare.net` must be attributed to
    /// Cloudflare with high confidence.
    #[test]
    fn cname_suffix_match_is_high_confidence() {
        let entry = single_record_entry("cdn.example.com", "CNAME", "foo.cloudflare.net");

        let metadata = analyze_cdn_metadata(
            &entry,
            #[cfg(feature = "dns-resolver")]
            None,
        )
        .unwrap();

        assert_eq!(metadata.provider, "Cloudflare");
        assert_eq!(metadata.confidence, CdnConfidence::High);
    }
}