backdisco 0.4.0

Discover backend origins from CDN frontends using LLM-assisted pattern analysis and brute-force enumeration.
use anyhow::Result;
use indicatif::ProgressBar;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Semaphore;
use trust_dns_resolver::config::{ResolverConfig, ResolverOpts};
use trust_dns_resolver::AsyncResolver;

use crate::pattern::Candidate;
use crate::san::normalize_host;

/// Resolver type shared by the functions in this file: an `AsyncResolver`
/// parameterized with `GenericConnector<TokioRuntimeProvider>`.
type Resolver = AsyncResolver<trust_dns_resolver::name_server::GenericConnector<trust_dns_resolver::name_server::TokioRuntimeProvider>>;

/// Outcome of verifying one candidate hostname.
#[derive(Debug, Clone)]
pub struct VerifyResult {
    /// Hostname the checks were run against.
    pub hostname: String,
    /// IP address obtained from DNS, or `None` when resolution failed or timed out.
    pub dns_ip: Option<String>,
    /// Status text of the HTTPS probe (e.g. `"200 OK"`), or `None` when no response was recorded.
    pub https_status: Option<String>,
    /// Status text of the plain-HTTP probe, or `None` when no response was recorded.
    pub http_status: Option<String>,
}

impl VerifyResult {
    /// A host counts as live as soon as DNS produced an address.
    pub fn is_live(&self) -> bool {
        !self.dns_ip.is_none()
    }

    /// True when at least one of the HTTPS / HTTP probes recorded a status.
    pub fn has_http_response(&self) -> bool {
        [&self.https_status, &self.http_status]
            .iter()
            .any(|status| status.is_some())
    }

    /// Human-readable description of why verification fell short.
    ///
    /// DNS failure takes precedence over missing HTTP responses; a result
    /// that resolved and answered over HTTP(S) yields `"unknown error"`.
    pub fn error_message(&self) -> String {
        let reason = match (&self.dns_ip, self.has_http_response()) {
            (None, _) => "no DNS resolution",
            (Some(_), false) => "DNS resolved but HTTP/HTTPS checks failed",
            (Some(_), true) => "unknown error",
        };
        reason.to_string()
    }
}

/// Probe each base domain seen in `candidates` for wildcard DNS.
///
/// The base domain is taken to be the last two dot-separated labels of a
/// candidate hostname; hostnames with fewer than two labels are skipped.
/// For every distinct base domain a probe name with a generated label is
/// resolved, bounded by `timeout`. If the probe resolves, the domain is
/// recorded in the returned map as `base_domain -> resolved_ip`.
pub async fn detect_wildcard_dns(
    candidates: &[Candidate],
    timeout: Duration,
) -> HashMap<String, String> {
    let resolver: Arc<Resolver> = Arc::new(AsyncResolver::tokio(
        ResolverConfig::default(),
        ResolverOpts::default(),
    ));

    // Collect the distinct "<second-level>.<top-level>" pairs.
    let base_domains: HashSet<String> = candidates
        .iter()
        .filter_map(|candidate| {
            let mut from_right = candidate.hostname.rsplit('.');
            let last = from_right.next()?;
            let second_last = from_right.next()?;
            Some(format!("{}.{}", second_last, last))
        })
        .collect();

    let mut wildcards = HashMap::new();

    for base in base_domains {
        // A name that is not expected to exist; an answer implies a wildcard record.
        let probe_label = format!("xz--wildcard-probe-{}", rand_label());
        let probe_host = format!("{}.{}", probe_label, base);

        let lookup = resolve_dns(&probe_host, Arc::clone(&resolver));
        // Resolution errors and timeouts both mean "no wildcard detected".
        if let Ok(Ok(ip)) = tokio::time::timeout(timeout, lookup).await {
            wildcards.insert(base, ip);
        }
    }

    wildcards
}

/// Produce a time-derived label for wildcard probing.
///
/// The label is the sub-second nanosecond component of the current system
/// time, rendered as at least eight zero-padded lowercase hex digits. If the
/// clock reads earlier than the Unix epoch the value falls back to zero.
fn rand_label() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};

    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|since_epoch| since_epoch.subsec_nanos())
        .unwrap_or(0);

    format!("{:08x}", nanos)
}

pub async fn verify_candidates(
    candidates: Vec<Candidate>,
    concurrency: usize,
    timeout: Duration,
    dns_only: bool,
    progress: Option<ProgressBar>,
) -> Vec<VerifyResult> {
    use std::sync::atomic::{AtomicU64, Ordering};
    
    let semaphore = Arc::new(Semaphore::new(concurrency));
    let resolver: Arc<Resolver> = Arc::new(
        AsyncResolver::tokio(ResolverConfig::default(), ResolverOpts::default())
    );
    let live_count = Arc::new(AtomicU64::new(0));

    let tasks: Vec<_> = candidates
        .into_iter()
        .map(|candidate| {
            let permit = semaphore.clone();
            let resolver = resolver.clone();
            let hostname = candidate.hostname.clone();
            let progress = progress.clone();
            let live_count = live_count.clone();
            tokio::spawn(async move {
                let _permit = permit.acquire().await.unwrap();
                let result = verify_single(hostname, resolver, timeout, dns_only).await;
                
                // Update progress if provided
                if let Some(pb) = progress {
                    pb.inc(1);
                    if result.is_live() {
                        let live = live_count.fetch_add(1, Ordering::Relaxed) + 1;
                        pb.set_message(format!(
                            "Verifying candidates | {} live found",
                            live
                        ));
                    } else {
                        let live = live_count.load(Ordering::Relaxed);
                        pb.set_message(format!(
                            "Verifying candidates | {} live found",
                            live
                        ));
                    }
                }
                
                result
            })
        })
        .collect();

    let mut results = Vec::new();
    for task in tasks {
        if let Ok(result) = task.await {
            results.push(result);
        }
    }

    results
}

/// Run the DNS check, and optionally the HTTP/HTTPS checks, for one candidate.
///
/// The candidate string is first passed through `normalize_host`; only the
/// hostname part of its result is used here. DNS resolution is bounded by
/// `timeout`, and both a resolution error and a timeout leave `dns_ip` as
/// `None`. The HTTP/HTTPS probes run only when DNS resolved and `dns_only`
/// is false; otherwise both status fields stay `None`.
async fn verify_single(
    candidate: String,
    resolver: Arc<Resolver>,
    timeout: Duration,
    dns_only: bool,
) -> VerifyResult {
    let (hostname, _port) = normalize_host(&candidate);

    // Outer `ok()` discards a timeout, inner `Result::ok` discards a lookup error.
    let dns_ip = tokio::time::timeout(timeout, resolve_dns(&hostname, resolver))
        .await
        .ok()
        .and_then(Result::ok);

    let should_probe_http = dns_ip.is_some() && !dns_only;
    let (https_status, http_status) = if should_probe_http {
        let probe = check_http_both(&hostname, timeout).await;
        (probe.https_status, probe.http_status)
    } else {
        (None, None)
    };

    VerifyResult {
        hostname,
        dns_ip,
        https_status,
        http_status,
    }
}

/// Resolve `hostname` and return the first IP address of the answer as text.
///
/// # Errors
///
/// Returns the resolver's error if the lookup fails, or an error with the
/// message `"No IP addresses found"` if the lookup succeeds but yields no
/// addresses.
async fn resolve_dns(
    hostname: &str,
    resolver: Arc<Resolver>,
) -> Result<String> {
    let response = resolver.lookup_ip(hostname).await?;
    // Only the first address is needed; take it directly rather than using a
    // loop that always returns on its first iteration.
    response
        .iter()
        .next()
        .map(|ip| ip.to_string())
        .ok_or_else(|| anyhow::anyhow!("No IP addresses found"))
}

/// Status strings for the HTTPS and HTTP probes of a single host.
struct HttpBothResult {
    /// Status text from the `https://` probe, or `None` when no status was obtained.
    https_status: Option<String>,
    /// Status text from the `http://` probe, or `None` when no status was obtained.
    http_status: Option<String>,
}

/// Probe `hostname` over HTTPS and HTTP concurrently.
///
/// A client is built for this call with the given request `timeout`,
/// certificate validation disabled, and redirects not followed. If the client
/// cannot be built, both statuses are `None`.
async fn check_http_both(hostname: &str, timeout: Duration) -> HttpBothResult {
    let built = reqwest::Client::builder()
        .timeout(timeout)
        // Invalid certificates are accepted on purpose: common for backend origins.
        .danger_accept_invalid_certs(true)
        // Redirects are not followed so the status reflects the host's own response.
        .redirect(reqwest::redirect::Policy::none())
        .build();

    let (https_status, http_status) = match built {
        Ok(client) => {
            let https_url = format!("https://{}", hostname);
            let http_url = format!("http://{}", hostname);
            // Both schemes are polled concurrently within this task.
            tokio::join!(
                check_single_protocol(&client, &https_url, timeout),
                check_single_protocol(&client, &http_url, timeout),
            )
        }
        Err(_) => (None, None),
    };

    HttpBothResult {
        https_status,
        http_status,
    }
}

/// Send a GET request to `url` and describe the response status.
///
/// Returns `Some("<code> <reason>")` for any HTTP response, whatever its
/// status class, with `"Unknown"` standing in when the status code has no
/// canonical reason phrase. Returns `None` if the request fails or does not
/// complete within `timeout`.
async fn check_single_protocol(client: &reqwest::Client, url: &str, timeout: Duration) -> Option<String> {
    let response = tokio::time::timeout(timeout, client.get(url).send())
        .await
        .ok()? // timed out
        .ok()?; // request error

    let status = response.status();
    Some(format!(
        "{} {}",
        status.as_u16(),
        status.canonical_reason().unwrap_or("Unknown")
    ))
}