use std::net::{IpAddr, Ipv4Addr};
use std::sync::atomic::{AtomicU32, AtomicUsize, Ordering};
use std::sync::Arc;
use synapse_pingora::crawler::{
BadBotSeverity, CrawlerConfig, CrawlerDetector, DnsFailurePolicy, VerificationMethod,
};
use tokio::sync::Mutex;
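/// Test-local mock of a DNS resolver. It records reverse and forward lookup
/// counts, serves canned results, and can simulate lookup failures so the
/// reverse -> forward round-trip verification logic can be exercised without
/// real network access.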
struct MockDnsResolver {
reverse_lookups: Arc<AtomicUsize>,
forward_lookups: Arc<AtomicUsize>,
reverse_results: Arc<Mutex<std::collections::HashMap<String, Option<String>>>>,
forward_results: Arc<Mutex<std::collections::HashMap<String, Vec<IpAddr>>>>,
dns_failures: Arc<Mutex<std::collections::HashSet<String>>>,
dns_timeouts: Arc<AtomicU32>,
}
impl MockDnsResolver {
fn new() -> Self {
Self {
reverse_lookups: Arc::new(AtomicUsize::new(0)),
forward_lookups: Arc::new(AtomicUsize::new(0)),
reverse_results: Arc::new(Mutex::new(std::collections::HashMap::new())),
forward_results: Arc::new(Mutex::new(std::collections::HashMap::new())),
dns_failures: Arc::new(Mutex::new(std::collections::HashSet::new())),
dns_timeouts: Arc::new(AtomicU32::new(0)),
}
}
async fn set_reverse_result(&self, ip: IpAddr, hostname: Option<String>) {
let mut results = self.reverse_results.lock().await;
results.insert(ip.to_string(), hostname);
}
async fn set_forward_result(&self, hostname: String, ips: Vec<IpAddr>) {
let mut results = self.forward_results.lock().await;
results.insert(hostname, ips);
}
async fn set_dns_failure(&self, target: String) {
let mut failures = self.dns_failures.lock().await;
failures.insert(target);
}
fn reverse_lookup_count(&self) -> usize {
self.reverse_lookups.load(Ordering::SeqCst)
}
fn forward_lookup_count(&self) -> usize {
self.forward_lookups.load(Ordering::SeqCst)
}
fn timeout_count(&self) -> u32 {
self.dns_timeouts.load(Ordering::SeqCst)
}
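// Simulates a reverse (PTR) lookup against the canned results table.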
async fn reverse_lookup_mock(&self, ip: IpAddr) -> Result<Option<String>, String> {
self.reverse_lookups.fetch_add(1, Ordering::SeqCst);
let results = self.reverse_results.lock().await;
if let Some(result) = results.get(&ip.to_string()) {
Ok(result.clone())
} else {
Ok(None)
}
}
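// Simulates a forward (A/AAAA) lookup; hostnames registered via
// set_dns_failure are treated as timeouts and bump the timeout counter.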
async fn forward_lookup_mock(&self, hostname: &str) -> Result<Vec<IpAddr>, String> {
self.forward_lookups.fetch_add(1, Ordering::SeqCst);
let failures = self.dns_failures.lock().await;
if failures.contains(hostname) {
self.dns_timeouts.fetch_add(1, Ordering::SeqCst);
return Err("DNS Timeout".to_string());
}
drop(failures);
let results = self.forward_results.lock().await;
if let Some(ips) = results.get(hostname) {
Ok(ips.clone())
} else {
Ok(Vec::new())
}
}
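// Mirrors the reverse -> forward round-trip check used for crawler IP
// verification: resolve the hostname for the IP, then confirm the hostname
// resolves back to the original IP.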
async fn verify_ip_mock(&self, ip: IpAddr) -> Result<(bool, Option<String>), String> {
let hostname = match self.reverse_lookup_mock(ip).await? {
Some(h) => h,
None => return Ok((false, None)),
};
let resolved_ips = self.forward_lookup_mock(&hostname).await?;
let verified = resolved_ips.contains(&ip);
Ok((verified, Some(hostname)))
}
}
#[tokio::test]
async fn test_crawler_spoofing_ua_dns_mismatch() {
let mut config = CrawlerConfig::default();
config.verify_legitimate_crawlers = true;
config.dns_failure_policy = DnsFailurePolicy::ApplyRiskPenalty;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let spoofed_ip = IpAddr::V4(Ipv4Addr::new(192, 168, 1, 1));
let googlebot_ua = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
let result = detector.verify(googlebot_ua, spoofed_ip).await;
assert!(result.is_crawler, "Should detect as crawler");
assert_eq!(
result.crawler_name,
Some("Googlebot".to_string()),
"Should identify as Googlebot"
);
assert!(result.user_agent_match, "Should match UA pattern");
assert!(
result.suspicious || !result.verified,
"Should be suspicious or unverified due to DNS mismatch"
);
println!("Spoofing test result: {:?}", result);
}
#[tokio::test]
async fn test_dns_verification_reverse_forward_roundtrip() {
let mock_resolver = MockDnsResolver::new();
let legitimate_ip = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 1));
let hostname = "googlebot.example.com".to_string();
mock_resolver
.set_reverse_result(legitimate_ip, Some(hostname.clone()))
.await;
mock_resolver
.set_forward_result(hostname.clone(), vec![legitimate_ip])
.await;
let reverse_result = mock_resolver.reverse_lookup_mock(legitimate_ip).await;
assert!(reverse_result.is_ok(), "Reverse lookup should succeed");
assert_eq!(
reverse_result.unwrap(),
Some(hostname.clone()),
"Should return correct hostname"
);
assert_eq!(
mock_resolver.reverse_lookup_count(),
1,
"Should have performed 1 reverse lookup"
);
let forward_result = mock_resolver.forward_lookup_mock(&hostname).await;
assert!(forward_result.is_ok(), "Forward lookup should succeed");
assert!(
forward_result.unwrap().contains(&legitimate_ip),
"Forward lookup should return original IP"
);
assert_eq!(
mock_resolver.forward_lookup_count(),
1,
"Should have performed 1 forward lookup"
);
let verify_result = mock_resolver.verify_ip_mock(legitimate_ip).await;
assert!(verify_result.is_ok(), "Verification should succeed");
let (verified, resolved_hostname) = verify_result.unwrap();
assert!(verified, "Round-trip verification should succeed");
assert_eq!(
resolved_hostname,
Some(hostname.clone()),
"Should return correct hostname"
);
}
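// Companion sketch to the round-trip test above, using only the
// MockDnsResolver defined in this file (no CrawlerDetector involved): the
// claimed IP reverse-resolves to a crawler-looking hostname, but the forward
// lookup points elsewhere, so round-trip verification must fail.
#[tokio::test]
async fn test_dns_verification_reverse_forward_mismatch() {
    let mock_resolver = MockDnsResolver::new();
    let claimed_ip = IpAddr::V4(Ipv4Addr::new(203, 0, 113, 50));
    let real_ip = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 50));
    let hostname = "googlebot.example.com".to_string();
    mock_resolver
        .set_reverse_result(claimed_ip, Some(hostname.clone()))
        .await;
    mock_resolver
        .set_forward_result(hostname.clone(), vec![real_ip])
        .await;
    let (verified, resolved_hostname) = mock_resolver
        .verify_ip_mock(claimed_ip)
        .await
        .expect("Mock verification should not error");
    assert!(
        !verified,
        "Round-trip verification should fail when the forward lookup returns a different IP"
    );
    assert_eq!(
        resolved_hostname,
        Some(hostname),
        "Hostname from the reverse lookup should still be reported"
    );
    assert_eq!(mock_resolver.reverse_lookup_count(), 1);
    assert_eq!(mock_resolver.forward_lookup_count(), 1);
}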
#[tokio::test]
async fn test_dns_cache_ttl_behavior() {
let mut config = CrawlerConfig::default();
config.dns_cache_ttl_secs = 1;
config.verify_legitimate_crawlers = true;
config.dns_failure_policy = DnsFailurePolicy::ApplyRiskPenalty;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let googlebot_ip = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 100));
let googlebot_ua = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
let initial_stats = detector.stats();
let first_result = detector.verify(googlebot_ua, googlebot_ip).await;
let stats_after_first = detector.stats();
println!("Stats after first verify: {:?}", stats_after_first);
let second_result = detector.verify(googlebot_ua, googlebot_ip).await;
let stats_after_second = detector.stats();
println!("Stats after second verify: {:?}", stats_after_second);
assert!(
stats_after_second.cache_hits > initial_stats.cache_hits,
"Should have cache hit on second request"
);
assert_eq!(
first_result.is_crawler, second_result.is_crawler,
"Cache should return same crawler status"
);
assert_eq!(
first_result.crawler_name, second_result.crawler_name,
"Cache should return same crawler name"
);
tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
let _third_result = detector.verify(googlebot_ua, googlebot_ip).await;
let stats_after_third = detector.stats();
println!(
"Stats after third verify (post-TTL): {:?}",
stats_after_third
);
assert!(
stats_after_third.cache_misses >= stats_after_second.cache_misses,
"Should eventually have cache misses after TTL expiration"
);
}
#[tokio::test]
async fn test_bad_bot_blocking_dns_failure_policy_block() {
let mut config = CrawlerConfig::default();
config.block_bad_bots = true;
config.verify_legitimate_crawlers = true;
config.dns_failure_policy = DnsFailurePolicy::Block;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let bad_bot_ua = "sqlmap/1.0";
let client_ip = IpAddr::V4(Ipv4Addr::new(203, 0, 113, 1));
let result = detector.verify(bad_bot_ua, client_ip).await;
assert!(result.suspicious, "Bad bot should be marked suspicious");
assert_eq!(
result.bad_bot_match,
Some("SQLMap".to_string()),
"Should detect SQLMap"
);
assert_eq!(
result.bad_bot_severity,
Some(BadBotSeverity::High),
"SQLMap should have HIGH severity"
);
let stats = detector.stats();
assert!(stats.bad_bots > 0, "Stats should track bad bot detection");
println!("Bad bot detection result: {:?}", result);
println!("Detector stats: {:?}", stats);
}
#[tokio::test]
async fn test_dns_failure_policy_block() {
let mut config = CrawlerConfig::default();
config.verify_legitimate_crawlers = true;
config.dns_failure_policy = DnsFailurePolicy::Block;
config.dns_timeout_ms = 100;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let googlebot_ua = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
let client_ip = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 1));
let result = detector.verify(googlebot_ua, client_ip).await;
assert!(
result.suspicious || !result.verified,
"Should be suspicious or unverified when DNS fails with Block policy"
);
println!("DNS failure block policy result: {:?}", result);
println!("Suspicion reasons: {:?}", result.suspicion_reasons);
}
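// Hedged sketch exercising MockDnsResolver's failure-injection helpers
// (set_dns_failure / timeout_count), which the detector-level test above does
// not touch. It relies only on the mock defined in this file and makes no
// assumptions about CrawlerDetector behaviour.
#[tokio::test]
async fn test_mock_dns_failure_injection_counts_timeouts() {
    let mock_resolver = MockDnsResolver::new();
    let failing_hostname = "crawl-66-249-66-1.googlebot.com".to_string();
    // Registering the hostname makes subsequent forward lookups fail as timeouts.
    mock_resolver.set_dns_failure(failing_hostname.clone()).await;
    let result = mock_resolver.forward_lookup_mock(&failing_hostname).await;
    assert!(result.is_err(), "Injected failure should surface as an error");
    assert_eq!(
        mock_resolver.timeout_count(),
        1,
        "Timeout counter should increment on injected failure"
    );
    assert_eq!(
        mock_resolver.forward_lookup_count(),
        1,
        "The failed attempt should still be counted as a forward lookup"
    );
}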
#[tokio::test]
async fn test_dns_failure_policy_apply_risk_penalty() {
let mut config = CrawlerConfig::default();
config.verify_legitimate_crawlers = true;
config.dns_failure_policy = DnsFailurePolicy::ApplyRiskPenalty;
config.dns_failure_risk_penalty = 75;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let googlebot_ua = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
let client_ip = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 1));
let result = detector.verify(googlebot_ua, client_ip).await;
assert!(
result.dns_failure_penalty > 0
|| result.suspicion_reasons.iter().any(|r| r.contains("DNS")),
"Should apply risk penalty or include DNS-related suspicion reason"
);
println!("DNS failure risk penalty result: {:?}", result);
}
#[tokio::test]
async fn test_dns_failure_policy_allow() {
let mut config = CrawlerConfig::default();
config.verify_legitimate_crawlers = true;
config.dns_failure_policy = DnsFailurePolicy::Allow;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let googlebot_ua = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
let client_ip = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 1));
let result = detector.verify(googlebot_ua, client_ip).await;
assert!(
result.is_crawler,
"Should still recognize as crawler with Allow policy"
);
println!("DNS failure allow policy result: {:?}", result);
}
#[tokio::test]
async fn test_bad_bot_severity_known_crawler() {
let mut config = CrawlerConfig::default();
config.block_bad_bots = true;
config.verify_legitimate_crawlers = false;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let googlebot_ua = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
let client_ip = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 1));
let result = detector.verify(googlebot_ua, client_ip).await;
assert!(result.is_crawler, "Should detect as legitimate crawler");
assert_eq!(
result.crawler_name,
Some("Googlebot".to_string()),
"Should identify as Googlebot"
);
assert!(
result.bad_bot_match.is_none(),
"Should NOT match as bad bot"
);
println!("Known crawler result: {:?}", result);
}
#[tokio::test]
async fn test_bad_bot_severity_high() {
let mut config = CrawlerConfig::default();
config.block_bad_bots = true;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let sqlmap_ua = "sqlmap/1.0";
let client_ip = IpAddr::V4(Ipv4Addr::new(203, 0, 113, 1));
let result = detector.verify(sqlmap_ua, client_ip).await;
assert!(result.suspicious, "SQLMap should be suspicious");
assert_eq!(
result.bad_bot_severity,
Some(BadBotSeverity::High),
"SQLMap should have HIGH severity"
);
println!("HIGH severity bad bot result: {:?}", result);
}
#[tokio::test]
async fn test_bad_bot_severity_medium() {
let mut config = CrawlerConfig::default();
config.block_bad_bots = true;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let burp_ua = "Burp Suite";
let client_ip = IpAddr::V4(Ipv4Addr::new(203, 0, 113, 2));
let result = detector.verify(burp_ua, client_ip).await;
assert!(result.suspicious, "Burp should be suspicious");
assert_eq!(
result.bad_bot_severity,
Some(BadBotSeverity::Medium),
"Burp Suite should have MEDIUM severity"
);
println!("MEDIUM severity bad bot result: {:?}", result);
}
#[tokio::test]
async fn test_bad_bot_severity_low() {
let mut config = CrawlerConfig::default();
config.block_bad_bots = true;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let python_ua = "python-urllib";
let client_ip = IpAddr::V4(Ipv4Addr::new(203, 0, 113, 3));
let result = detector.verify(python_ua, client_ip).await;
assert!(result.suspicious, "Python scraper should be suspicious");
assert_eq!(
result.bad_bot_severity,
Some(BadBotSeverity::Low),
"Python urllib should have LOW severity"
);
println!("LOW severity bad bot result: {:?}", result);
}
#[tokio::test]
async fn test_legitimate_crawler_no_bad_bot_match() {
let mut config = CrawlerConfig::default();
config.block_bad_bots = true;
config.verify_legitimate_crawlers = false;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let legitimate_crawlers = vec![
(
"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
"Googlebot",
),
(
"Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
"Bingbot",
),
(
"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
"Baiduspider",
),
(
"Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
"YandexBot",
),
];
for (ua, expected_name) in legitimate_crawlers {
let client_ip = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 100));
let result = detector.verify(ua, client_ip).await;
assert!(
result.is_crawler,
"Should detect {} as crawler",
expected_name
);
assert_eq!(
result.crawler_name,
Some(expected_name.to_string()),
"Should identify as {}",
expected_name
);
assert!(
result.bad_bot_match.is_none(),
"Should NOT match as bad bot: {}",
expected_name
);
println!(
"Legitimate crawler {}: verified={}, suspicious={}",
expected_name, result.verified, result.suspicious
);
}
}
#[tokio::test]
async fn test_cache_hits_and_misses() {
let mut config = CrawlerConfig::default();
config.verify_legitimate_crawlers = false;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let ua = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
let ip = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 1));
let initial_stats = detector.stats();
assert_eq!(initial_stats.cache_hits, 0);
assert_eq!(initial_stats.cache_misses, 0);
detector.verify(ua, ip).await;
let stats_after_first = detector.stats();
assert_eq!(
stats_after_first.cache_misses, 1,
"First request should be cache miss"
);
detector.verify(ua, ip).await;
let stats_after_second = detector.stats();
assert_eq!(
stats_after_second.cache_hits, 1,
"Second request should be cache hit"
);
assert_eq!(
stats_after_second.cache_misses, 1,
"Cache miss count should not increase"
);
detector.verify(ua, ip).await;
let stats_after_third = detector.stats();
assert_eq!(
stats_after_third.cache_hits, 2,
"Third request should be cache hit"
);
assert_eq!(
stats_after_third.cache_misses, 1,
"Cache miss count should remain unchanged"
);
println!("Cache statistics: {:?}", stats_after_third);
}
#[tokio::test]
async fn test_oversized_user_agent_rejection() {
let mut config = CrawlerConfig::default();
config.verify_legitimate_crawlers = false;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let oversized_ua = "a".repeat(513);
let client_ip = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 1));
let result = detector.verify(&oversized_ua, client_ip).await;
assert!(result.input_rejected, "Oversized UA should be rejected");
assert!(result.suspicious, "Oversized UA should be suspicious");
assert!(
result
.suspicion_reasons
.iter()
.any(|r| r.contains("exceeds maximum")),
"Should include rejection reason"
);
let stats = detector.stats();
assert!(
stats.input_rejected > 0,
"Stats should track rejected input"
);
println!("Oversized UA result: {:?}", result);
}
#[tokio::test]
async fn test_crawler_stats_distribution() {
let mut config = CrawlerConfig::default();
config.verify_legitimate_crawlers = false;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let test_cases = vec![
(
"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
"Googlebot",
),
(
"Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
"Bingbot",
),
(
"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
"Googlebot",
),
(
"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
"Googlebot",
),
];
for (idx, (ua, _name)) in test_cases.iter().enumerate() {
let ip = IpAddr::V4(Ipv4Addr::new(192, 0, 2, idx as u8 + 1));
detector.verify(ua, ip).await;
}
let distribution = detector.get_crawler_distribution(10);
println!("Crawler distribution: {:?}", distribution);
assert!(!distribution.is_empty(), "Distribution should have entries");
assert_eq!(
distribution.first().map(|(_, count)| *count),
Some(3),
"Most frequent crawler should have 3 hits"
);
}
#[tokio::test]
async fn test_bad_bot_stats_distribution() {
let mut config = CrawlerConfig::default();
config.block_bad_bots = true;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let test_cases = vec!["sqlmap/1.0", "sqlmap/1.0", "nikto/2.0", "sqlmap/1.0"];
for (idx, ua) in test_cases.iter().enumerate() {
let ip = IpAddr::V4(Ipv4Addr::new(203, 0, 113, idx as u8 + 1));
detector.verify(ua, ip).await;
}
let distribution = detector.get_bad_bot_distribution(10);
println!("Bad bot distribution: {:?}", distribution);
assert!(!distribution.is_empty(), "Distribution should have entries");
assert_eq!(
distribution.first().map(|(_, count)| *count),
Some(3),
"Most frequent bad bot should have 3 hits"
);
}
#[tokio::test]
async fn test_verification_method_unverified() {
let mut config = CrawlerConfig::default();
config.verify_legitimate_crawlers = false;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let ua = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
let ip = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 1));
let result = detector.verify(ua, ip).await;
assert_eq!(
result.verification_method,
VerificationMethod::Unverified,
"Should be unverified when DNS verification is disabled"
);
assert!(result.user_agent_match, "Should have matched UA");
assert!(!result.reverse_dns_match, "Should not have checked DNS");
println!("Unverified method result: {:?}", result);
}
#[tokio::test]
async fn test_disabled_detector() {
let mut config = CrawlerConfig::default();
config.enabled = false;
config.block_bad_bots = false;
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
assert!(!detector.is_enabled(), "Detector should be disabled");
let ua = "sqlmap/1.0";
let ip = IpAddr::V4(Ipv4Addr::new(203, 0, 113, 1));
let result = detector.verify(ua, ip).await;
assert!(
!detector.is_enabled(),
"Detector should remain disabled after a verify call"
);
println!("Disabled detector result: {:?}", result);
}
#[tokio::test]
async fn test_normal_browser_ua() {
let config = CrawlerConfig::default();
let detector = CrawlerDetector::new(config)
.await
.expect("Failed to create detector");
let normal_ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
let client_ip = IpAddr::V4(Ipv4Addr::new(203, 0, 113, 100));
let result = detector.verify(normal_ua, client_ip).await;
assert!(
!result.is_crawler,
"Normal browser should not be detected as crawler"
);
assert!(
result.bad_bot_match.is_none(),
"Normal browser should not match bad bot"
);
assert!(
!result.suspicious,
"Normal browser should not be suspicious"
);
println!("Normal browser result: {:?}", result);
}