// scatter-proxy 0.5.0
//
// Async request scheduler for unreliable SOCKS5 proxies — multi-path race for maximum throughput
// Documentation
use rand::Rng;

use crate::health::HealthTracker;

/// Compute a score for a `(proxy, host)` pair used to rank proxies during selection.
///
/// ```text
/// score = affinity × 0.5 + global × 0.3 + recency × 0.2 + jitter × 0.1
/// ```
///
/// - **affinity** — sliding-window success rate for this specific `(proxy, host)` pair
///   (defaults to 0.5 when no data exists).
/// - **global** — sliding-window success rate for this proxy across *all* hosts
///   (defaults to 0.5 when no data exists).
/// - **recency** — `1.0 / (1.0 + minutes_since_last_success)`, rewarding proxies
///   that have succeeded recently.
/// - **jitter** — uniform random noise in `[0.0, 0.1)` for anti-starvation /
///   load-spreading.
pub fn compute_score(health: &HealthTracker, proxy: &str, host: &str) -> f64 {
    let affinity = health.get_affinity(proxy, host);
    let global = health.get_global_health(proxy);
    let minutes = health.minutes_since_last_success(proxy, host);
    let recency = 1.0 / (1.0 + minutes);
    let jitter: f64 = rand::thread_rng().gen_range(0.0..0.1);

    affinity * 0.5 + global * 0.3 + recency * 0.2 + jitter * 0.1
}

/// Test-only twin of [`compute_score`] in which the caller supplies the jitter.
///
/// Applies the same weights (0.5 affinity, 0.3 global, 0.2 recency, 0.1 jitter)
/// but never touches the random number generator, so tests can pin the noise
/// term to a known value.
#[cfg(test)]
fn compute_score_deterministic(
    health: &HealthTracker,
    proxy: &str,
    host: &str,
    jitter: f64,
) -> f64 {
    let affinity_term = 0.5 * health.get_affinity(proxy, host);
    let global_term = 0.3 * health.get_global_health(proxy);

    // recency = 1 / (1 + minutes since the pair last succeeded)
    let idle_minutes = health.minutes_since_last_success(proxy, host);
    let recency_term = 0.2 * (1.0 / (1.0 + idle_minutes));

    let jitter_term = 0.1 * jitter;

    affinity_term + global_term + recency_term + jitter_term
}

/// Adapt the race fan-out *K* based on available proxy count and recent success rate.
///
/// The goal is to avoid unnecessary parallel requests when the pool is healthy,
/// while still providing redundancy when things are degraded.
///
/// | `available` | `avg_success_rate` | Result |
/// |---|---|---|
/// | ≤ 2 | any | `available` |
/// | 3 ..= 6 | > 0.85 | 1 |
/// | 3 ..= 6 | ≤ 0.85 | `min(base_k, 2)` |
/// | > 10 | > 0.85 | 1 |
/// | ≥ 7 (rows above not matching) | > 0.6 | `min(base_k, 2)` |
/// | ≥ 7 | ≤ 0.6 | `min(base_k, available)` |
///
/// The result never exceeds `available_count`: a race cannot use more distinct
/// proxies than exist, so a `base_k` larger than the pool is clamped to the
/// pool size.
///
/// All thresholds are strict (`>`). A `NaN` success rate therefore fails every
/// comparison and is treated like a degraded pool.
///
/// # Parameters
///
/// - `available_count` — number of proxies that can currently be raced.
/// - `avg_success_rate` — recent average success rate, compared against 0.85 and 0.6.
/// - `base_k` — the configured fan-out used when the pool is degraded.
pub fn adaptive_k(available_count: usize, avg_success_rate: f64, base_k: usize) -> usize {
    /// Above this rate a pool counts as healthy and a single request suffices.
    const HEALTHY_RATE: f64 = 0.85;
    /// Above this rate a medium or large pool only needs light redundancy.
    const MODERATE_RATE: f64 = 0.6;
    /// Fan-out cap applied when only light redundancy is needed.
    const LIGHT_FAN_OUT: usize = 2;

    let healthy = avg_success_rate > HEALTHY_RATE;
    let moderate = avg_success_rate > MODERATE_RATE;

    let k = match available_count {
        // Tiny pool: race everything there is.
        0..=2 => available_count,
        // Small pool.
        3..=6 if healthy => 1,
        3..=6 => base_k.min(LIGHT_FAN_OUT),
        // Large pool (> 10) that is healthy. Medium pools (7..=10) deliberately
        // keep light redundancy even when healthy.
        n if n > 10 && healthy => 1,
        // Medium or large pool doing reasonably well.
        _ if moderate => base_k.min(LIGHT_FAN_OUT),
        // Degraded medium or large pool: full configured fan-out.
        _ => base_k,
    };

    // Never ask for more parallel paths than there are proxies to race.
    k.min(available_count)
}

// ─── Tests ───────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    const PROXY_A: &str = "socks5://1.2.3.4:1080";
    const PROXY_B: &str = "socks5://5.6.7.8:9050";
    const HOST_X: &str = "yunhq.sse.com.cn";
    const HOST_Y: &str = "www.szse.cn";

    /// Fresh tracker built with `HealthTracker::new(10)`
    /// (presumably the sliding-window size — confirm against `HealthTracker`).
    fn tracker() -> HealthTracker {
        HealthTracker::new(10)
    }

    /// Check `adaptive_k` against `(available, success_rate, base_k, expected)` rows.
    fn expect_k(rows: &[(usize, f64, usize, usize)]) {
        for &(available, rate, base_k, expected) in rows {
            assert_eq!(
                adaptive_k(available, rate, base_k),
                expected,
                "adaptive_k({available}, {rate}, {base_k})"
            );
        }
    }

    // ── compute_score ───────────────────────────────────────────────────

    #[test]
    fn score_for_unknown_pair_uses_priors() {
        // Nothing recorded: both rates fall back to 0.5 and recency is expected
        // to be ~0, so the score is 0.5*0.5 + 0.5*0.3 = 0.40 plus < 0.01 jitter.
        let score = compute_score(&tracker(), PROXY_A, HOST_X);
        assert!(score >= 0.39, "score = {score}");
        assert!(score < 0.42, "score = {score}");
    }

    #[test]
    fn score_is_higher_for_successful_proxy() {
        let ht = tracker();

        // PROXY_A only ever succeeds, PROXY_B only ever fails.
        for _ in 0..5 {
            ht.record_success(PROXY_A, HOST_X, 50.0);
        }
        for _ in 0..5 {
            ht.record_failure(PROXY_B, HOST_X);
        }

        // Jitter is random, so sample repeatedly and count how often A outranks B.
        let a_wins = (0..100)
            .filter(|_| {
                let sa = compute_score(&ht, PROXY_A, HOST_X);
                let sb = compute_score(&ht, PROXY_B, HOST_X);
                sa > sb
            })
            .count();

        assert!(
            a_wins > 90,
            "expected PROXY_A to win most of the time, but only won {a_wins}/100"
        );
    }

    #[test]
    fn deterministic_score_no_jitter() {
        let ht = tracker();
        ht.record_success(PROXY_A, HOST_X, 100.0);

        // Two back-to-back evaluations with identical jitter must agree
        // (up to the tiny recency drift between the two calls).
        let first = compute_score_deterministic(&ht, PROXY_A, HOST_X, 0.0);
        let second = compute_score_deterministic(&ht, PROXY_A, HOST_X, 0.0);
        let gap = (first - second).abs();
        assert!(gap < 1e-6, "deterministic scores should be equal");
    }

    #[test]
    fn score_weights_sum_correctly() {
        // One success just now: affinity = 1.0, global = 1.0, recency ≈ 1.0.
        // With jitter pinned to 0.05 the score is
        // 1.0*0.5 + 1.0*0.3 + ~1.0*0.2 + 0.05*0.1 ≈ 1.005.
        let ht = tracker();
        ht.record_success(PROXY_A, HOST_X, 10.0);

        let score = compute_score_deterministic(&ht, PROXY_A, HOST_X, 0.05);
        assert!(score > 0.99, "score = {score}");
        assert!(score < 1.01, "score = {score}");
    }

    #[test]
    fn score_recency_decays_with_time() {
        // No time actually elapses in this test. It covers the far end of the
        // decay instead: a pair with failures only has never succeeded, so
        // recency is expected to be ~0 alongside affinity = 0 and global = 0.
        let ht = tracker();
        ht.record_failure(PROXY_A, HOST_X);

        let score = compute_score_deterministic(&ht, PROXY_A, HOST_X, 0.0);
        assert!(score < 0.01, "score = {score}");
    }

    #[test]
    fn score_bounded_in_reasonable_range() {
        let ht = tracker();
        let samples = (0..10).map(|_| compute_score(&ht, PROXY_A, HOST_X));
        for score in samples {
            assert!(score >= 0.0, "score should be non-negative: {score}");
            assert!(score <= 1.1, "score should be bounded: {score}");
        }
    }

    #[test]
    fn score_uses_global_health_for_unknown_host() {
        let ht = tracker();
        // All history is against HOST_X; HOST_Y has never been seen.
        for _ in 0..5 {
            ht.record_success(PROXY_A, HOST_X, 50.0);
        }

        // For HOST_Y: affinity falls back to 0.5, global is 1.0, and recency is
        // ~0 because this pair never succeeded:
        // 0.5*0.5 + 1.0*0.3 + ~0*0.2 ≈ 0.55.
        let score = compute_score_deterministic(&ht, PROXY_A, HOST_Y, 0.0);
        assert!(score > 0.54, "score = {score}");
        assert!(score < 0.57, "score = {score}");
    }

    #[test]
    fn score_mixed_history() {
        let ht = HealthTracker::new(4);
        // Three successes followed by one failure → affinity = global = 0.75.
        for _ in 0..3 {
            ht.record_success(PROXY_A, HOST_X, 10.0);
        }
        ht.record_failure(PROXY_A, HOST_X);

        // score = 0.75*0.5 + 0.75*0.3 + recency*0.2 = 0.60 + recency*0.2.
        // The last success was moments ago, so recency should be near 1 and the
        // score near 0.80; the bounds accept any recency in [0, 1].
        let score = compute_score_deterministic(&ht, PROXY_A, HOST_X, 0.0);
        assert!(score > 0.59, "score = {score}");
        assert!(score < 0.82, "score = {score}");
    }

    // ── adaptive_k ──────────────────────────────────────────────────────

    #[test]
    fn adaptive_k_zero_available() {
        expect_k(&[(0, 0.9, 3, 0)]);
    }

    #[test]
    fn adaptive_k_one_available() {
        expect_k(&[(1, 0.9, 3, 1)]);
    }

    #[test]
    fn adaptive_k_two_available() {
        expect_k(&[(2, 0.1, 5, 2)]);
    }

    #[test]
    fn adaptive_k_small_pool_high_success() {
        // 3..=6 proxies with success > 0.85 → K = 1.
        expect_k(&[(3, 0.90, 3, 1), (4, 0.95, 5, 1), (6, 0.99, 4, 1)]);
    }

    #[test]
    fn adaptive_k_small_pool_low_success() {
        // 3..=6 proxies with success ≤ 0.85 → min(base_k, 2).
        expect_k(&[
            (3, 0.50, 3, 2),
            (5, 0.85, 5, 2), // exactly 0.85 is not > 0.85
            (6, 0.30, 1, 1), // min(1, 2) = 1
        ]);
    }

    #[test]
    fn adaptive_k_large_pool_high_success() {
        // More than 10 proxies with success > 0.85 → K = 1.
        expect_k(&[(11, 0.90, 3, 1), (50, 0.99, 5, 1)]);
    }

    #[test]
    fn adaptive_k_medium_pool_moderate_success() {
        // 7..=10 proxies with success > 0.6 → min(base_k, 2).
        expect_k(&[
            (7, 0.70, 3, 2),
            (10, 0.80, 5, 2),
            (8, 0.65, 1, 1), // min(1, 2) = 1
        ]);
    }

    #[test]
    fn adaptive_k_large_pool_moderate_success() {
        // More than 10 proxies with 0.6 < success ≤ 0.85 → min(base_k, 2).
        expect_k(&[(15, 0.70, 4, 2), (20, 0.80, 3, 2)]);
    }

    #[test]
    fn adaptive_k_medium_pool_low_success() {
        // 7..=10 proxies with success ≤ 0.6 → base_k.
        expect_k(&[
            (7, 0.50, 3, 3),
            (10, 0.60, 5, 5), // exactly 0.6 is not > 0.6
            (8, 0.10, 4, 4),
        ]);
    }

    #[test]
    fn adaptive_k_large_pool_low_success() {
        // More than 10 proxies with success ≤ 0.6 → base_k.
        expect_k(&[
            (15, 0.30, 4, 4),
            (50, 0.60, 3, 3), // exactly 0.6 is not > 0.6
        ]);
    }

    #[test]
    fn adaptive_k_boundary_at_3_proxies() {
        // Three proxies belong to the 3..=6 band, not the ≤ 2 band.
        expect_k(&[
            (3, 0.90, 3, 1), // high success → 1
            (3, 0.50, 3, 2), // low success → min(3, 2)
        ]);
    }

    #[test]
    fn adaptive_k_boundary_at_7_proxies() {
        // Seven proxies belong to the > 6 band.
        expect_k(&[
            (7, 0.90, 3, 2), // > 0.6 → min(3, 2); the K = 1 shortcut needs > 10 proxies
            (7, 0.50, 4, 4), // ≤ 0.6 → base_k
        ]);
    }

    #[test]
    fn adaptive_k_boundary_at_11_proxies() {
        // Eleven proxies belong to the > 10 band.
        expect_k(&[
            (11, 0.90, 3, 1), // > 10 and > 0.85 → 1
            (11, 0.70, 3, 2), // > 0.6 → min(3, 2)
            (11, 0.50, 4, 4), // ≤ 0.6 → base_k
        ]);
    }

    #[test]
    fn adaptive_k_base_k_of_one() {
        // A base_k of 1 survives the min(base_k, 2) cap unchanged.
        expect_k(&[(5, 0.70, 1, 1), (8, 0.70, 1, 1)]);
    }

    #[test]
    fn adaptive_k_never_returns_more_than_available() {
        // Only pools of ≤ 2 are exercised here: with base_k = 10 the result is
        // still the available count.
        expect_k(&[(2, 0.10, 10, 2), (1, 0.10, 10, 1)]);
    }
}