// scatter-proxy 0.5.0
//
// Async request scheduler for unreliable SOCKS5 proxies — multi-path race for maximum throughput.
// Documentation
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Duration;

/// Default free SOCKS5 proxy source URLs, intended for use when no custom sources are configured.
///
/// Every entry is an `https://` URL of a `socks5.txt` list: one kept in the scatter-proxy
/// repository and served through the jsDelivr CDN, followed by two community-maintained
/// lists served from `raw.githubusercontent.com`.
///
/// NOTE(review): the fallback that substitutes this list for an empty source list is not
/// implemented in this file — confirm against the code that consumes the configuration.
pub const DEFAULT_PROXY_SOURCES: &[&str] = &[
    // scatter-proxy's own curated list (repository `docs/socks5.txt` on `main`, via jsDelivr CDN)
    "https://cdn.jsdelivr.net/gh/letllmrun/scatter-proxy@main/docs/socks5.txt",
    // Community-maintained lists (raw files from third-party GitHub repositories)
    "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt",
    "https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt",
];

/// Main configuration for ScatterProxy.
///
/// Every field is public, so a configuration is normally built from
/// [`Default::default`] with struct-update syntax overriding only the fields
/// of interest.
///
/// The type is [`Clone`] so one configuration can be handed to several owners,
/// and [`Debug`] so the effective settings can be logged.
#[derive(Debug, Clone)]
pub struct ScatterProxyConfig {
    /// URLs of proxy sources (line-delimited `ip:port` or `socks5://ip:port`).
    /// When empty, [`DEFAULT_PROXY_SOURCES`] are used automatically.
    pub sources: Vec<String>,
    /// How often to re-fetch proxy sources (default: 10 min).
    pub source_refresh_interval: Duration,
    /// Per-(proxy, host) rate-limiting configuration.
    pub rate_limit: RateLimitConfig,
    /// Timeout for a single proxy connection attempt (default: 8s).
    pub proxy_timeout: Duration,
    /// Number of concurrent proxy paths raced per request (default: 3).
    pub max_concurrent_per_request: usize,
    /// Global in-flight concurrency limit (default: 100).
    pub max_inflight: usize,
    /// Maximum number of pending tasks in the pool (default: 1000).
    pub task_pool_capacity: usize,
    /// Sliding window size for health tracking (default: 30).
    pub health_window: usize,
    /// Base cooldown duration after consecutive failures (default: 30s).
    pub cooldown_base: Duration,
    /// Maximum cooldown duration (default: 300s).
    pub cooldown_max: Duration,
    /// Number of consecutive failures before entering cooldown (default: 3).
    pub cooldown_consecutive_fails: usize,
    /// Minimum samples required before a proxy can be evicted (default: 30).
    pub eviction_min_samples: usize,
    /// Optional file path for persisting proxy state as JSON.
    pub state_file: Option<PathBuf>,
    /// How often to save state to disk (default: 5 min).
    pub state_save_interval: Duration,
    /// How often to log the metrics summary line (default: 30s).
    pub metrics_log_interval: Duration,
    /// Whether to prefer remote DNS resolution through the SOCKS5 proxy (default: true).
    pub prefer_remote_dns: bool,
}

impl Default for ScatterProxyConfig {
    /// Builds the configuration with the defaults documented on each field:
    /// no explicit sources, no state file, and remote DNS preferred.
    fn default() -> Self {
        Self {
            sources: Vec::new(),
            source_refresh_interval: Duration::from_secs(600),
            rate_limit: RateLimitConfig::default(),
            proxy_timeout: Duration::from_secs(8),
            max_concurrent_per_request: 3,
            max_inflight: 100,
            task_pool_capacity: 1000,
            health_window: 30,
            cooldown_base: Duration::from_secs(30),
            cooldown_max: Duration::from_secs(300),
            cooldown_consecutive_fails: 3,
            eviction_min_samples: 30,
            state_file: None,
            state_save_interval: Duration::from_secs(300),
            metrics_log_interval: Duration::from_secs(30),
            prefer_remote_dns: true,
        }
    }
}

/// Per-(proxy, host) rate-limiting configuration.
///
/// Derives [`Debug`] and [`Clone`] so it can be logged and copied together
/// with the [`ScatterProxyConfig`] that embeds it.
#[derive(Debug, Clone)]
pub struct RateLimitConfig {
    /// Default minimum interval between requests through the same proxy to the same host (default: 500ms).
    pub default_interval: Duration,
    /// Per-host overrides for the minimum interval.
    pub host_overrides: HashMap<String, Duration>,
}

impl Default for RateLimitConfig {
    /// Builds a rate limit of one request per 500 ms with no per-host overrides.
    fn default() -> Self {
        Self {
            default_interval: Duration::from_millis(500),
            host_overrides: HashMap::new(),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Every field of a default `ScatterProxyConfig` holds its documented value.
    #[test]
    fn test_scatter_proxy_config_defaults() {
        let defaults = ScatterProxyConfig::default();

        // Opt-in settings start out unset.
        assert_eq!(defaults.sources.len(), 0);
        assert_eq!(defaults.state_file, None);

        // Time-based settings.
        assert_eq!(Duration::from_secs(600), defaults.source_refresh_interval);
        assert_eq!(Duration::from_secs(8), defaults.proxy_timeout);
        assert_eq!(Duration::from_secs(30), defaults.cooldown_base);
        assert_eq!(Duration::from_secs(300), defaults.cooldown_max);
        assert_eq!(Duration::from_secs(300), defaults.state_save_interval);
        assert_eq!(Duration::from_secs(30), defaults.metrics_log_interval);

        // Counters and capacities.
        assert_eq!(3, defaults.max_concurrent_per_request);
        assert_eq!(100, defaults.max_inflight);
        assert_eq!(1000, defaults.task_pool_capacity);
        assert_eq!(30, defaults.health_window);
        assert_eq!(3, defaults.cooldown_consecutive_fails);
        assert_eq!(30, defaults.eviction_min_samples);

        // Flags.
        assert_eq!(defaults.prefer_remote_dns, true);
    }

    /// A default `RateLimitConfig` is 500 ms with no per-host overrides.
    #[test]
    fn test_rate_limit_config_defaults() {
        let RateLimitConfig {
            default_interval,
            host_overrides,
        } = RateLimitConfig::default();

        assert_eq!(Duration::from_millis(500), default_interval);
        assert_eq!(host_overrides.len(), 0);
    }

    /// The rate limit embedded in a default `ScatterProxyConfig` is itself default.
    #[test]
    fn test_rate_limit_config_nested_in_scatter_proxy_config() {
        let nested = ScatterProxyConfig::default().rate_limit;

        assert_eq!(Duration::from_millis(500), nested.default_interval);
        assert_eq!(nested.host_overrides.len(), 0);
    }

    /// Struct-update syntax overrides the chosen fields and leaves the rest at their defaults.
    #[test]
    fn test_config_can_be_customised() {
        let overrides: HashMap<String, Duration> =
            std::iter::once((String::from("slow.example.com"), Duration::from_secs(2))).collect();
        let rate_limit = RateLimitConfig {
            default_interval: Duration::from_millis(250),
            host_overrides: overrides,
        };

        let custom = ScatterProxyConfig {
            rate_limit,
            sources: vec![String::from("https://example.com/proxies.txt")],
            state_file: Some(PathBuf::from("/tmp/scatter.json")),
            max_inflight: 200,
            max_concurrent_per_request: 5,
            prefer_remote_dns: false,
            ..Default::default()
        };

        // Overridden fields.
        assert_eq!(1, custom.sources.len());
        assert_eq!(5, custom.max_concurrent_per_request);
        assert_eq!(200, custom.max_inflight);
        assert_eq!(Some(PathBuf::from("/tmp/scatter.json")), custom.state_file);
        assert_eq!(custom.prefer_remote_dns, false);
        assert_eq!(Duration::from_millis(250), custom.rate_limit.default_interval);
        let slow_host = custom.rate_limit.host_overrides.get("slow.example.com");
        assert_eq!(slow_host, Some(&Duration::from_secs(2)));

        // Untouched fields keep their defaults.
        assert_eq!(Duration::from_secs(8), custom.proxy_timeout);
        assert_eq!(30, custom.health_window);
    }

    /// The default cooldown ceiling is never below the default cooldown base.
    #[test]
    fn test_cooldown_max_gte_cooldown_base() {
        let ScatterProxyConfig {
            cooldown_base,
            cooldown_max,
            ..
        } = ScatterProxyConfig::default();

        assert!(cooldown_base <= cooldown_max);
    }

    /// Proxy sources are re-fetched every ten minutes by default.
    #[test]
    fn test_source_refresh_interval_is_10_minutes() {
        let refresh_secs = ScatterProxyConfig::default()
            .source_refresh_interval
            .as_secs();
        assert_eq!(refresh_secs, 600);
    }

    /// Proxy state is saved every five minutes by default.
    #[test]
    fn test_state_save_interval_is_5_minutes() {
        let save_secs = ScatterProxyConfig::default().state_save_interval.as_secs();
        assert_eq!(save_secs, 300);
    }

    /// The built-in source list is non-empty and contains only HTTPS URLs.
    #[test]
    fn test_default_proxy_sources_not_empty() {
        assert_ne!(DEFAULT_PROXY_SOURCES.len(), 0);
        assert!(DEFAULT_PROXY_SOURCES
            .iter()
            .all(|url| url.starts_with("https://")));
    }
}