Skip to main content

reqwest_proxy_pool/
config.rs

1//! Configuration for the proxy pool.
2
3use crate::classifier::{BodyClassifier, DefaultBodyClassifier};
4use std::fmt;
5use std::sync::Arc;
6use std::time::Duration;
7
8/// Factory used by middleware to create request clients before attaching proxy.
9pub type ClientBuilderFactory = Arc<dyn Fn() -> reqwest::ClientBuilder + Send + Sync>;
10
/// Strategy for selecting a proxy from the pool.
///
/// The [`Default`] value is [`ProxySelectionStrategy::FastestResponse`], the same
/// strategy `HostConfigBuilder::build` falls back to when none is configured.
/// The type is `Copy` and `Hash`, so it can be passed by value and used as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum ProxySelectionStrategy {
    /// Select the proxy with the fastest response time.
    #[default]
    FastestResponse,
    /// Select the proxy with the highest success rate.
    MostReliable,
    /// Randomly select one proxy from the Top-K by success rate.
    ///
    /// K is configured through `HostConfigBuilder::reliable_top_k`.
    TopKReliableRandom,
    /// Select a random healthy proxy.
    Random,
    /// Select proxies in round-robin fashion.
    RoundRobin,
}
25
/// Retry strategy for request retries.
///
/// The [`Default`] value is [`RetryStrategy::DefaultSelection`], the same strategy
/// `HostConfigBuilder::build` falls back to when none is configured.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum RetryStrategy {
    /// Each retry re-selects a proxy through the configured `selection_strategy`,
    /// so the same proxy may be picked again.
    #[default]
    DefaultSelection,
    /// On retries, always pick a proxy that has not been used by this request yet.
    NewProxyOnRetry,
}
35
36/// Per-host configuration.
37///
38/// Each `HostConfig` initializes one dedicated proxy pool.
39#[derive(Clone)]
40pub struct HostConfig {
41    pub(crate) host: String,
42    pub(crate) primary: bool,
43    /// Interval between health checks.
44    pub(crate) health_check_interval: Duration,
45    /// Timeout for health checks.
46    pub(crate) health_check_timeout: Duration,
47    /// Minimum number of available proxies.
48    pub(crate) min_available_proxies: usize,
49    /// URL used for health checks.
50    pub(crate) health_check_url: String,
51    /// Number of times to retry a request with different proxies.
52    pub(crate) retry_count: usize,
53    /// Retry behavior.
54    pub(crate) retry_strategy: RetryStrategy,
55    /// Strategy for selecting proxies.
56    pub(crate) selection_strategy: ProxySelectionStrategy,
57    /// Minimum interval between requests on the same proxy instance.
58    pub(crate) min_request_interval_ms: u64,
59    /// Body classifier for business-level proxy health feedback.
60    pub(crate) body_classifier: Arc<dyn BodyClassifier>,
61    /// Cooldown duration after a proxy failure.
62    pub(crate) proxy_cooldown: Duration,
63    /// K value for `TopKReliableRandom`.
64    pub(crate) reliable_top_k: usize,
65}
66
67impl fmt::Debug for HostConfig {
68    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
69        f.debug_struct("HostConfig")
70            .field("host", &self.host)
71            .field("primary", &self.primary)
72            .field("health_check_interval", &self.health_check_interval)
73            .field("health_check_timeout", &self.health_check_timeout)
74            .field("min_available_proxies", &self.min_available_proxies)
75            .field("health_check_url", &self.health_check_url)
76            .field("retry_count", &self.retry_count)
77            .field("retry_strategy", &self.retry_strategy)
78            .field("selection_strategy", &self.selection_strategy)
79            .field("min_request_interval_ms", &self.min_request_interval_ms)
80            .field("body_classifier", &"<dyn BodyClassifier>")
81            .field("proxy_cooldown", &self.proxy_cooldown)
82            .field("reliable_top_k", &self.reliable_top_k)
83            .finish()
84    }
85}
86
87impl HostConfig {
88    /// Create a new builder.
89    pub fn builder(host: impl Into<String>) -> HostConfigBuilder {
90        HostConfigBuilder::new(host)
91    }
92
93    /// Bound host.
94    pub fn host(&self) -> &str {
95        &self.host
96    }
97
98    /// Whether this host is the primary fallback host pool.
99    pub fn primary(&self) -> bool {
100        self.primary
101    }
102
103    /// Interval between health checks.
104    pub fn health_check_interval(&self) -> Duration {
105        self.health_check_interval
106    }
107
108    /// Timeout for health checks.
109    pub fn health_check_timeout(&self) -> Duration {
110        self.health_check_timeout
111    }
112
113    /// Minimum number of available proxies.
114    pub fn min_available_proxies(&self) -> usize {
115        self.min_available_proxies
116    }
117
118    /// URL used for health checks.
119    pub fn health_check_url(&self) -> &str {
120        &self.health_check_url
121    }
122
123    /// Number of times to retry.
124    pub fn retry_count(&self) -> usize {
125        self.retry_count
126    }
127
128    /// Retry strategy.
129    pub fn retry_strategy(&self) -> RetryStrategy {
130        self.retry_strategy
131    }
132
133    /// Selection strategy.
134    pub fn selection_strategy(&self) -> ProxySelectionStrategy {
135        self.selection_strategy
136    }
137
138    /// Minimum interval between requests on the same proxy instance.
139    pub fn min_request_interval_ms(&self) -> u64 {
140        self.min_request_interval_ms
141    }
142
143    /// Body classifier.
144    pub fn body_classifier(&self) -> &Arc<dyn BodyClassifier> {
145        &self.body_classifier
146    }
147
148    /// Cooldown duration after a proxy failure.
149    pub fn proxy_cooldown(&self) -> Duration {
150        self.proxy_cooldown
151    }
152
153    /// K value for `TopKReliableRandom`.
154    pub fn reliable_top_k(&self) -> usize {
155        self.reliable_top_k
156    }
157}
158
159/// Builder for `HostConfig`.
160pub struct HostConfigBuilder {
161    host: String,
162    primary: bool,
163    health_check_interval: Option<Duration>,
164    health_check_timeout: Option<Duration>,
165    min_available_proxies: Option<usize>,
166    health_check_url: Option<String>,
167    retry_count: Option<usize>,
168    retry_strategy: Option<RetryStrategy>,
169    selection_strategy: Option<ProxySelectionStrategy>,
170    min_request_interval_ms: Option<u64>,
171    body_classifier: Option<Arc<dyn BodyClassifier>>,
172    proxy_cooldown: Option<Duration>,
173    reliable_top_k: Option<usize>,
174}
175
176impl HostConfigBuilder {
177    /// Create builder with a target host.
178    pub fn new(host: impl Into<String>) -> Self {
179        Self {
180            host: normalize_host(host.into()),
181            primary: false,
182            health_check_interval: None,
183            health_check_timeout: None,
184            min_available_proxies: None,
185            health_check_url: None,
186            retry_count: None,
187            retry_strategy: None,
188            selection_strategy: None,
189            min_request_interval_ms: None,
190            body_classifier: None,
191            proxy_cooldown: None,
192            reliable_top_k: None,
193        }
194    }
195
196    /// Set the interval between health checks.
197    pub fn health_check_interval(mut self, interval: Duration) -> Self {
198        self.health_check_interval = Some(interval);
199        self
200    }
201
202    /// Set whether this host is primary fallback.
203    pub fn primary(mut self, primary: bool) -> Self {
204        self.primary = primary;
205        self
206    }
207
208    /// Set the timeout for health checks.
209    pub fn health_check_timeout(mut self, timeout: Duration) -> Self {
210        self.health_check_timeout = Some(timeout);
211        self
212    }
213
214    /// Set the minimum number of available proxies.
215    pub fn min_available_proxies(mut self, count: usize) -> Self {
216        self.min_available_proxies = Some(count);
217        self
218    }
219
220    /// Set the URL used for health checks.
221    pub fn health_check_url(mut self, url: impl Into<String>) -> Self {
222        self.health_check_url = Some(url.into());
223        self
224    }
225
226    /// Set retry count.
227    pub fn retry_count(mut self, count: usize) -> Self {
228        self.retry_count = Some(count);
229        self
230    }
231
232    /// Set retry strategy.
233    pub fn retry_strategy(mut self, strategy: RetryStrategy) -> Self {
234        self.retry_strategy = Some(strategy);
235        self
236    }
237
238    /// Set selection strategy.
239    pub fn selection_strategy(mut self, strategy: ProxySelectionStrategy) -> Self {
240        self.selection_strategy = Some(strategy);
241        self
242    }
243
244    /// Set minimum interval milliseconds between requests on one proxy instance.
245    pub fn min_request_interval_ms(mut self, interval_ms: u64) -> Self {
246        self.min_request_interval_ms = Some(interval_ms);
247        self
248    }
249
250    /// Set custom body classifier.
251    pub fn body_classifier(mut self, classifier: impl BodyClassifier) -> Self {
252        self.body_classifier = Some(Arc::new(classifier));
253        self
254    }
255
256    /// Set cooldown duration after one failed request on a proxy.
257    pub fn proxy_cooldown(mut self, cooldown: Duration) -> Self {
258        self.proxy_cooldown = Some(cooldown);
259        self
260    }
261
262    /// Set K for `TopKReliableRandom`.
263    pub fn reliable_top_k(mut self, top_k: usize) -> Self {
264        self.reliable_top_k = Some(top_k.max(1));
265        self
266    }
267
268    /// Build host config.
269    pub fn build(self) -> HostConfig {
270        let health_check_url = self
271            .health_check_url
272            .unwrap_or_else(|| "https://www.google.com".to_string());
273        let health_check_url = if health_check_url.trim().is_empty() {
274            "https://www.google.com".to_string()
275        } else {
276            health_check_url
277        };
278
279        HostConfig {
280            host: if self.host.is_empty() {
281                "default".to_string()
282            } else {
283                self.host
284            },
285            primary: self.primary,
286            health_check_interval: self
287                .health_check_interval
288                .unwrap_or(Duration::from_secs(300)),
289            health_check_timeout: self.health_check_timeout.unwrap_or(Duration::from_secs(10)),
290            min_available_proxies: self.min_available_proxies.unwrap_or(3),
291            health_check_url,
292            retry_count: self.retry_count.unwrap_or(3),
293            retry_strategy: self
294                .retry_strategy
295                .unwrap_or(RetryStrategy::DefaultSelection),
296            selection_strategy: self
297                .selection_strategy
298                .unwrap_or(ProxySelectionStrategy::FastestResponse),
299            min_request_interval_ms: self.min_request_interval_ms.unwrap_or(500).max(1),
300            body_classifier: self
301                .body_classifier
302                .unwrap_or_else(|| Arc::new(DefaultBodyClassifier)),
303            proxy_cooldown: self.proxy_cooldown.unwrap_or(Duration::from_secs(30)),
304            reliable_top_k: self.reliable_top_k.unwrap_or(8).max(1),
305        }
306    }
307}
308
309/// Top-level configuration.
310#[derive(Clone)]
311pub struct ProxyPoolConfig {
312    /// Shared source URLs used to build proxy lists for all host pools.
313    pub(crate) sources: Vec<String>,
314    /// Host-specific pool definitions.
315    pub(crate) hosts: Vec<HostConfig>,
316    /// Factory used by middleware to create request clients before attaching proxy.
317    pub(crate) client_builder_factory: ClientBuilderFactory,
318}
319
320impl fmt::Debug for ProxyPoolConfig {
321    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
322        f.debug_struct("ProxyPoolConfig")
323            .field("sources", &self.sources)
324            .field("hosts", &self.hosts)
325            .field(
326                "client_builder_factory",
327                &"<dyn Fn() -> reqwest::ClientBuilder>",
328            )
329            .finish()
330    }
331}
332
333impl ProxyPoolConfig {
334    /// Create builder.
335    pub fn builder() -> ProxyPoolConfigBuilder {
336        ProxyPoolConfigBuilder::new()
337    }
338
339    /// Sources.
340    pub fn sources(&self) -> &[String] {
341        &self.sources
342    }
343
344    /// Host configs.
345    pub fn hosts(&self) -> &[HostConfig] {
346        &self.hosts
347    }
348
349    /// Factory used by middleware to create request clients before attaching proxy.
350    pub fn client_builder_factory(&self) -> &ClientBuilderFactory {
351        &self.client_builder_factory
352    }
353}
354
355/// Builder for `ProxyPoolConfig`.
356pub struct ProxyPoolConfigBuilder {
357    sources: Vec<String>,
358    hosts: Vec<HostConfig>,
359    client_builder_factory: Option<ClientBuilderFactory>,
360}
361
362impl ProxyPoolConfigBuilder {
363    /// Create builder.
364    pub fn new() -> Self {
365        Self {
366            sources: Vec::new(),
367            hosts: Vec::new(),
368            client_builder_factory: None,
369        }
370    }
371
372    /// Set source URLs.
373    pub fn sources(mut self, sources: Vec<impl Into<String>>) -> Self {
374        self.sources = sources.into_iter().map(Into::into).collect();
375        self
376    }
377
378    /// Set all host configs.
379    ///
380    /// Exactly one host should set `primary(true)` as fallback for unknown hosts.
381    pub fn hosts(mut self, hosts: Vec<HostConfig>) -> Self {
382        self.hosts = hosts;
383        self
384    }
385
386    /// Add one host config.
387    ///
388    /// Exactly one host should set `primary(true)` as fallback for unknown hosts.
389    pub fn add_host(mut self, host: HostConfig) -> Self {
390        self.hosts.push(host);
391        self
392    }
393
394    /// Set request client builder factory.
395    ///
396    /// Middleware will call this factory on each attempt, then append proxy settings.
397    /// Use this to keep timeout/pool/TLS settings aligned with your outer client setup.
398    pub fn client_builder_factory<F>(mut self, factory: F) -> Self
399    where
400        F: Fn() -> reqwest::ClientBuilder + Send + Sync + 'static,
401    {
402        self.client_builder_factory = Some(Arc::new(factory));
403        self
404    }
405
406    /// Build config.
407    pub fn build(self) -> ProxyPoolConfig {
408        ProxyPoolConfig {
409            sources: self.sources,
410            hosts: self.hosts,
411            client_builder_factory: self
412                .client_builder_factory
413                .unwrap_or_else(|| Arc::new(reqwest::Client::builder)),
414        }
415    }
416}
417
418impl Default for ProxyPoolConfigBuilder {
419    fn default() -> Self {
420        Self::new()
421    }
422}
423
/// Canonical form of a host name: surrounding whitespace removed and ASCII
/// letters lowercased. Non-ASCII characters are left as they are.
fn normalize_host(host: String) -> String {
    let mut normalized = host.trim().to_owned();
    normalized.make_ascii_lowercase();
    normalized
}
427
#[cfg(test)]
mod tests {
    use super::{HostConfig, ProxyPoolConfig};

    /// The builder strips surrounding whitespace and lowercases the host.
    #[test]
    fn host_config_normalizes_host() {
        let config = HostConfig::builder(" API.EXAMPLE.COM ").build();
        assert_eq!("api.example.com", config.host());
    }

    /// Hosts handed to the pool builder all end up in the built config.
    #[test]
    fn pool_config_keeps_hosts() {
        let hosts = vec![
            HostConfig::builder("api.example.com").build(),
            HostConfig::builder("web.example.com").build(),
        ];
        let pool = ProxyPoolConfig::builder().hosts(hosts).build();
        assert_eq!(2, pool.hosts().len());
    }
}