reqwest-proxy-pool 0.4.0

proxy pool middleware for reqwest
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
//! Configuration for the proxy pool.

use crate::classifier::{BodyClassifier, DefaultBodyClassifier};
use std::fmt;
use std::sync::Arc;
use std::time::Duration;

/// Factory used by middleware to create request clients before attaching proxy.
///
/// The factory is a reference-counted (`Arc`) closure that takes no arguments and
/// returns a `reqwest::ClientBuilder`. The `Send + Sync` bounds allow one factory
/// to be shared across threads.
pub type ClientBuilderFactory = Arc<dyn Fn() -> reqwest::ClientBuilder + Send + Sync>;

/// How a proxy is picked from the pool.
///
/// When no strategy is configured, `HostConfigBuilder::build` falls back to
/// [`ProxySelectionStrategy::FastestResponse`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ProxySelectionStrategy {
    /// Pick the proxy with the fastest response time.
    FastestResponse,
    /// Pick the proxy with the highest success rate.
    MostReliable,
    /// Pick one proxy at random among the Top-K ranked by success rate.
    ///
    /// K is the `reliable_top_k` value of the host configuration.
    TopKReliableRandom,
    /// Pick a random healthy proxy.
    Random,
    /// Cycle through proxies in round-robin fashion.
    RoundRobin,
}

/// How the retries of a request choose their proxy.
///
/// When not configured, `HostConfigBuilder::build` uses
/// [`RetryStrategy::DefaultSelection`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RetryStrategy {
    /// Each retry re-selects through `selection_strategy`, so the same proxy
    /// may be picked again for one request.
    DefaultSelection,
    /// Each retry picks a proxy that this request has not used yet.
    NewProxyOnRetry,
}

/// Per-host configuration.
///
/// Each `HostConfig` initializes one dedicated proxy pool.
///
/// Values are created through [`HostConfig::builder`] / [`HostConfigBuilder`].
/// All fields are crate-private and exposed read-only through accessor methods.
/// `Debug` is implemented by hand and prints a placeholder for `body_classifier`.
#[derive(Clone)]
pub struct HostConfig {
    /// Host this configuration is bound to. `HostConfigBuilder` stores it trimmed
    /// and ASCII-lowercased, and substitutes `"default"` when the result is empty.
    pub(crate) host: String,
    /// Whether this host is the primary fallback host pool.
    pub(crate) primary: bool,
    /// Interval between health checks.
    pub(crate) health_check_interval: Duration,
    /// Timeout for health checks.
    pub(crate) health_check_timeout: Duration,
    /// Minimum number of available proxies.
    pub(crate) min_available_proxies: usize,
    /// URL used for health checks.
    pub(crate) health_check_url: String,
    /// Number of times to retry a request with different proxies.
    pub(crate) retry_count: usize,
    /// Retry behavior.
    pub(crate) retry_strategy: RetryStrategy,
    /// Strategy for selecting proxies.
    pub(crate) selection_strategy: ProxySelectionStrategy,
    /// Minimum interval between requests on the same proxy instance, in
    /// milliseconds. `HostConfigBuilder::build` never stores a value below 1.
    pub(crate) min_request_interval_ms: u64,
    /// Body classifier for business-level proxy health feedback.
    pub(crate) body_classifier: Arc<dyn BodyClassifier>,
    /// Cooldown duration after a proxy failure.
    pub(crate) proxy_cooldown: Duration,
    /// K value for `TopKReliableRandom`. `HostConfigBuilder` never stores a
    /// value below 1.
    pub(crate) reliable_top_k: usize,
}

impl fmt::Debug for HostConfig {
    /// Writes every field through `debug_struct`. The body classifier is a trait
    /// object, so a fixed placeholder string is printed in its place.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Exhaustive destructuring: a field added to `HostConfig` without being
        // handled here becomes a compile error instead of a silent omission.
        let Self {
            host,
            primary,
            health_check_interval,
            health_check_timeout,
            min_available_proxies,
            health_check_url,
            retry_count,
            retry_strategy,
            selection_strategy,
            min_request_interval_ms,
            body_classifier: _,
            proxy_cooldown,
            reliable_top_k,
        } = self;

        let mut out = f.debug_struct("HostConfig");
        out.field("host", host);
        out.field("primary", primary);
        out.field("health_check_interval", health_check_interval);
        out.field("health_check_timeout", health_check_timeout);
        out.field("min_available_proxies", min_available_proxies);
        out.field("health_check_url", health_check_url);
        out.field("retry_count", retry_count);
        out.field("retry_strategy", retry_strategy);
        out.field("selection_strategy", selection_strategy);
        out.field("min_request_interval_ms", min_request_interval_ms);
        out.field("body_classifier", &"<dyn BodyClassifier>");
        out.field("proxy_cooldown", proxy_cooldown);
        out.field("reliable_top_k", reliable_top_k);
        out.finish()
    }
}

impl HostConfig {
    /// Start a [`HostConfigBuilder`] for `host`.
    pub fn builder(host: impl Into<String>) -> HostConfigBuilder {
        HostConfigBuilder::new(host)
    }

    /// Host this configuration is bound to.
    pub fn host(&self) -> &str {
        self.host.as_str()
    }

    /// `true` when this host is the primary fallback host pool.
    pub fn primary(&self) -> bool {
        self.primary
    }

    /// Time between two health checks.
    pub fn health_check_interval(&self) -> Duration {
        self.health_check_interval
    }

    /// Timeout applied to health checks.
    pub fn health_check_timeout(&self) -> Duration {
        self.health_check_timeout
    }

    /// Minimum number of available proxies.
    pub fn min_available_proxies(&self) -> usize {
        self.min_available_proxies
    }

    /// URL that health checks are run against.
    pub fn health_check_url(&self) -> &str {
        self.health_check_url.as_str()
    }

    /// How many times a request is retried.
    pub fn retry_count(&self) -> usize {
        self.retry_count
    }

    /// Retry behavior.
    pub fn retry_strategy(&self) -> RetryStrategy {
        self.retry_strategy
    }

    /// Strategy used to select proxies.
    pub fn selection_strategy(&self) -> ProxySelectionStrategy {
        self.selection_strategy
    }

    /// Minimum interval, in milliseconds, between requests on the same proxy
    /// instance.
    pub fn min_request_interval_ms(&self) -> u64 {
        self.min_request_interval_ms
    }

    /// Shared handle to the body classifier.
    pub fn body_classifier(&self) -> &Arc<dyn BodyClassifier> {
        &self.body_classifier
    }

    /// Cooldown duration after a proxy failure.
    pub fn proxy_cooldown(&self) -> Duration {
        self.proxy_cooldown
    }

    /// K value used by `TopKReliableRandom`.
    pub fn reliable_top_k(&self) -> usize {
        self.reliable_top_k
    }
}

/// Builder for `HostConfig`.
///
/// Every optional setting is stored as `Option`; `build` substitutes a default
/// for each one that is still `None`.
pub struct HostConfigBuilder {
    /// Target host, already trimmed and ASCII-lowercased by `new`.
    host: String,
    /// Whether this host is the primary fallback; `new` starts it at `false`.
    primary: bool,
    /// Interval between health checks; `build` defaults to 300 seconds.
    health_check_interval: Option<Duration>,
    /// Timeout for health checks; `build` defaults to 10 seconds.
    health_check_timeout: Option<Duration>,
    /// Minimum number of available proxies; `build` defaults to 3.
    min_available_proxies: Option<usize>,
    /// URL used for health checks; `build` uses `https://www.google.com` when
    /// this is unset or blank.
    health_check_url: Option<String>,
    /// Retry count; `build` defaults to 3.
    retry_count: Option<usize>,
    /// Retry behavior; `build` defaults to `RetryStrategy::DefaultSelection`.
    retry_strategy: Option<RetryStrategy>,
    /// Selection strategy; `build` defaults to
    /// `ProxySelectionStrategy::FastestResponse`.
    selection_strategy: Option<ProxySelectionStrategy>,
    /// Minimum interval in milliseconds between requests on one proxy instance;
    /// `build` defaults to 500 and raises anything below 1 to 1.
    min_request_interval_ms: Option<u64>,
    /// Custom body classifier; `build` defaults to `DefaultBodyClassifier`.
    body_classifier: Option<Arc<dyn BodyClassifier>>,
    /// Cooldown after a proxy failure; `build` defaults to 30 seconds.
    proxy_cooldown: Option<Duration>,
    /// K for `TopKReliableRandom`; `build` defaults to 8, and values below 1
    /// are raised to 1.
    reliable_top_k: Option<usize>,
}

impl HostConfigBuilder {
    /// Start a builder for `host`.
    ///
    /// The host is normalized right away (trimmed and ASCII-lowercased). Every
    /// other option stays unset until its setter is called; `build` fills in
    /// defaults for whatever is still unset.
    pub fn new(host: impl Into<String>) -> Self {
        let host = normalize_host(host.into());
        Self {
            host,
            primary: false,
            health_check_interval: None,
            health_check_timeout: None,
            min_available_proxies: None,
            health_check_url: None,
            retry_count: None,
            retry_strategy: None,
            selection_strategy: None,
            min_request_interval_ms: None,
            body_classifier: None,
            proxy_cooldown: None,
            reliable_top_k: None,
        }
    }

    /// Set the interval between health checks.
    pub fn health_check_interval(self, interval: Duration) -> Self {
        Self {
            health_check_interval: Some(interval),
            ..self
        }
    }

    /// Mark (or unmark) this host as the primary fallback.
    pub fn primary(self, primary: bool) -> Self {
        Self { primary, ..self }
    }

    /// Set the timeout for health checks.
    pub fn health_check_timeout(self, timeout: Duration) -> Self {
        Self {
            health_check_timeout: Some(timeout),
            ..self
        }
    }

    /// Set the minimum number of available proxies.
    pub fn min_available_proxies(self, count: usize) -> Self {
        Self {
            min_available_proxies: Some(count),
            ..self
        }
    }

    /// Set the URL used for health checks.
    ///
    /// A blank (empty or whitespace-only) URL is replaced by the default URL in
    /// `build`.
    pub fn health_check_url(self, url: impl Into<String>) -> Self {
        Self {
            health_check_url: Some(url.into()),
            ..self
        }
    }

    /// Set the retry count.
    pub fn retry_count(self, count: usize) -> Self {
        Self {
            retry_count: Some(count),
            ..self
        }
    }

    /// Set the retry strategy.
    pub fn retry_strategy(self, strategy: RetryStrategy) -> Self {
        Self {
            retry_strategy: Some(strategy),
            ..self
        }
    }

    /// Set the proxy selection strategy.
    pub fn selection_strategy(self, strategy: ProxySelectionStrategy) -> Self {
        Self {
            selection_strategy: Some(strategy),
            ..self
        }
    }

    /// Set the minimum interval, in milliseconds, between requests on one proxy
    /// instance.
    ///
    /// `build` raises a value of 0 to 1.
    pub fn min_request_interval_ms(self, interval_ms: u64) -> Self {
        Self {
            min_request_interval_ms: Some(interval_ms),
            ..self
        }
    }

    /// Install a custom body classifier.
    pub fn body_classifier(self, classifier: impl BodyClassifier) -> Self {
        let classifier: Arc<dyn BodyClassifier> = Arc::new(classifier);
        Self {
            body_classifier: Some(classifier),
            ..self
        }
    }

    /// Set the cooldown duration after one failed request on a proxy.
    pub fn proxy_cooldown(self, cooldown: Duration) -> Self {
        Self {
            proxy_cooldown: Some(cooldown),
            ..self
        }
    }

    /// Set K for `TopKReliableRandom`.
    ///
    /// A value of 0 is stored as 1.
    pub fn reliable_top_k(self, top_k: usize) -> Self {
        let top_k = top_k.max(1);
        Self {
            reliable_top_k: Some(top_k),
            ..self
        }
    }

    /// Finish the builder, substituting a default for every unset option.
    ///
    /// Defaults: health check every 300 s with a 10 s timeout against
    /// `https://www.google.com`, 3 minimum available proxies, 3 retries with
    /// `RetryStrategy::DefaultSelection`, `ProxySelectionStrategy::FastestResponse`,
    /// 500 ms minimum request interval, `DefaultBodyClassifier`, 30 s proxy
    /// cooldown, and a Top-K of 8. An empty host becomes `"default"`.
    pub fn build(self) -> HostConfig {
        const DEFAULT_HEALTH_CHECK_URL: &str = "https://www.google.com";

        let Self {
            host,
            primary,
            health_check_interval,
            health_check_timeout,
            min_available_proxies,
            health_check_url,
            retry_count,
            retry_strategy,
            selection_strategy,
            min_request_interval_ms,
            body_classifier,
            proxy_cooldown,
            reliable_top_k,
        } = self;

        // An unset or blank URL falls back to the default; a non-blank URL is
        // kept exactly as supplied (it is not trimmed).
        let health_check_url = match health_check_url {
            Some(url) if !url.trim().is_empty() => url,
            _ => DEFAULT_HEALTH_CHECK_URL.to_string(),
        };

        // `new` already trimmed the host, so "empty" here also covers
        // whitespace-only input.
        let host = if host.is_empty() {
            "default".to_string()
        } else {
            host
        };

        HostConfig {
            host,
            primary,
            health_check_interval: health_check_interval.unwrap_or(Duration::from_secs(300)),
            health_check_timeout: health_check_timeout.unwrap_or(Duration::from_secs(10)),
            min_available_proxies: min_available_proxies.unwrap_or(3),
            health_check_url,
            retry_count: retry_count.unwrap_or(3),
            retry_strategy: retry_strategy.unwrap_or(RetryStrategy::DefaultSelection),
            selection_strategy: selection_strategy
                .unwrap_or(ProxySelectionStrategy::FastestResponse),
            // Both values are kept at 1 or above.
            min_request_interval_ms: min_request_interval_ms.map_or(500, |ms| ms.max(1)),
            body_classifier: body_classifier.unwrap_or_else(|| Arc::new(DefaultBodyClassifier)),
            proxy_cooldown: proxy_cooldown.unwrap_or(Duration::from_secs(30)),
            reliable_top_k: reliable_top_k.map_or(8, |k| k.max(1)),
        }
    }
}

/// Top-level configuration.
///
/// Created through [`ProxyPoolConfig::builder`]. `Debug` is implemented by hand
/// and prints a placeholder for `client_builder_factory`, which is a closure.
#[derive(Clone)]
pub struct ProxyPoolConfig {
    /// Shared source URLs used to build proxy lists for all host pools.
    pub(crate) sources: Vec<String>,
    /// Host-specific pool definitions.
    pub(crate) hosts: Vec<HostConfig>,
    /// Factory used by middleware to create request clients before attaching proxy.
    pub(crate) client_builder_factory: ClientBuilderFactory,
}

impl fmt::Debug for ProxyPoolConfig {
    /// Writes `sources` and `hosts` normally. The client builder factory is a
    /// closure, so a fixed placeholder string is printed in its place.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Exhaustive destructuring keeps this impl in sync with the struct.
        let Self {
            sources,
            hosts,
            client_builder_factory: _,
        } = self;

        let mut out = f.debug_struct("ProxyPoolConfig");
        out.field("sources", sources);
        out.field("hosts", hosts);
        out.field(
            "client_builder_factory",
            &"<dyn Fn() -> reqwest::ClientBuilder>",
        );
        out.finish()
    }
}

impl ProxyPoolConfig {
    /// Start a [`ProxyPoolConfigBuilder`].
    pub fn builder() -> ProxyPoolConfigBuilder {
        ProxyPoolConfigBuilder::new()
    }

    /// Shared source URLs.
    pub fn sources(&self) -> &[String] {
        self.sources.as_slice()
    }

    /// Host-specific pool definitions.
    pub fn hosts(&self) -> &[HostConfig] {
        self.hosts.as_slice()
    }

    /// Factory used by middleware to create request clients before attaching proxy.
    pub fn client_builder_factory(&self) -> &ClientBuilderFactory {
        &self.client_builder_factory
    }
}

/// Builder for `ProxyPoolConfig`.
pub struct ProxyPoolConfigBuilder {
    /// Source URLs; empty until `sources` is called.
    sources: Vec<String>,
    /// Host configs; empty until `hosts` / `add_host` is called.
    hosts: Vec<HostConfig>,
    /// Optional client builder factory; when left unset, `build` falls back to
    /// `reqwest::Client::builder`.
    client_builder_factory: Option<ClientBuilderFactory>,
}

impl ProxyPoolConfigBuilder {
    /// Create builder.
    pub fn new() -> Self {
        Self {
            sources: Vec::new(),
            hosts: Vec::new(),
            client_builder_factory: None,
        }
    }

    /// Set source URLs.
    pub fn sources(mut self, sources: Vec<impl Into<String>>) -> Self {
        self.sources = sources.into_iter().map(Into::into).collect();
        self
    }

    /// Set all host configs.
    ///
    /// Exactly one host should set `primary(true)` as fallback for unknown hosts.
    pub fn hosts(mut self, hosts: Vec<HostConfig>) -> Self {
        self.hosts = hosts;
        self
    }

    /// Add one host config.
    ///
    /// Exactly one host should set `primary(true)` as fallback for unknown hosts.
    pub fn add_host(mut self, host: HostConfig) -> Self {
        self.hosts.push(host);
        self
    }

    /// Set request client builder factory.
    ///
    /// Middleware will call this factory on each attempt, then append proxy settings.
    /// Use this to keep timeout/pool/TLS settings aligned with your outer client setup.
    pub fn client_builder_factory<F>(mut self, factory: F) -> Self
    where
        F: Fn() -> reqwest::ClientBuilder + Send + Sync + 'static,
    {
        self.client_builder_factory = Some(Arc::new(factory));
        self
    }

    /// Build config.
    pub fn build(self) -> ProxyPoolConfig {
        ProxyPoolConfig {
            sources: self.sources,
            hosts: self.hosts,
            client_builder_factory: self
                .client_builder_factory
                .unwrap_or_else(|| Arc::new(reqwest::Client::builder)),
        }
    }
}

impl Default for ProxyPoolConfigBuilder {
    fn default() -> Self {
        Self::new()
    }
}

/// Normalizes a host string: strips leading and trailing whitespace and
/// lowercases ASCII letters. Non-ASCII characters are left unchanged.
fn normalize_host(host: String) -> String {
    let mut normalized = String::from(host.trim());
    normalized.make_ascii_lowercase();
    normalized
}

#[cfg(test)]
mod tests {
    //! Unit tests for the configuration builders: host normalization, default
    //! values, fallbacks for blank input, and clamping of degenerate values.

    use super::{HostConfig, ProxyPoolConfig, ProxySelectionStrategy, RetryStrategy};
    use std::time::Duration;

    #[test]
    fn host_config_normalizes_host() {
        let host = HostConfig::builder(" API.EXAMPLE.COM ").build();
        assert_eq!(host.host(), "api.example.com");
    }

    #[test]
    fn host_config_blank_host_falls_back_to_default() {
        // The host is trimmed before the emptiness check, so whitespace-only
        // input takes the same fallback as an empty string.
        assert_eq!(HostConfig::builder("").build().host(), "default");
        assert_eq!(HostConfig::builder("   ").build().host(), "default");
    }

    #[test]
    fn host_config_applies_defaults_when_unset() {
        let host = HostConfig::builder("api.example.com").build();
        assert!(!host.primary());
        assert_eq!(host.health_check_interval(), Duration::from_secs(300));
        assert_eq!(host.health_check_timeout(), Duration::from_secs(10));
        assert_eq!(host.min_available_proxies(), 3);
        assert_eq!(host.health_check_url(), "https://www.google.com");
        assert_eq!(host.retry_count(), 3);
        assert_eq!(host.retry_strategy(), RetryStrategy::DefaultSelection);
        assert_eq!(
            host.selection_strategy(),
            ProxySelectionStrategy::FastestResponse
        );
        assert_eq!(host.min_request_interval_ms(), 500);
        assert_eq!(host.proxy_cooldown(), Duration::from_secs(30));
        assert_eq!(host.reliable_top_k(), 8);
    }

    #[test]
    fn host_config_keeps_explicit_values() {
        let host = HostConfig::builder("api.example.com")
            .primary(true)
            .health_check_interval(Duration::from_secs(60))
            .health_check_timeout(Duration::from_secs(2))
            .min_available_proxies(7)
            .health_check_url("https://example.com/ping")
            .retry_count(0)
            .retry_strategy(RetryStrategy::NewProxyOnRetry)
            .selection_strategy(ProxySelectionStrategy::RoundRobin)
            .min_request_interval_ms(250)
            .proxy_cooldown(Duration::from_secs(5))
            .reliable_top_k(4)
            .build();
        assert!(host.primary());
        assert_eq!(host.health_check_interval(), Duration::from_secs(60));
        assert_eq!(host.health_check_timeout(), Duration::from_secs(2));
        assert_eq!(host.min_available_proxies(), 7);
        assert_eq!(host.health_check_url(), "https://example.com/ping");
        assert_eq!(host.retry_count(), 0);
        assert_eq!(host.retry_strategy(), RetryStrategy::NewProxyOnRetry);
        assert_eq!(host.selection_strategy(), ProxySelectionStrategy::RoundRobin);
        assert_eq!(host.min_request_interval_ms(), 250);
        assert_eq!(host.proxy_cooldown(), Duration::from_secs(5));
        assert_eq!(host.reliable_top_k(), 4);
    }

    #[test]
    fn host_config_clamps_and_falls_back_on_degenerate_values() {
        let host = HostConfig::builder("api.example.com")
            .health_check_url("   ")
            .min_request_interval_ms(0)
            .reliable_top_k(0)
            .build();
        assert_eq!(host.health_check_url(), "https://www.google.com");
        assert_eq!(host.min_request_interval_ms(), 1);
        assert_eq!(host.reliable_top_k(), 1);
    }

    #[test]
    fn host_config_debug_uses_classifier_placeholder() {
        let rendered = format!("{:?}", HostConfig::builder("api.example.com").build());
        assert!(rendered.starts_with("HostConfig"));
        assert!(rendered.contains("<dyn BodyClassifier>"));
    }

    #[test]
    fn pool_config_keeps_hosts() {
        let api = HostConfig::builder("api.example.com").build();
        let web = HostConfig::builder("web.example.com").build();
        let config = ProxyPoolConfig::builder().hosts(vec![api, web]).build();
        assert_eq!(config.hosts().len(), 2);
    }

    #[test]
    fn pool_config_collects_sources_and_added_hosts() {
        let config = ProxyPoolConfig::builder()
            .sources(vec!["https://example.com/a.txt", "https://example.com/b.txt"])
            .add_host(HostConfig::builder("api.example.com").primary(true).build())
            .add_host(HostConfig::builder("web.example.com").build())
            .build();

        let expected_sources = vec![
            "https://example.com/a.txt".to_string(),
            "https://example.com/b.txt".to_string(),
        ];
        assert_eq!(config.sources(), expected_sources.as_slice());

        // `add_host` appends, so insertion order is kept.
        let hosts: Vec<&str> = config.hosts().iter().map(|h| h.host()).collect();
        assert_eq!(hosts, ["api.example.com", "web.example.com"]);
        assert!(config.hosts()[0].primary());
        assert!(!config.hosts()[1].primary());
    }

    #[test]
    fn pool_config_defaults_to_empty_lists() {
        let config = ProxyPoolConfig::builder().build();
        assert!(config.sources().is_empty());
        assert!(config.hosts().is_empty());
    }
}