1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
use std::time::Duration;
use crate::error::KumoError;
/// Retry configuration: tells the engine how failed fetch attempts are retried.
///
/// Start from [`RetryPolicy::new`] and chain the builder-style setters.
///
/// # Example
/// ```rust,ignore
/// CrawlEngine::builder()
///     .retry_policy(
///         RetryPolicy::new(3)
///             .base_delay(Duration::from_millis(200))
///             .max_delay(Duration::from_secs(30))
///             .jitter(true)
///             .on_status(429)
///             .on_status(503),
///     )
/// ```
#[derive(Debug, Clone)]
pub struct RetryPolicy {
    /// Number of retries permitted (not counting the initial attempt).
    pub(crate) max_attempts: u32,
    /// Delay before the first retry; later retries scale up from this value.
    pub(crate) base_delay: Duration,
    /// Upper bound applied to every computed delay.
    pub(crate) max_delay: Duration,
    /// Whether random jitter is added on top of each computed delay.
    pub(crate) jitter: bool,
    /// HTTP status codes that are eligible for a retry.
    ///
    /// * Empty: any `HttpStatus` or `Fetch` error is retried.
    /// * Non-empty: only `HttpStatus` errors whose code appears here are retried.
    pub(crate) retriable_statuses: Vec<u16>,
}
impl RetryPolicy {
    /// Create a policy with `max_attempts` retries, 500ms base delay, 60s cap, no jitter,
    /// and no status filter.
    ///
    /// `max_attempts` is the number of *retries* — total fetch calls = `max_attempts + 1`.
    #[must_use]
    pub fn new(max_attempts: u32) -> Self {
        Self {
            max_attempts,
            base_delay: Duration::from_millis(500),
            max_delay: Duration::from_secs(60),
            jitter: false,
            retriable_statuses: Vec::new(),
        }
    }

    /// Set the delay used before the first retry.
    ///
    /// Each subsequent retry doubles this value (see `delay_for`), up to `max_delay`.
    #[must_use]
    pub fn base_delay(mut self, d: Duration) -> Self {
        self.base_delay = d;
        self
    }

    /// Set the upper bound applied to every computed delay, jitter included.
    #[must_use]
    pub fn max_delay(mut self, d: Duration) -> Self {
        self.max_delay = d;
        self
    }

    /// Add ≤25% random jitter to each delay so concurrent retries don't thundering-herd.
    #[must_use]
    pub fn jitter(mut self, enabled: bool) -> Self {
        self.jitter = enabled;
        self
    }

    /// Only retry when the HTTP response status code matches.
    /// Call multiple times to allow several codes.
    ///
    /// If never called, retries on any `KumoError::HttpStatus` or `KumoError::Fetch`.
    #[must_use]
    pub fn on_status(mut self, status: u16) -> Self {
        self.retriable_statuses.push(status);
        self
    }

    /// Compute the sleep duration before retry `attempt` (0-indexed).
    ///
    /// The un-jittered delay is `base_delay * 2^attempt`, computed with saturating
    /// arithmetic and capped at `max_delay`. If jitter is on, up to 25% of that delay is
    /// added and the sum is capped at `max_delay` again.
    pub(crate) fn delay_for(&self, attempt: u32) -> Duration {
        // Saturating ops: a large `attempt` or `base_delay` clamps instead of overflowing.
        let factor = 2_u32.saturating_pow(attempt);
        let raw = self.base_delay.saturating_mul(factor).min(self.max_delay);
        if self.jitter {
            use rand::Rng;
            let extra_frac = rand::rng().random_range(0.0_f64..0.25);
            let extra = Duration::from_secs_f64(raw.as_secs_f64() * extra_frac);
            // `Duration + Duration` panics on overflow; saturate so an extreme
            // `max_delay` (e.g. `Duration::MAX`) cannot crash the retry loop.
            // Note: once `raw` has reached `max_delay`, the cap below removes the jitter.
            raw.saturating_add(extra).min(self.max_delay)
        } else {
            raw
        }
    }

    /// Return `true` if `err` should trigger a retry under this policy.
    ///
    /// * `HttpStatus`: retried when no status filter is set, or when the status is in it.
    /// * `Fetch`: retried only when no status filter is set.
    /// * Anything else: never retried.
    pub(crate) fn is_retriable(&self, err: &KumoError) -> bool {
        match err {
            KumoError::HttpStatus { status, .. } => {
                self.retriable_statuses.is_empty() || self.retriable_statuses.contains(status)
            }
            KumoError::Fetch(_) => self.retriable_statuses.is_empty(),
            // Never retry parse, store, domain, depth, llm, or browser errors.
            _ => false,
        }
    }
}