1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
use serde::{Deserialize, Serialize};
/// Strategy used to space out retries of a workflow step.
///
/// All durations are whole seconds. The actual delay for a given attempt is
/// derived from these parameters by `StepRetryPolicy::calculate_delay`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum BackoffStrategy {
    /// Wait the same amount of time before every retry.
    Fixed {
        /// Seconds to wait before each retry.
        delay_seconds: u32,
    },
    /// Double the wait on every attempt: `base_seconds * 2^attempt`,
    /// capped at `max_seconds`.
    Exponential {
        /// Delay, in seconds, used for attempt 0 and doubled from there.
        base_seconds: u32,
        /// Upper bound, in seconds, on the computed delay.
        max_seconds: u32,
    },
    /// Exponential backoff with a random offset of up to ±25% of the delay.
    ///
    /// The offset is added after the exponential delay has been capped, so the
    /// final delay may land above `max_seconds`.
    ExponentialWithJitter {
        /// Delay, in seconds, used for attempt 0 and doubled from there.
        base_seconds: u32,
        /// Upper bound, in seconds, applied to the delay before jitter is added.
        max_seconds: u32,
    },
}
/// Retry settings applied to an individual workflow step.
///
/// Pairs a limit on the number of retries with the backoff strategy that
/// decides how long to wait between them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StepRetryPolicy {
    /// Upper limit on retries: `0` disables retrying, `1` allows a single retry
    /// after the initial attempt, and so on.
    pub max_attempts: u32,
    /// Strategy that determines the wait between consecutive attempts.
    pub backoff: BackoffStrategy,
}
impl Default for StepRetryPolicy {
    /// Stock policy: `max_attempts` of 3 with jittered exponential backoff that
    /// starts at 1 second and is capped at 60 seconds before jitter.
    fn default() -> Self {
        let backoff = BackoffStrategy::ExponentialWithJitter {
            base_seconds: 1,
            max_seconds: 60,
        };
        Self {
            max_attempts: 3,
            backoff,
        }
    }
}
impl StepRetryPolicy {
    /// Calculate the delay, in seconds, to wait before retry number `attempt`.
    ///
    /// * `Fixed`: always `delay_seconds`, regardless of `attempt`.
    /// * `Exponential`: `base_seconds * 2^attempt`, saturating on overflow and
    ///   capped at `max_seconds`.
    /// * `ExponentialWithJitter`: the capped exponential delay plus a uniformly
    ///   random offset in `[-delay / 4, +delay / 4]`. The offset is applied
    ///   after the cap, so the result may exceed `max_seconds` by up to 25%.
    ///   Capped delays below 4 seconds are returned unchanged because a quarter
    ///   of them rounds down to zero.
    pub fn calculate_delay(&self, attempt: u32) -> u32 {
        match &self.backoff {
            BackoffStrategy::Fixed { delay_seconds } => *delay_seconds,
            BackoffStrategy::Exponential {
                base_seconds,
                max_seconds,
            } => Self::capped_exponential(*base_seconds, *max_seconds, attempt),
            BackoffStrategy::ExponentialWithJitter {
                base_seconds,
                max_seconds,
            } => {
                use rand::Rng;

                let capped_delay = Self::capped_exponential(*base_seconds, *max_seconds, attempt);

                // 25% of the delay. Lossless cast: `capped_delay / 4` is at most
                // `u32::MAX / 4`, which is below `i32::MAX`.
                let jitter_range = (capped_delay / 4) as i32;
                if jitter_range == 0 {
                    // No room for jitter on very small delays.
                    return capped_delay;
                }

                // Random (not attempt-derived) jitter keeps many workflows that
                // failed together from retrying at exactly the same moment.
                let jitter = rand::thread_rng().gen_range(-jitter_range..=jitter_range);

                // Jitter is only applied when `capped_delay >= 4`, so the result
                // is at least 3 seconds and can never become an immediate retry;
                // the saturating add guards the upper end near `u32::MAX`.
                capped_delay.saturating_add_signed(jitter)
            }
        }
    }

    /// Return `true` when `attempt` is strictly below `max_attempts`, i.e. the
    /// policy still permits a retry.
    pub fn should_retry(&self, attempt: u32) -> bool {
        attempt < self.max_attempts
    }

    /// Determine the retry delay, in seconds, preferring a hint carried by `error`.
    ///
    /// If `error` has a `retry_after` entry that is either an unsigned integer
    /// (plain seconds, e.g. from a Retry-After header) or an object with an
    /// unsigned integer `secs` field (a serialized `Duration { secs, nanos }`;
    /// only `secs` is read), that value is used. Hints larger than `u32::MAX`
    /// saturate to `u32::MAX` instead of wrapping around to a small delay.
    /// Otherwise the policy's backoff for `attempt` is returned.
    pub fn extract_retry_delay(&self, error: &serde_json::Value, attempt: u32) -> u32 {
        let hinted_secs = error.get("retry_after").and_then(|hint| {
            hint.as_u64()
                .or_else(|| hint.get("secs").and_then(serde_json::Value::as_u64))
        });

        match hinted_secs {
            // Clamp first so the cast cannot truncate: a bare `secs as u32`
            // would turn 2^32 seconds into 0, i.e. an immediate retry.
            Some(secs) => secs.min(u64::from(u32::MAX)) as u32,
            None => self.calculate_delay(attempt),
        }
    }

    /// Compute `base_seconds * 2^attempt` with saturating arithmetic and cap the
    /// result at `max_seconds`.
    fn capped_exponential(base_seconds: u32, max_seconds: u32, attempt: u32) -> u32 {
        base_seconds
            .saturating_mul(2u32.saturating_pow(attempt))
            .min(max_seconds)
    }
}
/// Workflow-level configuration (reserved for future use).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowConfig {
    /// Retry policy intended as the default for every step in the workflow.
    ///
    /// NOTE(review): how `None` is interpreted is decided by the consumer of
    /// this config and is not visible here — confirm against the caller.
    pub default_step_retry_policy: Option<StepRetryPolicy>,
}