d_engine/config/
retry.rs

1use std::fmt::Debug;
2
3use serde::Deserialize;
4use serde::Serialize;
5
6use crate::Error;
7use crate::Result;
8
9/// Configuration for exponential backoff retry strategy
10#[derive(Debug, Serialize, Deserialize, Clone, Copy, Default)]
11pub struct BackoffPolicy {
12    /// Maximum number of retries (0 means unlimited retries)
13    #[serde(default = "default_max_retries")]
14    pub max_retries: usize,
15
16    /// Single operation timeout (unit: milliseconds)
17    #[serde(default = "default_op_timeout_ms")]
18    pub timeout_ms: u64,
19
20    /// Backoff base (unit: milliseconds)
21    #[serde(default = "default_base_delay_ms")]
22    pub base_delay_ms: u64,
23
24    /// Maximum backoff time (unit: milliseconds)
25    #[serde(default = "default_max_delay_ms")]
26    pub max_delay_ms: u64,
27}
28
29/// Domain-specific retry strategy configurations for Raft subsystems
30/// Enables fine-grained control over different RPC types and operations
31#[derive(Serialize, Deserialize, Clone)]
32pub struct RetryPolicies {
33    /// Retry policy for AppendEntries RPC operations
34    /// Governs log replication attempts between leader and followers
35    #[serde(default)]
36    pub append_entries: BackoffPolicy,
37
38    /// Retry policy for RequestVote RPC operations
39    /// Controls election-related communication retry behavior
40    #[serde(default)]
41    pub election: BackoffPolicy,
42
43    /// Retry policy for cluster membership changes
44    /// Requires higher reliability for configuration change operations
45    #[serde(default)]
46    pub membership: BackoffPolicy,
47
48    /// Retry policy for node health checks
49    /// Optimized for frequent liveness detection with lower overhead
50    #[serde(default)]
51    pub healthcheck: BackoffPolicy,
52}
53
54impl Debug for RetryPolicies {
55    fn fmt(
56        &self,
57        f: &mut std::fmt::Formatter<'_>,
58    ) -> std::fmt::Result {
59        f.debug_struct("RetryPolicies").finish()
60    }
61}
62// Default value implementation
63impl Default for RetryPolicies {
64    fn default() -> Self {
65        Self {
66            append_entries: BackoffPolicy {
67                max_retries: 1,
68                timeout_ms: 100,
69                base_delay_ms: 50,
70                max_delay_ms: 1000,
71            },
72            election: BackoffPolicy {
73                max_retries: 3, // Note: `retries` > 3 might prevent a successful election.
74                timeout_ms: 100,
75                base_delay_ms: 50,
76                max_delay_ms: 5000,
77            },
78            membership: BackoffPolicy {
79                max_retries: 120,
80                timeout_ms: 500,
81                base_delay_ms: 3000,
82                max_delay_ms: 60000,
83            },
84            healthcheck: BackoffPolicy {
85                max_retries: 10000,
86                timeout_ms: 100,
87                base_delay_ms: 1000,
88                max_delay_ms: 10000,
89            },
90        }
91    }
92}
93impl BackoffPolicy {
94    /// Validates backoff policy parameters
95    /// # Errors
96    /// Returns `Error::InvalidConfig` when:
97    /// - Timeout exceeds maximum delay
98    /// - Base delay > max delay
99    /// - Infinite retries without proper safeguards
100    pub fn validate(
101        &self,
102        policy_name: &str,
103    ) -> Result<()> {
104        // Validate retry limits
105        if self.max_retries == 0 {
106            return Err(Error::InvalidConfig(format!(
107                "{}: max_retries=0 means infinite retries - dangerous for {} operations",
108                policy_name, policy_name
109            )));
110        }
111
112        // Validate timeout constraints
113        if self.timeout_ms == 0 {
114            return Err(Error::InvalidConfig(format!("{}: timeout_ms cannot be 0", policy_name)));
115        }
116
117        // Validate delay progression
118        if self.base_delay_ms >= self.max_delay_ms {
119            return Err(Error::InvalidConfig(format!(
120                "{}: base_delay_ms({}) must be less than max_delay_ms({})",
121                policy_name, self.base_delay_ms, self.max_delay_ms
122            )));
123        }
124
125        // Ensure reasonable maximums
126        if self.max_delay_ms > 120_000 {
127            // 2 minutes
128            return Err(Error::InvalidConfig(format!(
129                "{}: max_delay_ms({}) exceeds 2min limit",
130                policy_name, self.max_delay_ms
131            )));
132        }
133
134        Ok(())
135    }
136}
137
138impl RetryPolicies {
139    /// Validates all retry policies according to Raft protocol requirements
140    pub fn validate(&self) -> Result<()> {
141        self.validate_append_entries()?;
142        self.validate_election()?;
143        self.validate_membership()?;
144        self.validate_healthcheck()?;
145        Ok(())
146    }
147
148    fn validate_append_entries(&self) -> Result<()> {
149        self.append_entries.validate("append_entries")?;
150
151        Ok(())
152    }
153
154    fn validate_election(&self) -> Result<()> {
155        self.election.validate("election")?;
156
157        Ok(())
158    }
159
160    fn validate_membership(&self) -> Result<()> {
161        self.membership.validate("membership")?;
162
163        Ok(())
164    }
165
166    fn validate_healthcheck(&self) -> Result<()> {
167        self.healthcheck.validate("healthcheck")?;
168
169        Ok(())
170    }
171}
172
/// Serde fallback for `BackoffPolicy::max_retries` when the field is omitted.
fn default_max_retries() -> usize {
    const FALLBACK_MAX_RETRIES: usize = 3;
    FALLBACK_MAX_RETRIES
}
/// Serde fallback for `BackoffPolicy::timeout_ms` (milliseconds) when the
/// field is omitted.
fn default_op_timeout_ms() -> u64 {
    const FALLBACK_TIMEOUT_MS: u64 = 100;
    FALLBACK_TIMEOUT_MS
}
/// Serde fallback for `BackoffPolicy::base_delay_ms` (milliseconds) when the
/// field is omitted.
fn default_base_delay_ms() -> u64 {
    const FALLBACK_BASE_DELAY_MS: u64 = 50;
    FALLBACK_BASE_DELAY_MS
}
/// Serde fallback for `BackoffPolicy::max_delay_ms` (milliseconds) when the
/// field is omitted.
fn default_max_delay_ms() -> u64 {
    const FALLBACK_MAX_DELAY_MS: u64 = 1000;
    FALLBACK_MAX_DELAY_MS
}