1use std::io;
8
/// Shorthand for `std::result::Result` with the error type fixed to [`ClusterError`].
///
/// Importing this alias shadows the prelude `Result`; code that needs a different
/// error type alongside it has to spell out `std::result::Result<T, E>`.
pub type Result<T> = std::result::Result<T, ClusterError>;
11
/// Unified error enum used as the `Err` side of this module's [`Result`] alias.
///
/// The `Display` text of every variant comes from its `#[error("...")]`
/// attribute (expanded by `thiserror`). Most variants wrap a single `String`
/// that is interpolated into the message as `{0}`; what that string contains
/// (an identifier, a description, ...) is decided by the call site and is not
/// constrained here.
///
/// The inherent methods `is_retryable`, `is_permanent` and `requires_failover`
/// classify a subset of these variants; variants not named by a method simply
/// yield `false` for it.
#[derive(Debug, thiserror::Error)]
pub enum ClusterError {
    // --- Tasks and scheduling ---
    /// Scheduling-related failure; the payload is the detail text.
    #[error("Task scheduling error: {0}")]
    SchedulerError(String),

    /// A task lookup failed (payload presumably identifies the task — confirm
    /// against callers). Classified as permanent by `is_permanent`.
    #[error("Task not found: {0}")]
    TaskNotFound(String),

    /// A cycle was found among task dependencies. Classified as permanent by
    /// `is_permanent`.
    #[error("Task dependency cycle detected: {0}")]
    DependencyCycle(String),

    // --- Workers ---
    /// Worker-pool failure; the payload is the detail text.
    #[error("Worker pool error: {0}")]
    WorkerPoolError(String),

    /// A worker lookup failed (payload presumably identifies the worker).
    /// `requires_failover` returns `true` for this variant.
    #[error("Worker not found: {0}")]
    WorkerNotFound(String),

    /// A worker was reported unhealthy. Both `is_retryable` and
    /// `requires_failover` return `true` for this variant.
    #[error("Worker unhealthy: {0}")]
    WorkerUnhealthy(String),

    /// Worker capacity was exceeded; the payload is the detail text.
    #[error("Worker capacity exceeded: {0}")]
    CapacityExceeded(String),

    // --- Data placement ---
    /// Data-locality failure; the payload is the detail text.
    #[error("Data locality error: {0}")]
    DataLocalityError(String),

    /// Requested data is not available; the payload is the detail text.
    #[error("Data not available: {0}")]
    DataNotAvailable(String),

    // --- Fault tolerance ---
    /// Fault-tolerance failure; the payload is the detail text.
    #[error("Fault tolerance error: {0}")]
    FaultToleranceError(String),

    /// The retry budget for a task ran out (payload presumably identifies the
    /// task). Classified as permanent by `is_permanent`.
    #[error("Maximum retries exceeded for task: {0}")]
    MaxRetriesExceeded(String),

    /// Checkpoint-related failure; the payload is the detail text.
    #[error("Checkpoint error: {0}")]
    CheckpointError(String),

    // --- Caching ---
    /// Cache failure; the payload is the detail text.
    #[error("Cache error: {0}")]
    CacheError(String),

    /// A cache-coherency rule was violated; the payload is the detail text.
    #[error("Cache coherency violation: {0}")]
    CoherencyViolation(String),

    // --- Replication ---
    /// Replication failure; the payload is the detail text.
    #[error("Replication error: {0}")]
    ReplicationError(String),

    /// Fewer participants responded than the quorum demands. The two fields
    /// are interpolated by name into the message. `is_retryable` returns
    /// `true` for this variant.
    #[error("Quorum not reached: required {required}, got {actual}")]
    QuorumNotReached {
        /// Count that had to be reached (unit — votes, acks, replicas — is
        /// not visible here; confirm against callers).
        required: usize,
        /// Count that was actually obtained.
        actual: usize,
    },

    /// Replica-placement failure; the payload is the detail text.
    #[error("Replica placement error: {0}")]
    ReplicaPlacementError(String),

    // --- Coordination and consensus ---
    /// Coordinator failure; the payload is the detail text.
    #[error("Coordinator error: {0}")]
    CoordinatorError(String),

    /// Leader election did not succeed. `requires_failover` returns `true`
    /// for this variant.
    #[error("Leader election failed: {0}")]
    LeaderElectionFailed(String),

    /// No leader is currently known. Carries no payload. Both `is_retryable`
    /// and `requires_failover` return `true` for this variant.
    #[error("No leader available")]
    NoLeader,

    /// Consensus failure; the payload is the detail text.
    /// NOTE(review): `RaftError` below looks related — confirm which one
    /// callers are expected to use.
    #[error("Consensus error: {0}")]
    ConsensusError(String),

    // --- Cross-cutting infrastructure ---
    /// Metrics failure; the payload is the detail text.
    #[error("Metrics error: {0}")]
    MetricsError(String),

    /// Serialization failure described by a plain string (contrast with
    /// `Json`, which wraps the underlying `serde_json::Error`).
    #[error("Serialization error: {0}")]
    SerializationError(String),

    /// Network failure described by a plain string. `is_retryable` returns
    /// `true` for this variant.
    #[error("Network error: {0}")]
    NetworkError(String),

    /// An operation timed out. `is_retryable` returns `true` for this variant.
    #[error("Operation timed out: {0}")]
    Timeout(String),

    /// Configuration failure. Classified as permanent by `is_permanent`.
    #[error("Configuration error: {0}")]
    ConfigError(String),

    /// A resource was exhausted. `is_retryable` returns `true` for this
    /// variant.
    #[error("Resource exhausted: {0}")]
    ResourceExhausted(String),

    /// Something was in a state that does not permit the operation.
    /// Classified as permanent by `is_permanent`.
    #[error("Invalid state: {0}")]
    InvalidState(String),

    /// A task failed while executing; the payload is the detail text.
    #[error("Task execution error: {0}")]
    ExecutionError(String),

    /// A task was cancelled; the payload is the detail text.
    #[error("Task cancelled: {0}")]
    TaskCancelled(String),

    /// Wraps a `std::io::Error`. `#[from]` makes `thiserror` generate
    /// `From<io::Error>`, so `?` converts I/O errors into this variant.
    #[error("IO error: {0}")]
    Io(#[from] io::Error),

    /// Wraps a `serde_json::Error`. `#[from]` makes `thiserror` generate the
    /// matching `From` impl, so `?` converts JSON errors into this variant.
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),

    /// Raft-related failure; the payload is the detail text.
    #[error("Raft error: {0}")]
    RaftError(String),

    // --- Quotas and reservations ---
    /// A quota was exceeded; the payload is the detail text.
    #[error("Quota exceeded: {0}")]
    QuotaExceeded(String),

    /// A reservation lookup failed (payload presumably identifies it).
    #[error("Reservation not found: {0}")]
    ReservationNotFound(String),

    /// A resource is not available. Unlike `ResourceExhausted`, this variant
    /// is not listed by `is_retryable`.
    #[error("Resource not available: {0}")]
    ResourceNotAvailable(String),

    // --- Validation ---
    /// The requested operation is not valid; the payload is the detail text.
    #[error("Invalid operation: {0}")]
    InvalidOperation(String),

    /// The configuration is not valid.
    /// NOTE(review): overlaps with `ConfigError`, but only `ConfigError` is
    /// listed by `is_permanent` — confirm the difference is intentional.
    #[error("Invalid configuration: {0}")]
    InvalidConfiguration(String),

    // --- Lookups ---
    /// A workflow lookup failed (payload presumably identifies it).
    #[error("Workflow not found: {0}")]
    WorkflowNotFound(String),

    /// A metric lookup failed (payload presumably identifies it).
    #[error("Metric not found: {0}")]
    MetricNotFound(String),

    /// An alert lookup failed (payload presumably identifies it).
    #[error("Alert not found: {0}")]
    AlertNotFound(String),

    // --- Security ---
    /// Authentication did not succeed; the payload is the detail text.
    #[error("Authentication failed: {0}")]
    AuthenticationFailed(String),

    /// The caller lacks permission; the payload is the detail text.
    #[error("Permission denied: {0}")]
    PermissionDenied(String),

    /// A secret lookup failed (payload presumably identifies it).
    #[error("Secret not found: {0}")]
    SecretNotFound(String),

    // --- Miscellaneous ---
    /// Compression failure; the payload is the detail text.
    #[error("Compression error: {0}")]
    CompressionError(String),

    /// Catch-all for failures that fit no other variant.
    #[error("Other error: {0}")]
    Other(String),
}
204
205impl ClusterError {
206 pub fn is_retryable(&self) -> bool {
208 matches!(
209 self,
210 ClusterError::NetworkError(_)
211 | ClusterError::Timeout(_)
212 | ClusterError::WorkerUnhealthy(_)
213 | ClusterError::QuorumNotReached { .. }
214 | ClusterError::NoLeader
215 | ClusterError::ResourceExhausted(_)
216 )
217 }
218
219 pub fn is_permanent(&self) -> bool {
221 matches!(
222 self,
223 ClusterError::DependencyCycle(_)
224 | ClusterError::TaskNotFound(_)
225 | ClusterError::ConfigError(_)
226 | ClusterError::InvalidState(_)
227 | ClusterError::MaxRetriesExceeded(_)
228 )
229 }
230
231 pub fn requires_failover(&self) -> bool {
233 matches!(
234 self,
235 ClusterError::WorkerNotFound(_)
236 | ClusterError::WorkerUnhealthy(_)
237 | ClusterError::NoLeader
238 | ClusterError::LeaderElectionFailed(_)
239 )
240 }
241}
242
#[cfg(test)]
// Lets test code call `unwrap`/`expect`; presumably those clippy lints are
// enabled for the rest of the crate — confirm in the crate-level lint config.
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A network error is retryable and not permanent; a dependency cycle is
    /// the opposite on both counts.
    #[test]
    fn test_error_retryable() {
        let network = ClusterError::NetworkError(String::from("connection failed"));
        assert!(network.is_retryable());
        assert!(!network.is_permanent());

        let cycle = ClusterError::DependencyCycle(String::from("cycle detected"));
        assert!(!cycle.is_retryable());
        assert!(cycle.is_permanent());
    }

    /// Both a missing worker and a missing leader call for failover.
    #[test]
    fn test_error_requires_failover() {
        let missing_worker = ClusterError::WorkerNotFound(String::from("worker123"));
        assert!(missing_worker.requires_failover());

        let leaderless = ClusterError::NoLeader;
        assert!(leaderless.requires_failover());
    }

    /// An unmet quorum is retryable and not permanent.
    #[test]
    fn test_quorum_error() {
        let (required, actual) = (3, 2);
        let short_quorum = ClusterError::QuorumNotReached { required, actual };
        assert!(short_quorum.is_retryable());
        assert!(!short_quorum.is_permanent());
    }
}