runctl/error.rs
1//! Error types for runctl
2//!
3//! This module defines the error handling strategy for runctl. There are two
4//! error types: `TrainctlError` (main error enum) and `ConfigError` (configuration-specific).
5//!
6//! ## Error Handling Philosophy
7//!
8//! Library code uses `crate::error::Result<T>` which returns `TrainctlError`.
9//! CLI code uses `anyhow::Result<T>` for top-level error handling. The conversion
10//! happens at the CLI boundary using `anyhow::Error::from` to preserve error chains.
11//!
12//! This split exists because:
13//! - Library code benefits from structured error types for programmatic handling
14//! - CLI code benefits from `anyhow`'s context chains and user-friendly display
15//! - Conversion preserves full error information (not just strings)
16//!
17//! ## Retry Awareness
18//!
19//! Errors implement `IsRetryable` to indicate whether an operation should be retried.
20//! The `RetryPolicy` in `src/retry.rs` uses this to determine retry behavior.
21//! Only `CloudProvider`, `Io`, and `Retryable` variants are retryable by default.
22//!
23//! Non-retryable errors (e.g., `Validation`, `Config`) fail immediately to avoid
24//! wasting time on operations that cannot succeed.
25//!
26//! ## When to Use Which Error
27//!
28//! - `ConfigError`: Configuration parsing and validation issues
29//! - Automatically converted to `TrainctlError::Config` via `#[from]`
30//!
31//! - `CloudProvider`: Generic cloud API failures (provider-agnostic)
32//! - Use for provider-agnostic errors that could occur with any cloud
33//! - Retryable by default
34//!
//! - `Aws`/`S3`/`Ssm`: AWS-specific errors
//!   - Use when AWS-specific context matters for debugging
//!   - Not retryable as-is: `is_retryable` only matches `CloudProvider`, `Io`, and
//!     `Retryable`, so report an AWS failure as `CloudProvider` when it should be retried
38//!
39//! - `ResourceNotFound`/`ResourceExists`: Resource lifecycle errors
40//! - Use when resources don't exist or already exist
41//! - Not retryable (idempotency issues, not transient failures)
42//!
43//! - `Validation`: Input validation failures
44//! - Use for user input validation (instance IDs, paths, etc.)
45//! - Not retryable (invalid input won't become valid)
46
47use crate::provider::ResourceId;
48use thiserror::Error;
49
50/// Main error type for runctl
51#[derive(Error, Debug)]
52pub enum TrainctlError {
53 #[error("Configuration error: {0}")]
54 Config(#[from] ConfigError),
55
56 #[error("Cloud provider error: {provider} - {message}")]
57 CloudProvider {
58 provider: String,
59 message: String,
60 #[source]
61 source: Option<Box<dyn std::error::Error + Send + Sync>>,
62 },
63
64 #[error("Resource error: {resource_type} - {operation} failed")]
65 Resource {
66 resource_type: String,
67 operation: String,
68 resource_id: Option<ResourceId>,
69 message: String,
70 #[source]
71 source: Option<Box<dyn std::error::Error + Send + Sync>>,
72 },
73
74 #[error("Resource already exists: {resource_type} - {resource_id}")]
75 ResourceExists {
76 resource_type: String,
77 resource_id: ResourceId,
78 },
79
80 #[error("Resource not found: {resource_type} - {resource_id}")]
81 ResourceNotFound {
82 resource_type: String,
83 resource_id: ResourceId,
84 },
85
86 #[error("Retryable error (attempt {attempt}/{max_attempts}): {reason}")]
87 // Note: Used in IsRetryable trait implementation, but not yet constructed in code
88 #[allow(dead_code)] // Reserved for future retry logic
89 Retryable {
90 attempt: u32,
91 max_attempts: u32,
92 reason: String,
93 #[source]
94 source: Option<Box<dyn std::error::Error + Send + Sync>>,
95 },
96
97 #[error("I/O error: {0}")]
98 Io(#[from] std::io::Error),
99
100 #[error("AWS SDK error: {0}")]
101 Aws(String),
102
103 #[error("S3 error: {0}")]
104 S3(String),
105
106 #[error("SSM error: {0}")]
107 Ssm(String),
108
109 #[error("Validation error: {field} - {reason}")]
110 Validation { field: String, reason: String },
111
112 #[error("Cost tracking error: {0}")]
113 #[allow(dead_code)] // Reserved for future cost tracking
114 CostTracking(String),
115
116 #[error("Cleanup error: {0}")]
117 #[allow(dead_code)] // Reserved for future cleanup features
118 Cleanup(String),
119
120 #[error("Data transfer error: {0}")]
121 DataTransfer(String),
122
123 #[error("JSON serialization error: {0}")]
124 Json(#[from] serde_json::Error),
125}
126
127/// Configuration-specific errors
128#[derive(Error, Debug)]
129pub enum ConfigError {
130 #[error("Invalid cloud provider: {0}")]
131 #[allow(dead_code)] // Reserved for future provider validation
132 InvalidProvider(String),
133
134 #[error("Missing required field: {0}")]
135 MissingField(String),
136
137 #[error("Invalid value for {field}: {reason}")]
138 InvalidValue { field: String, reason: String },
139
140 #[error("Config file not found: {0}")]
141 #[allow(dead_code)] // Reserved for future resource lookup
142 NotFound(String),
143
144 #[error("Failed to parse config: {0}")]
145 ParseError(String),
146}
147
148/// Result type alias
149pub type Result<T> = std::result::Result<T, TrainctlError>;
150
/// Classifies an error as worth retrying or not.
///
/// `RetryPolicy` implementations consult this trait to decide whether a
/// failed operation should get another attempt.
///
/// `src/retry.rs` relies on this trait, so it must not be marked as dead_code.
pub trait IsRetryable {
    /// Returns `true` when the error should trigger a retry attempt.
    fn is_retryable(&self) -> bool;
}
160
161impl IsRetryable for TrainctlError {
162 fn is_retryable(&self) -> bool {
163 matches!(
164 self,
165 TrainctlError::Retryable { .. }
166 | TrainctlError::CloudProvider { .. }
167 | TrainctlError::Io(_)
168 )
169 }
170}
171
172// Helper to convert AWS SDK errors
173// Note: AWS SDK v1 errors are complex, so we handle them manually in code