runctl/
error.rs

1//! Error types for runctl
2//!
3//! This module defines the error handling strategy for runctl. There are two
4//! error types: `TrainctlError` (main error enum) and `ConfigError` (configuration-specific).
5//!
6//! ## Error Handling Philosophy
7//!
8//! Library code uses `crate::error::Result<T>` which returns `TrainctlError`.
9//! CLI code uses `anyhow::Result<T>` for top-level error handling. The conversion
10//! happens at the CLI boundary using `anyhow::Error::from` to preserve error chains.
11//!
12//! This split exists because:
13//! - Library code benefits from structured error types for programmatic handling
14//! - CLI code benefits from `anyhow`'s context chains and user-friendly display
15//! - Conversion preserves full error information (not just strings)
16//!
17//! ## Retry Awareness
18//!
19//! Errors implement `IsRetryable` to indicate whether an operation should be retried.
20//! The `RetryPolicy` in `src/retry.rs` uses this to determine retry behavior.
21//! Only `CloudProvider`, `Io`, and `Retryable` variants are retryable by default.
22//!
23//! Non-retryable errors (e.g., `Validation`, `Config`) fail immediately to avoid
24//! wasting time on operations that cannot succeed.
25//!
26//! ## When to Use Which Error
27//!
28//! - `ConfigError`: Configuration parsing and validation issues
29//!   - Automatically converted to `TrainctlError::Config` via `#[from]`
30//!
31//! - `CloudProvider`: Generic cloud API failures (provider-agnostic)
32//!   - Use for provider-agnostic errors that could occur with any cloud
33//!   - Retryable by default
34//!
35//! - `Aws`/`S3`/`Ssm`: AWS-specific errors
36//!   - Use when AWS-specific context matters for debugging
//!   - Not retryable as-is: `is_retryable` only matches `CloudProvider`, `Io`, and
//!     `Retryable`, so wrap the failure in `CloudProvider` when a retry is wanted
38//!
39//! - `ResourceNotFound`/`ResourceExists`: Resource lifecycle errors
40//!   - Use when resources don't exist or already exist
41//!   - Not retryable (idempotency issues, not transient failures)
42//!
43//! - `Validation`: Input validation failures
44//!   - Use for user input validation (instance IDs, paths, etc.)
45//!   - Not retryable (invalid input won't become valid)
46
47use crate::provider::ResourceId;
48use thiserror::Error;
49
50/// Main error type for runctl
51#[derive(Error, Debug)]
52pub enum TrainctlError {
53    #[error("Configuration error: {0}")]
54    Config(#[from] ConfigError),
55
56    #[error("Cloud provider error: {provider} - {message}")]
57    CloudProvider {
58        provider: String,
59        message: String,
60        #[source]
61        source: Option<Box<dyn std::error::Error + Send + Sync>>,
62    },
63
64    #[error("Resource error: {resource_type} - {operation} failed")]
65    Resource {
66        resource_type: String,
67        operation: String,
68        resource_id: Option<ResourceId>,
69        message: String,
70        #[source]
71        source: Option<Box<dyn std::error::Error + Send + Sync>>,
72    },
73
74    #[error("Resource already exists: {resource_type} - {resource_id}")]
75    ResourceExists {
76        resource_type: String,
77        resource_id: ResourceId,
78    },
79
80    #[error("Resource not found: {resource_type} - {resource_id}")]
81    ResourceNotFound {
82        resource_type: String,
83        resource_id: ResourceId,
84    },
85
86    #[error("Retryable error (attempt {attempt}/{max_attempts}): {reason}")]
87    // Note: Used in IsRetryable trait implementation, but not yet constructed in code
88    #[allow(dead_code)] // Reserved for future retry logic
89    Retryable {
90        attempt: u32,
91        max_attempts: u32,
92        reason: String,
93        #[source]
94        source: Option<Box<dyn std::error::Error + Send + Sync>>,
95    },
96
97    #[error("I/O error: {0}")]
98    Io(#[from] std::io::Error),
99
100    #[error("AWS SDK error: {0}")]
101    Aws(String),
102
103    #[error("S3 error: {0}")]
104    S3(String),
105
106    #[error("SSM error: {0}")]
107    Ssm(String),
108
109    #[error("Validation error: {field} - {reason}")]
110    Validation { field: String, reason: String },
111
112    #[error("Cost tracking error: {0}")]
113    #[allow(dead_code)] // Reserved for future cost tracking
114    CostTracking(String),
115
116    #[error("Cleanup error: {0}")]
117    #[allow(dead_code)] // Reserved for future cleanup features
118    Cleanup(String),
119
120    #[error("Data transfer error: {0}")]
121    DataTransfer(String),
122
123    #[error("JSON serialization error: {0}")]
124    Json(#[from] serde_json::Error),
125}
126
127/// Configuration-specific errors
128#[derive(Error, Debug)]
129pub enum ConfigError {
130    #[error("Invalid cloud provider: {0}")]
131    #[allow(dead_code)] // Reserved for future provider validation
132    InvalidProvider(String),
133
134    #[error("Missing required field: {0}")]
135    MissingField(String),
136
137    #[error("Invalid value for {field}: {reason}")]
138    InvalidValue { field: String, reason: String },
139
140    #[error("Config file not found: {0}")]
141    #[allow(dead_code)] // Reserved for future resource lookup
142    NotFound(String),
143
144    #[error("Failed to parse config: {0}")]
145    ParseError(String),
146}
147
148/// Result type alias
149pub type Result<T> = std::result::Result<T, TrainctlError>;
150
/// Classifies an error as worth retrying or not.
///
/// `RetryPolicy` implementations consult this to decide whether a failed
/// operation should be attempted again.
///
/// This trait is actively used by `src/retry.rs` - do not mark as dead_code.
pub trait IsRetryable {
    /// Returns `true` when retrying the failed operation may succeed.
    fn is_retryable(&self) -> bool;
}
160
161impl IsRetryable for TrainctlError {
162    fn is_retryable(&self) -> bool {
163        matches!(
164            self,
165            TrainctlError::Retryable { .. }
166                | TrainctlError::CloudProvider { .. }
167                | TrainctlError::Io(_)
168        )
169    }
170}
171
172// Helper to convert AWS SDK errors
173// Note: AWS SDK v1 errors are complex, so we handle them manually in code