Skip to main content

zlayer_agent/
error.rs

//! Agent-specific errors: the [`AgentError`] enum and the [`Result`] alias built on it.
2
3use std::time::Duration;
4use thiserror::Error;
5
6/// Agent runtime errors
7#[derive(Debug, Error)]
8pub enum AgentError {
9    /// Container not found
10    #[error("Container '{container}' not found: {reason}")]
11    NotFound { container: String, reason: String },
12
13    /// Failed to pull image
14    #[error("Failed to pull image '{image}': {reason}")]
15    PullFailed { image: String, reason: String },
16
17    /// Failed to create container
18    #[error("Failed to create container '{id}': {reason}")]
19    CreateFailed { id: String, reason: String },
20
21    /// Failed to start container
22    #[error("Failed to start container '{id}': {reason}")]
23    StartFailed { id: String, reason: String },
24
25    /// Container exited unexpectedly
26    #[error("Container '{id}' exited unexpectedly with code {code}")]
27    UnexpectedExit { id: String, code: i32 },
28
29    /// Health check failed
30    #[error("Health check failed for '{id}': {reason}")]
31    HealthCheckFailed { id: String, reason: String },
32
33    /// Init action failed
34    #[error("Init action failed for '{id}': {reason}")]
35    InitActionFailed { id: String, reason: String },
36
37    /// Timeout
38    #[error("Timeout after {timeout:?}")]
39    Timeout { timeout: Duration },
40
41    /// Dependency timeout - service waiting for dependency condition
42    #[error("Dependency timeout: '{service}' waiting for '{dependency}' ({condition}) after {timeout:?}")]
43    DependencyTimeout {
44        service: String,
45        dependency: String,
46        condition: String,
47        timeout: Duration,
48    },
49
50    /// Invalid spec
51    #[error("Invalid spec: {0}")]
52    InvalidSpec(String),
53
54    /// Network setup or operation failed
55    #[error("Network error: {0}")]
56    Network(String),
57
58    /// Configuration error (missing or invalid configuration)
59    #[error("Configuration error: {0}")]
60    Configuration(String),
61
62    /// Internal runtime error
63    #[error("Internal error: {0}")]
64    Internal(String),
65
66    /// Operation is not supported by this runtime
67    #[error("Operation not supported by this runtime: {0}")]
68    Unsupported(String),
69
70    /// The workload cannot run on this node and must be re-placed on a peer
71    /// that can satisfy `required_os`.
72    ///
73    /// Returned by [`crate::runtimes::composite::CompositeRuntime::select_for`]
74    /// when a foreign-OS workload (today: Linux on a Windows node) lands on a
75    /// node that has no suitable local runtime (e.g. no WSL2 delegate
76    /// configured). The scheduler is expected to catch this and re-dispatch
77    /// to a cluster peer whose `NodeState.os` matches `required_os`. When no
78    /// capable peer exists the scheduler marks the service failed with an
79    /// actionable message naming both remediations (enable the local WSL2
80    /// delegate, or add a Linux peer to the cluster).
81    ///
82    /// This variant is *not* a container failure: the service manager must
83    /// surface it to the scheduler and must not roll up `CreateFailed` on top
84    /// of it, otherwise the rescheduling signal is lost.
85    #[error(
86        "route-to-peer: service '{service}' requires OS '{required_os}' on another node: {reason}"
87    )]
88    RouteToPeer {
89        /// Service name that needs to be re-placed.
90        service: String,
91        /// OS the workload requires (OCI-canonical: `linux` / `windows` / `darwin`).
92        required_os: String,
93        /// Human-readable explanation (e.g. "no WSL2 delegate configured on this Windows node").
94        reason: String,
95    },
96}
97
98pub type Result<T, E = AgentError> = std::result::Result<T, E>;