//! Agent-specific errors.
use std::time::Duration;
use thiserror::Error;
/// Errors produced by the agent runtime.
///
/// The `Display` text of each variant is generated by `thiserror` from the
/// `#[error(...)]` attribute attached to it.
#[derive(Debug, Error)]
pub enum AgentError {
    /// The container could not be found.
    #[error("Container '{container}' not found: {reason}")]
    NotFound {
        /// The container that was not found.
        container: String,
        /// Explanation included in the message.
        reason: String,
    },

    /// Pulling an image failed.
    #[error("Failed to pull image '{image}': {reason}")]
    PullFailed {
        /// The image that could not be pulled.
        image: String,
        /// Explanation included in the message.
        reason: String,
    },

    /// Creating a container failed.
    #[error("Failed to create container '{id}': {reason}")]
    CreateFailed {
        /// Identifier of the container that could not be created.
        id: String,
        /// Explanation included in the message.
        reason: String,
    },

    /// Starting a container failed.
    #[error("Failed to start container '{id}': {reason}")]
    StartFailed {
        /// Identifier of the container that could not be started.
        id: String,
        /// Explanation included in the message.
        reason: String,
    },

    /// A container exited unexpectedly.
    #[error("Container '{id}' exited unexpectedly with code {code}")]
    UnexpectedExit {
        /// Identifier of the container that exited.
        id: String,
        /// Exit code reported in the message.
        code: i32,
    },

    /// A health check failed.
    #[error("Health check failed for '{id}': {reason}")]
    HealthCheckFailed {
        /// Identifier the health check failed for.
        id: String,
        /// Explanation included in the message.
        reason: String,
    },

    /// An init action failed.
    #[error("Init action failed for '{id}': {reason}")]
    InitActionFailed {
        /// Identifier the init action failed for.
        id: String,
        /// Explanation included in the message.
        reason: String,
    },

    /// An operation timed out.
    #[error("Timeout after {timeout:?}")]
    Timeout {
        /// Duration reported in the message (rendered with `Debug`).
        timeout: Duration,
    },

    /// A service timed out while waiting for a dependency condition.
    #[error(
        "Dependency timeout: '{service}' waiting for '{dependency}' ({condition}) after {timeout:?}"
    )]
    DependencyTimeout {
        /// The service that was waiting.
        service: String,
        /// The dependency it was waiting for.
        dependency: String,
        /// The dependency condition, as text.
        condition: String,
        /// Duration reported in the message (rendered with `Debug`).
        timeout: Duration,
    },

    /// The spec is invalid; the payload is the detail text.
    #[error("Invalid spec: {0}")]
    InvalidSpec(String),

    /// Network setup or a network operation failed; the payload is the detail text.
    #[error("Network error: {0}")]
    Network(String),

    /// Configuration is missing or invalid; the payload is the detail text.
    #[error("Configuration error: {0}")]
    Configuration(String),

    /// Internal runtime error; the payload is the detail text.
    #[error("Internal error: {0}")]
    Internal(String),

    /// The operation is not supported by this runtime; the payload is the detail text.
    #[error("Operation not supported by this runtime: {0}")]
    Unsupported(String),

    /// The workload cannot run on this node and has to be re-placed on a
    /// peer that can satisfy `required_os`.
    ///
    /// [`crate::runtimes::composite::CompositeRuntime::select_for`] returns
    /// this when a foreign-OS workload (today: Linux on a Windows node) lands
    /// on a node without a suitable local runtime, for example because no
    /// WSL2 delegate is configured. The scheduler is expected to catch it and
    /// re-dispatch to a cluster peer whose `NodeState.os` matches
    /// `required_os`. If no capable peer exists, the scheduler marks the
    /// service failed with an actionable message naming both remediations:
    /// enable the local WSL2 delegate, or add a Linux peer to the cluster.
    ///
    /// This is *not* a container failure. The service manager must surface it
    /// to the scheduler unchanged and must not roll up `CreateFailed` on top
    /// of it; doing so would lose the rescheduling signal.
    #[error(
        "route-to-peer: service '{service}' requires OS '{required_os}' on another node: {reason}"
    )]
    RouteToPeer {
        /// Name of the service that needs to be re-placed.
        service: String,
        /// OS the workload requires (OCI-canonical: `linux` / `windows` / `darwin`).
        required_os: String,
        /// Human-readable explanation (e.g. "no WSL2 delegate configured on this Windows node").
        reason: String,
    },
}
/// Result alias whose error type defaults to [`AgentError`].
///
/// The error parameter can still be overridden: `Result<T, OtherError>`.
pub type Result<T, E = AgentError> = std::result::Result<T, E>;