taskvisor/error.rs
//! # Error types used by the taskvisor runtime and tasks.
//!
//! This module defines two main error enums:
//!
//! - [`RuntimeError`]: errors raised by the orchestration runtime itself.
//! - [`TaskError`]: errors raised by individual task executions.
//!
//! Both types provide an `as_label()` helper that returns a stable label for metrics.
//! [`TaskError`] additionally provides `is_retryable()` and `is_fatal()`.
10
11use std::time::Duration;
12
13use thiserror::Error;
14
15/// # Errors produced by the taskvisor runtime.
16///
17/// These represent failures in the orchestration system itself.
18#[non_exhaustive]
19#[derive(Error, Debug)]
20pub enum RuntimeError {
21 /// Shutdown grace period was exceeded; some tasks remained stuck and had to be force-terminated.
22 #[error("shutdown timeout {grace:?} exceeded; stuck: {stuck:?}; forcing termination")]
23 GraceExceeded {
24 /// The configured grace duration.
25 grace: Duration,
26 /// List of task names that did not shut down in time.
27 stuck: Vec<String>,
28 },
29 /// Attempted to add a task with a name that already exists in the registry.
30 #[error("task '{name}' already exists in registry")]
31 TaskAlreadyExists {
32 /// The duplicate task name.
33 name: String,
34 },
35 /// Attempted to remove a task that doesn't exist in the registry.
36 #[error("task '{name}' not found in registry")]
37 TaskNotFound {
38 /// The missing task name.
39 name: String,
40 },
41 /// Timeout waiting for task removal confirmation.
42 #[error("timeout waiting for task '{name}' removal after {timeout:?}")]
43 TaskRemoveTimeout {
44 /// Task which timeout on cancel.
45 name: String,
46 // Task timeout duration.
47 timeout: Duration,
48 },
49}
50
51impl RuntimeError {
52 /// Returns a short stable label (snake_case) for use in logs/metrics.
53 pub fn as_label(&self) -> &'static str {
54 match self {
55 RuntimeError::GraceExceeded { .. } => "runtime_grace_exceeded",
56 RuntimeError::TaskAlreadyExists { .. } => "runtime_task_already_exists",
57 RuntimeError::TaskNotFound { .. } => "runtime_task_not_found",
58 RuntimeError::TaskRemoveTimeout { .. } => "runtime_task_remove_timeout",
59 }
60 }
61}
62
63/// # Errors produced by task execution.
64///
65/// These represent failures of individual async tasks managed by the runtime.
66/// Some errors are retryable (`Timeout`, `Fail`), others are considered fatal.
67#[non_exhaustive]
68#[derive(Error, Debug)]
69pub enum TaskError {
70 /// Task execution exceeded its timeout duration.
71 #[error("timed out after {timeout:?}")]
72 Timeout { timeout: Duration },
73
74 /// Non-recoverable fatal error (should not be retried).
75 #[error("fatal error (no retry): {reason}")]
76 Fatal { reason: String },
77
78 /// Task execution failed but may succeed if retried.
79 #[error("execution failed: {reason}")]
80 Fail { reason: String },
81
82 /// Task was canceled due to shut down or parent cancellation.
83 ///
84 /// This is **not an error** in traditional sense, but signals intentional termination.
85 #[error("context canceled")]
86 Canceled,
87}
88
89impl TaskError {
90 /// Returns a short stable label.
91 pub fn as_label(&self) -> &'static str {
92 match self {
93 TaskError::Timeout { .. } => "task_timeout",
94 TaskError::Fatal { .. } => "task_fatal",
95 TaskError::Fail { .. } => "task_failed",
96 TaskError::Canceled => "task_canceled",
97 }
98 }
99
100 /// Indicates whether the error type is safe to retry.
101 pub fn is_retryable(&self) -> bool {
102 matches!(self, TaskError::Timeout { .. } | TaskError::Fail { .. })
103 }
104
105 /// Indicates whether the error is fatal.
106 pub fn is_fatal(&self) -> bool {
107 matches!(self, TaskError::Fatal { .. })
108 }
109}