Skip to main content

taskvisor/
error.rs

//! # Error types used by the taskvisor runtime and tasks.
//!
//! This module defines two main error enums:
//!
//! - [`RuntimeError`]: errors raised by the orchestration runtime itself.
//! - [`TaskError`]: errors raised by individual task executions.
//!
//! Both types provide an `as_label` helper method that returns a short, stable label for metrics.
//! [`TaskError`] additionally provides the `is_retryable()` and `is_fatal()` methods.
10
11use std::time::Duration;
12
13use thiserror::Error;
14
15/// # Errors produced by the taskvisor runtime.
16///
17/// These represent failures in the orchestration system itself.
18#[non_exhaustive]
19#[derive(Error, Debug)]
20pub enum RuntimeError {
21    /// Shutdown grace period was exceeded; some tasks remained stuck and had to be force-terminated.
22    #[error("shutdown timeout {grace:?} exceeded; stuck: {stuck:?}; forcing termination")]
23    GraceExceeded {
24        /// The configured grace duration.
25        grace: Duration,
26        /// List of task names that did not shut down in time.
27        stuck: Vec<String>,
28    },
29    /// Attempted to add a task with a name that already exists in the registry.
30    #[error("task '{name}' already exists in registry")]
31    TaskAlreadyExists {
32        /// The duplicate task name.
33        name: String,
34    },
35    /// Attempted to remove a task that doesn't exist in the registry.
36    #[error("task '{name}' not found in registry")]
37    TaskNotFound {
38        /// The missing task name.
39        name: String,
40    },
41    /// Timeout waiting for task removal confirmation.
42    #[error("timeout waiting for task '{name}' removal after {timeout:?}")]
43    TaskRemoveTimeout {
44        /// Task which timeout on cancel.
45        name: String,
46        // Task timeout duration.
47        timeout: Duration,
48    },
49}
50
51impl RuntimeError {
52    /// Returns a short stable label (snake_case) for use in logs/metrics.
53    pub fn as_label(&self) -> &'static str {
54        match self {
55            RuntimeError::GraceExceeded { .. } => "runtime_grace_exceeded",
56            RuntimeError::TaskAlreadyExists { .. } => "runtime_task_already_exists",
57            RuntimeError::TaskNotFound { .. } => "runtime_task_not_found",
58            RuntimeError::TaskRemoveTimeout { .. } => "runtime_task_remove_timeout",
59        }
60    }
61}
62
63/// # Errors produced by task execution.
64///
65/// These represent failures of individual async tasks managed by the runtime.
66/// Some errors are retryable (`Timeout`, `Fail`), others are considered fatal.
67#[non_exhaustive]
68#[derive(Error, Debug)]
69pub enum TaskError {
70    /// Task execution exceeded its timeout duration.
71    #[error("timed out after {timeout:?}")]
72    Timeout { timeout: Duration },
73
74    /// Non-recoverable fatal error (should not be retried).
75    #[error("fatal error (no retry): {reason}")]
76    Fatal { reason: String },
77
78    /// Task execution failed but may succeed if retried.
79    #[error("execution failed: {reason}")]
80    Fail { reason: String },
81
82    /// Task was canceled due to shut down or parent cancellation.
83    ///
84    /// This is **not an error** in traditional sense, but signals intentional termination.
85    #[error("context canceled")]
86    Canceled,
87}
88
89impl TaskError {
90    /// Returns a short stable label.
91    pub fn as_label(&self) -> &'static str {
92        match self {
93            TaskError::Timeout { .. } => "task_timeout",
94            TaskError::Fatal { .. } => "task_fatal",
95            TaskError::Fail { .. } => "task_failed",
96            TaskError::Canceled => "task_canceled",
97        }
98    }
99
100    /// Indicates whether the error type is safe to retry.
101    pub fn is_retryable(&self) -> bool {
102        matches!(self, TaskError::Timeout { .. } | TaskError::Fail { .. })
103    }
104
105    /// Indicates whether the error is fatal.
106    pub fn is_fatal(&self) -> bool {
107        matches!(self, TaskError::Fatal { .. })
108    }
109}