solti_runner/metrics/backend.rs
1//! # Metrics backend trait and label types.
2//!
3//! [`MetricsBackend`] is the abstraction for collecting task execution metrics.
4//! Concrete backends (e.g. `solti-prometheus`) implement this trait.
5//!
6//! See the [metrics module](super) for the convenience [`noop_metrics`](super::noop_metrics) constructor.
7
8use std::sync::Arc;
9
/// Identifies the runner implementation a metric sample belongs to.
///
/// Handed to [`MetricsBackend`] methods so that dashboards can break metrics down by runner backend.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum RunnerType {
    /// Runner backed by an OS subprocess.
    Subprocess,
    /// Runner backed by an OCI container.
    Container,
    /// Runner backed by WebAssembly.
    Wasm,
}

impl RunnerType {
    /// Returns the static string used as the metrics label value for this runner type.
    #[inline]
    pub fn as_label(self) -> &'static str {
        match self {
            RunnerType::Subprocess => "subprocess",
            RunnerType::Container => "container",
            RunnerType::Wasm => "wasm",
        }
    }
}
35
/// Classifies a runner setup/teardown error for metrics labeling.
///
/// Handed to [`MetricsBackend::record_runner_error`] so that errors are grouped under a bounded,
/// low-cardinality label instead of free-form strings.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RunnerErrorKind {
    /// Preparing the cgroup v2 (creating it or writing its attributes) failed.
    CgroupPrepareFailed,
    /// Applying runner-specific configuration to the task command failed
    /// (rlimits, capabilities, namespaces, …).
    BackendConfigFailed,
    /// The child process or actor could not be spawned.
    SpawnFailed,
    /// The runner module (WASM / container image) could not be loaded.
    ModuleLoadFailed,
}

impl RunnerErrorKind {
    /// Returns the static string used as the metrics label value for this error kind.
    #[inline]
    pub fn as_label(self) -> &'static str {
        match self {
            RunnerErrorKind::CgroupPrepareFailed => "cgroup_prepare_failed",
            RunnerErrorKind::BackendConfigFailed => "backend_config_failed",
            RunnerErrorKind::SpawnFailed => "spawn_failed",
            RunnerErrorKind::ModuleLoadFailed => "module_load_failed",
        }
    }
}
64
/// Task execution outcome for metrics classification.
///
/// Derives `Hash` (like the other label enums in this module) so an outcome can be used
/// directly as a `HashMap`/`HashSet` key, e.g. for per-outcome counters.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TaskOutcome {
    /// Task completed successfully.
    Success,
    /// Task failed.
    Failure,
    /// Task was canceled.
    Canceled,
    /// Task timed out.
    Timeout,
}

impl TaskOutcome {
    /// Returns the static string used as the metrics label value for this outcome.
    ///
    /// The `&self` receiver is kept as-is so that existing call sites, including the
    /// fully-qualified form `TaskOutcome::as_label(&outcome)`, continue to compile.
    #[inline]
    pub fn as_label(&self) -> &'static str {
        match self {
            Self::Success => "success",
            Self::Failure => "failure",
            Self::Canceled => "canceled",
            Self::Timeout => "timeout",
        }
    }
}
91
92/// Backend metrics collection interface.
93///
94/// This trait abstracts metrics collection across different backends.
95/// Implementations are injected via [`crate::BuildContext`] and used by all runners.
96///
97/// ## Also
98///
99/// - [`NoOpMetrics`](super::NoOpMetrics): zero-size default backend.
100/// - [`crate::BuildContext::metrics`]: access the handle from within a runner.
101/// - `solti-prometheus::PrometheusMetrics` is a production Prometheus implementation.
102pub trait MetricsBackend: Send + Sync + 'static {
103 /// Record task spawn event.
104 ///
105 /// Called when a task is submitted and starts executing.
106 fn record_task_started(&self, runner_type: RunnerType);
107
108 /// Record task completion with outcome and duration.
109 ///
110 /// Called when task exits (success, failure, timeout, cancel).
111 fn record_task_completed(
112 &self,
113 runner_type: RunnerType,
114 outcome: TaskOutcome,
115 duration_ms: u64,
116 );
117
118 /// Record runner-specific error during task setup/teardown.
119 ///
120 /// Called when runner fails to spawn/cleanup a task.
121 /// This is separate from task failures (which are `record_task_completed` with `Failure`).
122 fn record_runner_error(&self, runner_type: RunnerType, error_kind: RunnerErrorKind);
123}
124
125/// Shared handle to metrics backend.
126///
127/// Stored in [`crate::BuildContext`] and cloned into each task.
128pub type MetricsHandle = Arc<dyn MetricsBackend>;
129
#[cfg(test)]
mod tests {
    use super::*;

    // The label strings are what `as_label` hands to metrics backends, so each mapping is
    // pinned explicitly: an accidental rename should fail here rather than surface downstream.

    /// Every `RunnerErrorKind` variant maps to its expected label.
    #[test]
    fn runner_error_kind_as_label_maps_all_variants() {
        let expected = [
            (RunnerErrorKind::CgroupPrepareFailed, "cgroup_prepare_failed"),
            (RunnerErrorKind::BackendConfigFailed, "backend_config_failed"),
            (RunnerErrorKind::SpawnFailed, "spawn_failed"),
            (RunnerErrorKind::ModuleLoadFailed, "module_load_failed"),
        ];
        for (kind, label) in expected {
            assert_eq!(kind.as_label(), label);
        }
    }

    /// Every `RunnerType` variant maps to its expected label.
    #[test]
    fn runner_type_as_label_maps_all_variants() {
        let expected = [
            (RunnerType::Subprocess, "subprocess"),
            (RunnerType::Container, "container"),
            (RunnerType::Wasm, "wasm"),
        ];
        for (runner, label) in expected {
            assert_eq!(runner.as_label(), label);
        }
    }

    /// Every `TaskOutcome` variant maps to its expected label.
    #[test]
    fn task_outcome_as_label_maps_all_variants() {
        let expected = [
            (TaskOutcome::Success, "success"),
            (TaskOutcome::Failure, "failure"),
            (TaskOutcome::Canceled, "canceled"),
            (TaskOutcome::Timeout, "timeout"),
        ];
        for (outcome, label) in expected {
            assert_eq!(outcome.as_label(), label);
        }
    }
}