solti_runner/metrics/backend.rs
1//! # Metrics backend trait and label types.
2//!
3//! [`MetricsBackend`] is the abstraction for collecting task execution metrics.
4//! Concrete backends (e.g. `solti-prometheus`) implement this trait.
5//!
6//! See the [metrics module](super) for the convenience [`noop_metrics`](super::noop_metrics) constructor.
7
8use std::sync::Arc;
9
/// Kind of runner implementation, used as a metrics label.
///
/// Handed to [`MetricsBackend`] methods so that dashboards can break metrics down by runner backend.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum RunnerType {
    /// Runner backed by an OS subprocess.
    Subprocess,
    /// Runner backed by a container (OCI).
    Container,
    /// Runner backed by WebAssembly.
    Wasm,
}

impl RunnerType {
    /// Map this runner type to the static string used as its metrics label value.
    ///
    /// The labels are lowercase: `"subprocess"`, `"container"` and `"wasm"`.
    #[inline]
    pub fn as_label(self) -> &'static str {
        match self {
            RunnerType::Wasm => "wasm",
            RunnerType::Container => "container",
            RunnerType::Subprocess => "subprocess",
        }
    }
}
35
/// Task execution outcome for metrics classification.
///
/// Derives `Hash` (like [`RunnerType`]) so an outcome can be used as a key, or part of a key,
/// in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum TaskOutcome {
    /// Task completed successfully.
    Success,
    /// Task failed.
    Failure,
    /// Task was canceled.
    Canceled,
    /// Task timed out.
    Timeout,
}

impl TaskOutcome {
    /// Return the static string used as this outcome's metrics label value.
    ///
    /// The labels are lowercase: `"success"`, `"failure"`, `"canceled"` and `"timeout"`.
    #[inline]
    pub fn as_label(&self) -> &'static str {
        match *self {
            Self::Success => "success",
            Self::Failure => "failure",
            Self::Canceled => "canceled",
            Self::Timeout => "timeout",
        }
    }
}
62
63/// Backend metrics collection interface.
64///
65/// This trait abstracts metrics collection across different backends.
66/// Implementations are injected via [`crate::BuildContext`] and used by all runners.
67///
68/// ## Also
69///
70/// - [`NoOpMetrics`](super::NoOpMetrics): zero-size default backend.
71/// - [`crate::BuildContext::metrics`]: access the handle from within a runner.
72/// - `solti-prometheus::PrometheusMetrics` is a production Prometheus implementation.
73pub trait MetricsBackend: Send + Sync + 'static {
74 /// Record task spawn event.
75 ///
76 /// Called when a task is submitted and starts executing.
77 fn record_task_started(&self, runner_type: RunnerType);
78
79 /// Record task completion with outcome and duration.
80 ///
81 /// Called when task exits (success, failure, timeout, cancel).
82 fn record_task_completed(
83 &self,
84 runner_type: RunnerType,
85 outcome: TaskOutcome,
86 duration_ms: u64,
87 );
88
89 /// Record runner-specific error during task setup/teardown.
90 ///
91 /// Called when runner fails to spawn/cleanup a task.
92 /// This is separate from task failures (which are `record_task_completed` with `Failure`).
93 fn record_runner_error(&self, runner_type: RunnerType, error_kind: &str);
94}
95
96/// Shared handle to metrics backend.
97///
98/// Stored in [`crate::BuildContext`] and cloned into each task.
99pub type MetricsHandle = Arc<dyn MetricsBackend>;