Skip to main content

yarli_cli/yarli-observability/src/
metrics.rs

//! Prometheus metrics for YARLI (Section 15.2).
//!
//! All metrics use the `yarli_` prefix. Label types are defined locally so that
//! the observability crate does not depend on FSM internals — callers pass
//! string labels derived from FSM states.
//!
//! # Metric families
//!
//! | Metric | Type | Labels | Section |
//! |--------|------|--------|---------|
//! | `yarli_queue_depth` | Gauge | — | Queue |
//! | `yarli_queue_lease_timeouts_total`* | Counter | — | Queue |
//! | `yarli_scheduler_tick_duration_seconds` | Histogram | `stage` | Scheduler |
//! | `yarli_runs_total`* | Counter | `state` | Run/Task |
//! | `yarli_tasks_total`* | Counter | `state`, `command_class` | Run/Task |
//! | `yarli_gate_failures_total`* | Counter | `gate` | Run/Task |
//! | `yarli_commands_total`* | Counter | `command_class`, `exit_reason` | Commands |
//! | `yarli_command_duration_seconds` | Histogram | `command_class` | Commands |
//! | `yarli_command_overhead_duration_seconds` | Histogram | `command_class`, `phase` | Commands |
//! | `yarli_store_duration_seconds` | Histogram | `operation` | Store |
//! | `yarli_store_slow_queries_total`* | Counter | `operation` | Store |
//! | `yarli_worktree_state_total`* | Counter | `state` | Git |
//! | `yarli_merge_attempts_total`* | Counter | `outcome` | Git |
//! | `yarli_merge_conflicts_total`* | Counter | `conflict_type` | Git |
//! | `yarli_run_resource_usage` | Gauge | `run_id`, `metric` | Run usage |
//! | `yarli_run_token_usage` | Gauge | `run_id`, `metric` | Run usage |
//!
//! *`_total` suffix is auto-appended by prometheus-client for counter types.
23
24use prometheus_client::encoding::EncodeLabelSet;
25use prometheus_client::metrics::counter::Counter;
26use prometheus_client::metrics::family::Family;
27use prometheus_client::metrics::gauge::Gauge;
28use prometheus_client::metrics::histogram::{exponential_buckets, Histogram};
29pub use prometheus_client::registry::Registry;
30
31// ---------------------------------------------------------------------------
32// Label types
33// ---------------------------------------------------------------------------
34
35/// Labels for run-level metrics.
36#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
37pub struct StateLabel {
38    pub state: String,
39}
40
41/// Labels for task-level metrics.
42#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
43pub struct TaskLabel {
44    pub state: String,
45    pub command_class: String,
46}
47
48/// Labels for gate failure metrics.
49#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
50pub struct GateLabel {
51    pub gate: String,
52}
53
54/// Labels for command metrics.
55#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
56pub struct CommandLabel {
57    pub command_class: String,
58    pub exit_reason: String,
59}
60
61/// Labels for command duration histograms (class only).
62#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
63pub struct CommandClassLabel {
64    pub command_class: String,
65}
66
67/// Labels for scheduler tick stages.
68#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
69pub struct TickStageLabel {
70    pub stage: String,
71}
72
73/// Labels for command overhead phases.
74#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
75pub struct CommandOverheadLabel {
76    pub command_class: String,
77    pub phase: String,
78}
79
80/// Labels for store operations.
81#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
82pub struct StoreOpLabel {
83    pub operation: String,
84}
85
86#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
87pub struct RunMetricLabel {
88    pub run_id: String,
89    pub metric: String,
90}
91
92/// Labels for merge outcome metrics.
93#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
94pub struct MergeOutcomeLabel {
95    pub outcome: String,
96}
97
98/// Labels for merge conflict type metrics.
99#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
100pub struct ConflictTypeLabel {
101    pub conflict_type: String,
102}
103
104// ---------------------------------------------------------------------------
105// Metrics registry
106// ---------------------------------------------------------------------------
107
108/// All YARLI Prometheus metrics, registered under a shared [`Registry`].
109///
110/// Clone this struct freely — all metric handles are internally `Arc`-based.
111#[derive(Clone, Debug)]
112pub struct YarliMetrics {
113    // -- Queue --
114    /// Current number of tasks in the queue.
115    pub queue_depth: Gauge,
116    /// Total lease timeouts observed.
117    pub queue_lease_timeouts_total: Counter,
118
119    // -- Scheduler --
120    /// Scheduler tick duration in seconds (labelled by stage).
121    pub scheduler_tick_duration_seconds: Family<TickStageLabel, Histogram>,
122
123    // -- Runs --
124    /// Total run state transitions (labelled by target state).
125    pub runs_total: Family<StateLabel, Counter>,
126
127    // -- Tasks --
128    /// Total task state transitions (labelled by state + command class).
129    pub tasks_total: Family<TaskLabel, Counter>,
130
131    // -- Gates --
132    /// Total gate evaluation failures (labelled by gate name).
133    pub gate_failures_total: Family<GateLabel, Counter>,
134
135    // -- Commands --
136    /// Total command executions (labelled by class + exit reason).
137    pub commands_total: Family<CommandLabel, Counter>,
138    /// Command execution duration in seconds (labelled by class).
139    pub command_duration_seconds: Family<CommandClassLabel, Histogram>,
140    /// Command overhead duration in seconds (labelled by class + phase).
141    pub command_overhead_duration_seconds: Family<CommandOverheadLabel, Histogram>,
142
143    // -- Store --
144    /// Store operation duration in seconds (labelled by operation).
145    pub store_duration_seconds: Family<StoreOpLabel, Histogram>,
146    /// Total slow queries observed (labelled by operation).
147    pub store_slow_queries_total: Family<StoreOpLabel, Counter>,
148
149    // -- Git: Worktree --
150    /// Total worktree state transitions (labelled by target state).
151    pub worktree_state_total: Family<StateLabel, Counter>,
152
153    // -- Git: Merge --
154    /// Total merge attempts (labelled by outcome: done/conflict/aborted).
155    pub merge_attempts_total: Family<MergeOutcomeLabel, Counter>,
156    /// Total merge conflicts (labelled by conflict type).
157    pub merge_conflicts_total: Family<ConflictTypeLabel, Counter>,
158
159    // -- Run metrics --
160    /// Current run-level resource usage totals.
161    pub run_resource_usage: Family<RunMetricLabel, Gauge>,
162    /// Current run-level token usage totals.
163    pub run_token_usage: Family<RunMetricLabel, Gauge>,
164}
165
166impl YarliMetrics {
167    /// Create a new `YarliMetrics` and register all metrics in `registry`.
168    pub fn new(registry: &mut Registry) -> Self {
169        // -- Queue --
170        let queue_depth = Gauge::default();
171        registry.register(
172            "yarli_queue_depth",
173            "Current number of tasks in queue",
174            queue_depth.clone(),
175        );
176
177        // NOTE: prometheus-client auto-appends `_total` suffix to counter names,
178        // so we register without the `_total` suffix.
179        let queue_lease_timeouts_total = Counter::default();
180        registry.register(
181            "yarli_queue_lease_timeouts",
182            "Total number of lease timeouts",
183            queue_lease_timeouts_total.clone(),
184        );
185
186        // -- Scheduler --
187        let scheduler_tick_duration_seconds =
188            Family::<TickStageLabel, Histogram>::new_with_constructor(|| {
189                // Microsecond resolution up to ~1s
190                Histogram::new(exponential_buckets(0.0001, 4.0, 8))
191            });
192        registry.register(
193            "yarli_scheduler_tick_duration_seconds",
194            "Scheduler tick duration in seconds by stage",
195            scheduler_tick_duration_seconds.clone(),
196        );
197
198        // -- Runs --
199        let runs_total = Family::<StateLabel, Counter>::default();
200        registry.register(
201            "yarli_runs",
202            "Total run state transitions by target state",
203            runs_total.clone(),
204        );
205
206        // -- Tasks --
207        let tasks_total = Family::<TaskLabel, Counter>::default();
208        registry.register(
209            "yarli_tasks",
210            "Total task state transitions by state and command class",
211            tasks_total.clone(),
212        );
213
214        // -- Gates --
215        let gate_failures_total = Family::<GateLabel, Counter>::default();
216        registry.register(
217            "yarli_gate_failures",
218            "Total gate evaluation failures by gate name",
219            gate_failures_total.clone(),
220        );
221
222        // -- Commands --
223        let commands_total = Family::<CommandLabel, Counter>::default();
224        registry.register(
225            "yarli_commands",
226            "Total command executions by class and exit reason",
227            commands_total.clone(),
228        );
229
230        let command_duration_seconds =
231            Family::<CommandClassLabel, Histogram>::new_with_constructor(|| {
232                // Exponential buckets: 0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8, 25.6, 51.2
233                Histogram::new(exponential_buckets(0.1, 2.0, 10))
234            });
235        registry.register(
236            "yarli_command_duration_seconds",
237            "Command execution duration in seconds",
238            command_duration_seconds.clone(),
239        );
240
241        let command_overhead_duration_seconds =
242            Family::<CommandOverheadLabel, Histogram>::new_with_constructor(|| {
243                // Millisecond resolution up to ~10s
244                Histogram::new(exponential_buckets(0.001, 4.0, 8))
245            });
246        registry.register(
247            "yarli_command_overhead_duration_seconds",
248            "Command overhead duration in seconds by phase",
249            command_overhead_duration_seconds.clone(),
250        );
251
252        // -- Store --
253        let store_duration_seconds =
254            Family::<StoreOpLabel, Histogram>::new_with_constructor(|| {
255                // Millisecond resolution up to ~10s
256                Histogram::new(exponential_buckets(0.001, 4.0, 8))
257            });
258        registry.register(
259            "yarli_store_duration_seconds",
260            "Store operation duration in seconds by operation",
261            store_duration_seconds.clone(),
262        );
263
264        let store_slow_queries_total = Family::<StoreOpLabel, Counter>::default();
265        registry.register(
266            "yarli_store_slow_queries",
267            "Total slow queries observed by operation",
268            store_slow_queries_total.clone(),
269        );
270
271        // -- Git: Worktree --
272        let worktree_state_total = Family::<StateLabel, Counter>::default();
273        registry.register(
274            "yarli_worktree_state",
275            "Total worktree state transitions by target state",
276            worktree_state_total.clone(),
277        );
278
279        // -- Git: Merge --
280        let merge_attempts_total = Family::<MergeOutcomeLabel, Counter>::default();
281        registry.register(
282            "yarli_merge_attempts",
283            "Total merge attempts by outcome",
284            merge_attempts_total.clone(),
285        );
286
287        let merge_conflicts_total = Family::<ConflictTypeLabel, Counter>::default();
288        registry.register(
289            "yarli_merge_conflicts",
290            "Total merge conflicts by conflict type",
291            merge_conflicts_total.clone(),
292        );
293
294        let run_resource_usage = Family::<RunMetricLabel, Gauge>::default();
295        registry.register(
296            "yarli_run_resource_usage",
297            "Current run-level resource usage totals",
298            run_resource_usage.clone(),
299        );
300
301        let run_token_usage = Family::<RunMetricLabel, Gauge>::default();
302        registry.register(
303            "yarli_run_token_usage",
304            "Current run-level token usage totals",
305            run_token_usage.clone(),
306        );
307
308        Self {
309            queue_depth,
310            queue_lease_timeouts_total,
311            scheduler_tick_duration_seconds,
312            runs_total,
313            tasks_total,
314            gate_failures_total,
315            commands_total,
316            command_duration_seconds,
317            command_overhead_duration_seconds,
318            store_duration_seconds,
319            store_slow_queries_total,
320            worktree_state_total,
321            merge_attempts_total,
322            merge_conflicts_total,
323            run_resource_usage,
324            run_token_usage,
325        }
326    }
327
328    /// Set the queue depth gauge.
329    pub fn set_queue_depth(&self, depth: usize) {
330        self.queue_depth.set(depth as i64);
331    }
332
333    // -- Convenience helpers --
334
335    /// Record a run state transition.
336    pub fn record_run_transition(&self, state: &str) {
337        self.runs_total
338            .get_or_create(&StateLabel {
339                state: state.to_string(),
340            })
341            .inc();
342    }
343
344    /// Record a task state transition.
345    pub fn record_task_transition(&self, state: &str, command_class: &str) {
346        self.tasks_total
347            .get_or_create(&TaskLabel {
348                state: state.to_string(),
349                command_class: command_class.to_string(),
350            })
351            .inc();
352    }
353
354    /// Record a gate failure.
355    pub fn record_gate_failure(&self, gate: &str) {
356        self.gate_failures_total
357            .get_or_create(&GateLabel {
358                gate: gate.to_string(),
359            })
360            .inc();
361    }
362
363    /// Record a command execution.
364    pub fn record_command(&self, command_class: &str, exit_reason: &str) {
365        self.commands_total
366            .get_or_create(&CommandLabel {
367                command_class: command_class.to_string(),
368                exit_reason: exit_reason.to_string(),
369            })
370            .inc();
371    }
372
373    /// Record a command duration.
374    pub fn record_command_duration(&self, command_class: &str, duration_secs: f64) {
375        self.command_duration_seconds
376            .get_or_create(&CommandClassLabel {
377                command_class: command_class.to_string(),
378            })
379            .observe(duration_secs);
380    }
381
382    /// Record a worktree state transition.
383    pub fn record_worktree_transition(&self, state: &str) {
384        self.worktree_state_total
385            .get_or_create(&StateLabel {
386                state: state.to_string(),
387            })
388            .inc();
389    }
390
391    /// Record a merge attempt outcome.
392    pub fn record_merge_attempt(&self, outcome: &str) {
393        self.merge_attempts_total
394            .get_or_create(&MergeOutcomeLabel {
395                outcome: outcome.to_string(),
396            })
397            .inc();
398    }
399
400    /// Record a merge conflict.
401    pub fn record_merge_conflict(&self, conflict_type: &str) {
402        self.merge_conflicts_total
403            .get_or_create(&ConflictTypeLabel {
404                conflict_type: conflict_type.to_string(),
405            })
406            .inc();
407    }
408
409    /// Record current run-level resource usage.
410    pub fn set_run_resource_usage(&self, run_id: &str, metric: &str, value: u64) {
411        self.run_resource_usage
412            .get_or_create(&RunMetricLabel {
413                run_id: run_id.to_string(),
414                metric: metric.to_string(),
415            })
416            .set(value as i64);
417    }
418
419    /// Record current run-level token usage.
420    pub fn set_run_token_usage(&self, run_id: &str, metric: &str, value: u64) {
421        self.run_token_usage
422            .get_or_create(&RunMetricLabel {
423                run_id: run_id.to_string(),
424                metric: metric.to_string(),
425            })
426            .set(value as i64);
427    }
428
429    /// Record scheduler tick duration.
430    pub fn record_scheduler_tick_duration(&self, stage: &str, duration_secs: f64) {
431        self.scheduler_tick_duration_seconds
432            .get_or_create(&TickStageLabel {
433                stage: stage.to_string(),
434            })
435            .observe(duration_secs);
436    }
437
438    /// Record command overhead duration.
439    pub fn record_command_overhead_duration(
440        &self,
441        command_class: &str,
442        phase: &str,
443        duration_secs: f64,
444    ) {
445        self.command_overhead_duration_seconds
446            .get_or_create(&CommandOverheadLabel {
447                command_class: command_class.to_string(),
448                phase: phase.to_string(),
449            })
450            .observe(duration_secs);
451    }
452
453    /// Record store operation duration.
454    pub fn record_store_duration(&self, operation: &str, duration_secs: f64) {
455        self.store_duration_seconds
456            .get_or_create(&StoreOpLabel {
457                operation: operation.to_string(),
458            })
459            .observe(duration_secs);
460    }
461
462    /// Record a slow store query.
463    pub fn record_store_slow_query(&self, operation: &str) {
464        self.store_slow_queries_total
465            .get_or_create(&StoreOpLabel {
466                operation: operation.to_string(),
467            })
468            .inc();
469    }
470}
471
472/// Encode the registry contents in Prometheus text exposition format.
473pub fn encode_metrics(registry: &Registry) -> String {
474    let mut buf = String::new();
475    prometheus_client::encoding::text::encode(&mut buf, registry)
476        .expect("encoding to String never fails");
477    buf
478}
479
480// ---------------------------------------------------------------------------
481// Tests
482// ---------------------------------------------------------------------------
483
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a fresh registry with all YARLI metrics registered in it.
    fn setup() -> (Registry, YarliMetrics) {
        let mut registry = Registry::default();
        let metrics = YarliMetrics::new(&mut registry);
        (registry, metrics)
    }

    // -- Registration --

    #[test]
    fn metrics_register_without_panic() {
        let (_registry, _metrics) = setup();
    }

    #[test]
    fn encode_empty_registry() {
        let (registry, _metrics) = setup();
        let output = encode_metrics(&registry);
        // The plain (non-family) metrics show up even before anything is recorded.
        assert!(output.contains("yarli_queue_depth"));
        assert!(output.contains("yarli_queue_lease_timeouts_total"));
    }

    // -- Queue metrics --

    #[test]
    fn queue_depth_gauge() {
        let (registry, metrics) = setup();
        metrics.queue_depth.set(42);
        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_queue_depth 42"));
    }

    #[test]
    fn queue_depth_inc_dec() {
        let (_registry, metrics) = setup();
        metrics.queue_depth.inc();
        metrics.queue_depth.inc();
        metrics.queue_depth.dec();
        assert_eq!(metrics.queue_depth.get(), 1);
    }

    #[test]
    fn set_queue_depth_helper_updates_gauge() {
        let (registry, metrics) = setup();
        metrics.set_queue_depth(7);
        assert_eq!(metrics.queue_depth.get(), 7);
        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_queue_depth 7"));
    }

    #[test]
    fn queue_lease_timeouts_counter() {
        let (registry, metrics) = setup();
        metrics.queue_lease_timeouts_total.inc();
        metrics.queue_lease_timeouts_total.inc();
        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_queue_lease_timeouts_total 2"));
    }

    // -- Run metrics --

    #[test]
    fn runs_total_counter() {
        let (registry, metrics) = setup();
        metrics.record_run_transition("RUN_ACTIVE");
        metrics.record_run_transition("RUN_ACTIVE");
        metrics.record_run_transition("RUN_COMPLETED");
        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_runs_total"));
        assert!(output.contains("RUN_ACTIVE"));
        assert!(output.contains("RUN_COMPLETED"));
    }

    #[test]
    fn run_usage_gauges() {
        let (registry, metrics) = setup();
        metrics.set_run_resource_usage("run-1", "cpu_seconds", 12);
        metrics.set_run_token_usage("run-1", "prompt_tokens", 345);

        let resource = RunMetricLabel {
            run_id: "run-1".to_string(),
            metric: "cpu_seconds".to_string(),
        };
        let tokens = RunMetricLabel {
            run_id: "run-1".to_string(),
            metric: "prompt_tokens".to_string(),
        };
        assert_eq!(metrics.run_resource_usage.get_or_create(&resource).get(), 12);
        assert_eq!(metrics.run_token_usage.get_or_create(&tokens).get(), 345);

        // Setting again replaces the previous value rather than accumulating.
        metrics.set_run_resource_usage("run-1", "cpu_seconds", 5);
        assert_eq!(metrics.run_resource_usage.get_or_create(&resource).get(), 5);

        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_run_resource_usage"));
        assert!(output.contains("yarli_run_token_usage"));
        assert!(output.contains("run-1"));
        assert!(output.contains("cpu_seconds"));
        assert!(output.contains("prompt_tokens"));
    }

    // -- Task metrics --

    #[test]
    fn tasks_total_counter() {
        let (registry, metrics) = setup();
        metrics.record_task_transition("TASK_EXECUTING", "cpu");
        metrics.record_task_transition("TASK_COMPLETE", "io");
        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_tasks_total"));
        assert!(output.contains("TASK_EXECUTING"));
        assert!(output.contains("TASK_COMPLETE"));
    }

    // -- Gate metrics --

    #[test]
    fn gate_failures_counter() {
        let (registry, metrics) = setup();
        metrics.record_gate_failure("tests_passed");
        metrics.record_gate_failure("tests_passed");
        metrics.record_gate_failure("policy_clean");
        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_gate_failures_total"));
        assert!(output.contains("tests_passed"));
        assert!(output.contains("policy_clean"));
    }

    // -- Command metrics --

    #[test]
    fn commands_total_counter() {
        let (registry, metrics) = setup();
        metrics.record_command("io", "exited");
        metrics.record_command("cpu", "timed_out");
        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_commands_total"));
        assert!(output.contains("exited"));
        assert!(output.contains("timed_out"));
    }

    #[test]
    fn command_duration_histogram() {
        let (registry, metrics) = setup();
        metrics.record_command_duration("io", 0.5);
        metrics.record_command_duration("io", 1.5);
        metrics.record_command_duration("cpu", 10.0);
        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_command_duration_seconds"));
    }

    // -- Worktree metrics --

    #[test]
    fn worktree_state_counter() {
        let (registry, metrics) = setup();
        metrics.record_worktree_transition("WT_BOUND_HOME");
        metrics.record_worktree_transition("WT_CLOSED");
        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_worktree_state_total"));
        assert!(output.contains("WT_BOUND_HOME"));
    }

    // -- Merge metrics --

    #[test]
    fn merge_attempts_counter() {
        let (registry, metrics) = setup();
        metrics.record_merge_attempt("done");
        metrics.record_merge_attempt("conflict");
        metrics.record_merge_attempt("aborted");
        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_merge_attempts_total"));
        assert!(output.contains("done"));
        assert!(output.contains("conflict"));
    }

    #[test]
    fn merge_conflicts_counter() {
        let (registry, metrics) = setup();
        metrics.record_merge_conflict("text");
        metrics.record_merge_conflict("rename_rename");
        metrics.record_merge_conflict("submodule_pointer");
        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_merge_conflicts_total"));
        assert!(output.contains("text"));
        assert!(output.contains("rename_rename"));
    }

    // -- Scheduler metrics --

    #[test]
    fn scheduler_tick_duration_histogram() {
        let (registry, metrics) = setup();
        metrics.record_scheduler_tick_duration("scan", 0.1);
        metrics.record_scheduler_tick_duration("claim", 0.05);
        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_scheduler_tick_duration_seconds"));
        assert!(output.contains("scan"));
        assert!(output.contains("claim"));
    }

    // -- Command overhead metrics --

    #[test]
    fn command_overhead_duration_histogram() {
        let (registry, metrics) = setup();
        metrics.record_command_overhead_duration("io", "spawn", 0.01);
        metrics.record_command_overhead_duration("cpu", "capture", 0.02);
        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_command_overhead_duration_seconds"));
        assert!(output.contains("spawn"));
        assert!(output.contains("capture"));
    }

    // -- Store metrics --

    #[test]
    fn store_metrics() {
        let (registry, metrics) = setup();
        metrics.record_store_duration("append", 0.05);
        metrics.record_store_slow_query("query");
        let output = encode_metrics(&registry);
        assert!(output.contains("yarli_store_duration_seconds"));
        assert!(output.contains("yarli_store_slow_queries"));
        assert!(output.contains("append"));
        assert!(output.contains("query"));
    }

    // -- Clone --

    #[test]
    fn metrics_are_cloneable_and_share_state() {
        let (_registry, metrics) = setup();
        let metrics2 = metrics.clone();
        metrics.queue_depth.set(99);
        assert_eq!(metrics2.queue_depth.get(), 99);
    }

    // -- Full encode --

    #[test]
    fn full_encode_includes_all_metric_names() {
        let (registry, metrics) = setup();
        // Touch each metric family at least once so the name assertions below
        // do not depend on how families without samples are encoded.
        metrics.queue_depth.set(1);
        metrics.queue_lease_timeouts_total.inc();
        metrics.record_run_transition("RUN_OPEN");
        metrics.record_task_transition("TASK_OPEN", "io");
        metrics.record_gate_failure("tests_passed");
        metrics.record_command("io", "exited");
        metrics.record_command_duration("io", 1.0);
        metrics.record_worktree_transition("WT_CREATING");
        metrics.record_merge_attempt("done");
        metrics.record_merge_conflict("text");
        metrics.record_scheduler_tick_duration("scan", 0.001);
        metrics.record_command_overhead_duration("io", "spawn", 0.001);
        metrics.record_store_duration("query", 0.001);
        metrics.record_store_slow_query("append");
        metrics.set_run_resource_usage("run-1", "cpu_seconds", 1);
        metrics.set_run_token_usage("run-1", "prompt_tokens", 1);

        let output = encode_metrics(&registry);

        let expected_names = [
            "yarli_queue_depth",
            "yarli_queue_lease_timeouts_total",
            "yarli_scheduler_tick_duration_seconds",
            "yarli_runs_total",
            "yarli_tasks_total",
            "yarli_gate_failures_total",
            "yarli_commands_total",
            "yarli_command_duration_seconds",
            "yarli_command_overhead_duration_seconds",
            "yarli_store_duration_seconds",
            "yarli_store_slow_queries",
            "yarli_worktree_state_total",
            "yarli_merge_attempts_total",
            "yarli_merge_conflicts_total",
            "yarli_run_resource_usage",
            "yarli_run_token_usage",
        ];
        for name in &expected_names {
            assert!(output.contains(name), "missing metric: {name}");
        }
    }
}