1use super::traits::{Observer, ObserverEvent, ObserverMetric};
2use prometheus::{
3 Encoder, GaugeVec, Histogram, HistogramOpts, HistogramVec, IntCounterVec, Registry, TextEncoder,
4};
5
6pub struct PrometheusObserver {
8 registry: Registry,
9
10 agent_starts: IntCounterVec,
12 llm_requests: IntCounterVec,
13 tokens_input_total: IntCounterVec,
14 tokens_output_total: IntCounterVec,
15 tool_calls: IntCounterVec,
16 channel_messages: IntCounterVec,
17 heartbeat_ticks: prometheus::IntCounter,
18 errors: IntCounterVec,
19 cache_hits: IntCounterVec,
20 cache_misses: IntCounterVec,
21 cache_tokens_saved: IntCounterVec,
22
23 agent_duration: HistogramVec,
25 tool_duration: HistogramVec,
26 request_latency: Histogram,
27
28 tokens_used: prometheus::IntGauge,
30 active_sessions: GaugeVec,
31 queue_depth: GaugeVec,
32
33 hand_runs: IntCounterVec,
35 hand_duration: HistogramVec,
36 hand_findings: IntCounterVec,
37
38 deployments_total: IntCounterVec,
40 deployment_lead_time: Histogram,
41 deployment_failure_rate: prometheus::Gauge,
42 recovery_time: Histogram,
43 mttr: prometheus::Gauge,
44 deploy_success_count: std::sync::atomic::AtomicU64,
45 deploy_failure_count: std::sync::atomic::AtomicU64,
46}
47
48impl PrometheusObserver {
49 pub fn new() -> Self {
50 let registry = Registry::new();
51
52 let agent_starts = IntCounterVec::new(
53 prometheus::Opts::new("construct_agent_starts_total", "Total agent invocations"),
54 &["provider", "model"],
55 )
56 .expect("valid metric");
57
58 let llm_requests = IntCounterVec::new(
59 prometheus::Opts::new(
60 "construct_llm_requests_total",
61 "Total LLM provider requests",
62 ),
63 &["provider", "model", "success"],
64 )
65 .expect("valid metric");
66
67 let tokens_input_total = IntCounterVec::new(
68 prometheus::Opts::new(
69 "construct_tokens_input_total",
70 "Total input tokens consumed",
71 ),
72 &["provider", "model"],
73 )
74 .expect("valid metric");
75
76 let tokens_output_total = IntCounterVec::new(
77 prometheus::Opts::new(
78 "construct_tokens_output_total",
79 "Total output tokens consumed",
80 ),
81 &["provider", "model"],
82 )
83 .expect("valid metric");
84
85 let tool_calls = IntCounterVec::new(
86 prometheus::Opts::new("construct_tool_calls_total", "Total tool calls"),
87 &["tool", "success"],
88 )
89 .expect("valid metric");
90
91 let channel_messages = IntCounterVec::new(
92 prometheus::Opts::new("construct_channel_messages_total", "Total channel messages"),
93 &["channel", "direction"],
94 )
95 .expect("valid metric");
96
97 let heartbeat_ticks =
98 prometheus::IntCounter::new("construct_heartbeat_ticks_total", "Total heartbeat ticks")
99 .expect("valid metric");
100
101 let errors = IntCounterVec::new(
102 prometheus::Opts::new("construct_errors_total", "Total errors by component"),
103 &["component"],
104 )
105 .expect("valid metric");
106
107 let cache_hits = IntCounterVec::new(
108 prometheus::Opts::new("construct_cache_hits_total", "Total response cache hits"),
109 &["cache_type"],
110 )
111 .expect("valid metric");
112
113 let cache_misses = IntCounterVec::new(
114 prometheus::Opts::new(
115 "construct_cache_misses_total",
116 "Total response cache misses",
117 ),
118 &["cache_type"],
119 )
120 .expect("valid metric");
121
122 let cache_tokens_saved = IntCounterVec::new(
123 prometheus::Opts::new(
124 "construct_cache_tokens_saved_total",
125 "Total tokens saved by response cache",
126 ),
127 &["cache_type"],
128 )
129 .expect("valid metric");
130
131 let agent_duration = HistogramVec::new(
132 HistogramOpts::new(
133 "construct_agent_duration_seconds",
134 "Agent invocation duration in seconds",
135 )
136 .buckets(vec![0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0]),
137 &["provider", "model"],
138 )
139 .expect("valid metric");
140
141 let tool_duration = HistogramVec::new(
142 HistogramOpts::new(
143 "construct_tool_duration_seconds",
144 "Tool execution duration in seconds",
145 )
146 .buckets(vec![0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]),
147 &["tool"],
148 )
149 .expect("valid metric");
150
151 let request_latency = Histogram::with_opts(
152 HistogramOpts::new(
153 "construct_request_latency_seconds",
154 "Request latency in seconds",
155 )
156 .buckets(vec![0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]),
157 )
158 .expect("valid metric");
159
160 let tokens_used = prometheus::IntGauge::new(
161 "construct_tokens_used_last",
162 "Tokens used in the last request",
163 )
164 .expect("valid metric");
165
166 let active_sessions = GaugeVec::new(
167 prometheus::Opts::new("construct_active_sessions", "Number of active sessions"),
168 &[],
169 )
170 .expect("valid metric");
171
172 let queue_depth = GaugeVec::new(
173 prometheus::Opts::new("construct_queue_depth", "Message queue depth"),
174 &[],
175 )
176 .expect("valid metric");
177
178 let hand_runs = IntCounterVec::new(
179 prometheus::Opts::new("construct_hand_runs_total", "Total hand runs by outcome"),
180 &["hand", "success"],
181 )
182 .expect("valid metric");
183
184 let hand_duration = HistogramVec::new(
185 HistogramOpts::new(
186 "construct_hand_duration_seconds",
187 "Hand run duration in seconds",
188 )
189 .buckets(vec![0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0]),
190 &["hand"],
191 )
192 .expect("valid metric");
193
194 let hand_findings = IntCounterVec::new(
195 prometheus::Opts::new(
196 "construct_hand_findings_total",
197 "Total findings produced by hand runs",
198 ),
199 &["hand"],
200 )
201 .expect("valid metric");
202
203 let deployments_total = IntCounterVec::new(
204 prometheus::Opts::new("construct_deployments_total", "Total deployments by status"),
205 &["status"],
206 )
207 .expect("valid metric");
208
209 let deployment_lead_time = Histogram::with_opts(
210 HistogramOpts::new(
211 "construct_deployment_lead_time_seconds",
212 "Deployment lead time from commit to deploy in seconds",
213 )
214 .buckets(vec![
215 60.0, 300.0, 600.0, 1800.0, 3600.0, 7200.0, 14400.0, 43200.0, 86400.0,
216 ]),
217 )
218 .expect("valid metric");
219
220 let deployment_failure_rate = prometheus::Gauge::new(
221 "construct_deployment_failure_rate",
222 "Ratio of failed deployments to total deployments",
223 )
224 .expect("valid metric");
225
226 let recovery_time = Histogram::with_opts(
227 HistogramOpts::new(
228 "construct_recovery_time_seconds",
229 "Time to recover from a failed deployment in seconds",
230 )
231 .buckets(vec![
232 60.0, 300.0, 600.0, 1800.0, 3600.0, 7200.0, 14400.0, 43200.0, 86400.0,
233 ]),
234 )
235 .expect("valid metric");
236
237 let mttr =
238 prometheus::Gauge::new("construct_mttr_seconds", "Mean time to recovery in seconds")
239 .expect("valid metric");
240
241 registry.register(Box::new(agent_starts.clone())).ok();
243 registry.register(Box::new(llm_requests.clone())).ok();
244 registry.register(Box::new(tokens_input_total.clone())).ok();
245 registry
246 .register(Box::new(tokens_output_total.clone()))
247 .ok();
248 registry.register(Box::new(tool_calls.clone())).ok();
249 registry.register(Box::new(channel_messages.clone())).ok();
250 registry.register(Box::new(heartbeat_ticks.clone())).ok();
251 registry.register(Box::new(errors.clone())).ok();
252 registry.register(Box::new(cache_hits.clone())).ok();
253 registry.register(Box::new(cache_misses.clone())).ok();
254 registry.register(Box::new(cache_tokens_saved.clone())).ok();
255 registry.register(Box::new(agent_duration.clone())).ok();
256 registry.register(Box::new(tool_duration.clone())).ok();
257 registry.register(Box::new(request_latency.clone())).ok();
258 registry.register(Box::new(tokens_used.clone())).ok();
259 registry.register(Box::new(active_sessions.clone())).ok();
260 registry.register(Box::new(queue_depth.clone())).ok();
261 registry.register(Box::new(hand_runs.clone())).ok();
262 registry.register(Box::new(hand_duration.clone())).ok();
263 registry.register(Box::new(hand_findings.clone())).ok();
264 registry.register(Box::new(deployments_total.clone())).ok();
265 registry
266 .register(Box::new(deployment_lead_time.clone()))
267 .ok();
268 registry
269 .register(Box::new(deployment_failure_rate.clone()))
270 .ok();
271 registry.register(Box::new(recovery_time.clone())).ok();
272 registry.register(Box::new(mttr.clone())).ok();
273
274 Self {
275 registry,
276 agent_starts,
277 llm_requests,
278 tokens_input_total,
279 tokens_output_total,
280 tool_calls,
281 channel_messages,
282 heartbeat_ticks,
283 errors,
284 cache_hits,
285 cache_misses,
286 cache_tokens_saved,
287 agent_duration,
288 tool_duration,
289 request_latency,
290 tokens_used,
291 active_sessions,
292 queue_depth,
293 hand_runs,
294 hand_duration,
295 hand_findings,
296 deployments_total,
297 deployment_lead_time,
298 deployment_failure_rate,
299 recovery_time,
300 mttr,
301 deploy_success_count: std::sync::atomic::AtomicU64::new(0),
302 deploy_failure_count: std::sync::atomic::AtomicU64::new(0),
303 }
304 }
305
306 pub fn encode(&self) -> String {
308 let encoder = TextEncoder::new();
309 let families = self.registry.gather();
310 let mut buf = Vec::new();
311 encoder.encode(&families, &mut buf).unwrap_or_default();
312 String::from_utf8(buf).unwrap_or_default()
313 }
314}
315
316impl Observer for PrometheusObserver {
317 fn record_event(&self, event: &ObserverEvent) {
318 match event {
319 ObserverEvent::AgentStart { provider, model } => {
320 self.agent_starts
321 .with_label_values(&[provider, model])
322 .inc();
323 }
324 ObserverEvent::AgentEnd {
325 provider,
326 model,
327 duration,
328 tokens_used,
329 cost_usd: _,
330 } => {
331 self.agent_duration
333 .with_label_values(&[provider, model])
334 .observe(duration.as_secs_f64());
335 if let Some(t) = tokens_used {
336 self.tokens_used.set(i64::try_from(*t).unwrap_or(i64::MAX));
337 }
338 }
339 ObserverEvent::LlmResponse {
340 provider,
341 model,
342 success,
343 input_tokens,
344 output_tokens,
345 ..
346 } => {
347 let success_str = if *success { "true" } else { "false" };
348 self.llm_requests
349 .with_label_values(&[provider.as_str(), model.as_str(), success_str])
350 .inc();
351 if let Some(input) = input_tokens {
352 self.tokens_input_total
353 .with_label_values(&[provider.as_str(), model.as_str()])
354 .inc_by(*input);
355 }
356 if let Some(output) = output_tokens {
357 self.tokens_output_total
358 .with_label_values(&[provider.as_str(), model.as_str()])
359 .inc_by(*output);
360 }
361 }
362 ObserverEvent::ToolCallStart { .. }
363 | ObserverEvent::TurnComplete
364 | ObserverEvent::LlmRequest { .. }
365 | ObserverEvent::DeploymentStarted { .. }
366 | ObserverEvent::RecoveryCompleted { .. } => {}
367 ObserverEvent::ToolCall {
368 tool,
369 duration,
370 success,
371 } => {
372 let success_str = if *success { "true" } else { "false" };
373 self.tool_calls
374 .with_label_values(&[tool.as_str(), success_str])
375 .inc();
376 self.tool_duration
377 .with_label_values(&[tool.as_str()])
378 .observe(duration.as_secs_f64());
379 }
380 ObserverEvent::ChannelMessage { channel, direction } => {
381 self.channel_messages
382 .with_label_values(&[channel, direction])
383 .inc();
384 }
385 ObserverEvent::HeartbeatTick => {
386 self.heartbeat_ticks.inc();
387 }
388 ObserverEvent::CacheHit {
389 cache_type,
390 tokens_saved,
391 } => {
392 self.cache_hits.with_label_values(&[cache_type]).inc();
393 self.cache_tokens_saved
394 .with_label_values(&[cache_type])
395 .inc_by(*tokens_saved);
396 }
397 ObserverEvent::CacheMiss { cache_type } => {
398 self.cache_misses.with_label_values(&[cache_type]).inc();
399 }
400 ObserverEvent::Error {
401 component,
402 message: _,
403 } => {
404 self.errors.with_label_values(&[component]).inc();
405 }
406 ObserverEvent::HandStarted { hand_name } => {
407 self.hand_runs
408 .with_label_values(&[hand_name.as_str(), "true"])
409 .inc_by(0); }
411 ObserverEvent::HandCompleted {
412 hand_name,
413 duration_ms,
414 findings_count,
415 } => {
416 self.hand_runs
417 .with_label_values(&[hand_name.as_str(), "true"])
418 .inc();
419 self.hand_duration
420 .with_label_values(&[hand_name.as_str()])
421 .observe(*duration_ms as f64 / 1000.0);
422 self.hand_findings
423 .with_label_values(&[hand_name.as_str()])
424 .inc_by(*findings_count as u64);
425 }
426 ObserverEvent::HandFailed {
427 hand_name,
428 duration_ms,
429 ..
430 } => {
431 self.hand_runs
432 .with_label_values(&[hand_name.as_str(), "false"])
433 .inc();
434 self.hand_duration
435 .with_label_values(&[hand_name.as_str()])
436 .observe(*duration_ms as f64 / 1000.0);
437 }
438 ObserverEvent::DeploymentCompleted { .. } => {
439 self.deployments_total.with_label_values(&["success"]).inc();
440 let s = self
441 .deploy_success_count
442 .fetch_add(1, std::sync::atomic::Ordering::Relaxed)
443 + 1;
444 let f = self
445 .deploy_failure_count
446 .load(std::sync::atomic::Ordering::Relaxed);
447 let total = s + f;
448 if total > 0 {
449 self.deployment_failure_rate.set(f as f64 / total as f64);
450 }
451 }
452 ObserverEvent::DeploymentFailed { .. } => {
453 self.deployments_total.with_label_values(&["failure"]).inc();
454 let f = self
455 .deploy_failure_count
456 .fetch_add(1, std::sync::atomic::Ordering::Relaxed)
457 + 1;
458 let s = self
459 .deploy_success_count
460 .load(std::sync::atomic::Ordering::Relaxed);
461 let total = s + f;
462 if total > 0 {
463 self.deployment_failure_rate.set(f as f64 / total as f64);
464 }
465 }
466 }
467 }
468
469 fn record_metric(&self, metric: &ObserverMetric) {
470 match metric {
471 ObserverMetric::RequestLatency(d) => {
472 self.request_latency.observe(d.as_secs_f64());
473 }
474 ObserverMetric::TokensUsed(t) => {
475 self.tokens_used.set(i64::try_from(*t).unwrap_or(i64::MAX));
476 }
477 ObserverMetric::ActiveSessions(s) => {
478 self.active_sessions
479 .with_label_values(&[] as &[&str])
480 .set(*s as f64);
481 }
482 ObserverMetric::QueueDepth(d) => {
483 self.queue_depth
484 .with_label_values(&[] as &[&str])
485 .set(*d as f64);
486 }
487 ObserverMetric::HandRunDuration {
488 hand_name,
489 duration,
490 } => {
491 self.hand_duration
492 .with_label_values(&[hand_name.as_str()])
493 .observe(duration.as_secs_f64());
494 }
495 ObserverMetric::HandFindingsCount { hand_name, count } => {
496 self.hand_findings
497 .with_label_values(&[hand_name.as_str()])
498 .inc_by(*count);
499 }
500 ObserverMetric::HandSuccessRate { hand_name, success } => {
501 let success_str = if *success { "true" } else { "false" };
502 self.hand_runs
503 .with_label_values(&[hand_name.as_str(), success_str])
504 .inc();
505 }
506 ObserverMetric::DeploymentLeadTime(d) => {
507 self.deployment_lead_time.observe(d.as_secs_f64());
508 }
509 ObserverMetric::RecoveryTime(d) => {
510 self.recovery_time.observe(d.as_secs_f64());
511 self.mttr.set(d.as_secs_f64());
512 }
513 }
514 }
515
516 fn name(&self) -> &str {
517 "prometheus"
518 }
519
520 fn as_any(&self) -> &dyn std::any::Any {
521 self
522 }
523}
524
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Builds a `ToolCall` event for `tool` lasting `millis` milliseconds.
    fn tool_call(tool: &'static str, millis: u64, success: bool) -> ObserverEvent {
        ObserverEvent::ToolCall {
            tool: tool.into(),
            duration: Duration::from_millis(millis),
            success,
        }
    }

    /// Builds an `Error` event for `component` carrying `message`.
    fn error_event(component: &'static str, message: &'static str) -> ObserverEvent {
        ObserverEvent::Error {
            component: component.into(),
            message: message.into(),
        }
    }

    /// Builds the `AgentStart` event shared by several tests.
    fn agent_start() -> ObserverEvent {
        ObserverEvent::AgentStart {
            provider: "openrouter".into(),
            model: "claude-sonnet".into(),
        }
    }

    /// Asserts that every needle occurs somewhere in `rendered`.
    fn assert_contains_all(rendered: &str, needles: &[&str]) {
        for needle in needles {
            assert!(rendered.contains(*needle));
        }
    }

    #[test]
    fn prometheus_observer_name() {
        let observer = PrometheusObserver::new();
        assert_eq!(observer.name(), "prometheus");
    }

    #[test]
    fn records_all_events_without_panic() {
        let observer = PrometheusObserver::new();
        let events = [
            agent_start(),
            ObserverEvent::AgentEnd {
                provider: "openrouter".into(),
                model: "claude-sonnet".into(),
                duration: Duration::from_millis(500),
                tokens_used: Some(100),
                cost_usd: None,
            },
            ObserverEvent::AgentEnd {
                provider: "openrouter".into(),
                model: "claude-sonnet".into(),
                duration: Duration::ZERO,
                tokens_used: None,
                cost_usd: None,
            },
            tool_call("shell", 10, true),
            tool_call("file_read", 5, false),
            ObserverEvent::ChannelMessage {
                channel: "telegram".into(),
                direction: "inbound".into(),
            },
            ObserverEvent::HeartbeatTick,
            error_event("provider", "timeout"),
        ];
        for event in &events {
            observer.record_event(event);
        }
    }

    #[test]
    fn records_all_metrics_without_panic() {
        let observer = PrometheusObserver::new();
        let metrics = [
            ObserverMetric::RequestLatency(Duration::from_secs(2)),
            ObserverMetric::TokensUsed(500),
            ObserverMetric::TokensUsed(0),
            ObserverMetric::ActiveSessions(3),
            ObserverMetric::QueueDepth(42),
        ];
        for metric in &metrics {
            observer.record_metric(metric);
        }
    }

    #[test]
    fn encode_produces_prometheus_text_format() {
        let observer = PrometheusObserver::new();
        observer.record_event(&agent_start());
        observer.record_event(&tool_call("shell", 100, true));
        observer.record_event(&ObserverEvent::HeartbeatTick);
        observer.record_metric(&ObserverMetric::RequestLatency(Duration::from_millis(250)));

        assert_contains_all(
            &observer.encode(),
            &[
                "construct_agent_starts_total",
                "construct_tool_calls_total",
                "construct_heartbeat_ticks_total",
                "construct_request_latency_seconds",
            ],
        );
    }

    #[test]
    fn counters_increment_correctly() {
        let observer = PrometheusObserver::new();

        (0..3).for_each(|_| observer.record_event(&ObserverEvent::HeartbeatTick));

        let rendered = observer.encode();
        assert!(rendered.contains("construct_heartbeat_ticks_total 3"));
    }

    #[test]
    fn tool_calls_track_success_and_failure_separately() {
        let observer = PrometheusObserver::new();

        // Two successes followed by one failure for the same tool.
        for succeeded in [true, true, false] {
            observer.record_event(&tool_call("shell", 10, succeeded));
        }

        assert_contains_all(
            &observer.encode(),
            &[
                r#"construct_tool_calls_total{success="true",tool="shell"} 2"#,
                r#"construct_tool_calls_total{success="false",tool="shell"} 1"#,
            ],
        );
    }

    #[test]
    fn errors_track_by_component() {
        let observer = PrometheusObserver::new();
        observer.record_event(&error_event("provider", "timeout"));
        observer.record_event(&error_event("provider", "rate limit"));
        observer.record_event(&error_event("channels", "disconnected"));

        assert_contains_all(
            &observer.encode(),
            &[
                r#"construct_errors_total{component="provider"} 2"#,
                r#"construct_errors_total{component="channels"} 1"#,
            ],
        );
    }

    #[test]
    fn gauge_reflects_latest_value() {
        let observer = PrometheusObserver::new();
        observer.record_metric(&ObserverMetric::TokensUsed(100));
        observer.record_metric(&ObserverMetric::TokensUsed(200));

        let rendered = observer.encode();
        assert!(rendered.contains("construct_tokens_used_last 200"));
    }

    #[test]
    fn llm_response_tracks_request_count_and_tokens() {
        let observer = PrometheusObserver::new();

        let first = ObserverEvent::LlmResponse {
            provider: "openrouter".into(),
            model: "claude-sonnet".into(),
            duration: Duration::from_millis(200),
            success: true,
            error_message: None,
            input_tokens: Some(100),
            output_tokens: Some(50),
        };
        let second = ObserverEvent::LlmResponse {
            provider: "openrouter".into(),
            model: "claude-sonnet".into(),
            duration: Duration::from_millis(300),
            success: true,
            error_message: None,
            input_tokens: Some(200),
            output_tokens: Some(80),
        };
        observer.record_event(&first);
        observer.record_event(&second);

        assert_contains_all(
            &observer.encode(),
            &[
                r#"construct_llm_requests_total{model="claude-sonnet",provider="openrouter",success="true"} 2"#,
                r#"construct_tokens_input_total{model="claude-sonnet",provider="openrouter"} 300"#,
                r#"construct_tokens_output_total{model="claude-sonnet",provider="openrouter"} 130"#,
            ],
        );
    }

    #[test]
    fn hand_events_track_runs_and_duration() {
        let observer = PrometheusObserver::new();

        observer.record_event(&ObserverEvent::HandCompleted {
            hand_name: "review".into(),
            duration_ms: 1500,
            findings_count: 3,
        });
        observer.record_event(&ObserverEvent::HandCompleted {
            hand_name: "review".into(),
            duration_ms: 2000,
            findings_count: 1,
        });
        observer.record_event(&ObserverEvent::HandFailed {
            hand_name: "review".into(),
            error: "timeout".into(),
            duration_ms: 5000,
        });

        assert_contains_all(
            &observer.encode(),
            &[
                r#"construct_hand_runs_total{hand="review",success="true"} 2"#,
                r#"construct_hand_runs_total{hand="review",success="false"} 1"#,
                r#"construct_hand_findings_total{hand="review"} 4"#,
                "construct_hand_duration_seconds",
            ],
        );
    }

    #[test]
    fn hand_metrics_record_duration_and_findings() {
        let observer = PrometheusObserver::new();

        observer.record_metric(&ObserverMetric::HandRunDuration {
            hand_name: "scan".into(),
            duration: Duration::from_millis(800),
        });
        observer.record_metric(&ObserverMetric::HandFindingsCount {
            hand_name: "scan".into(),
            count: 5,
        });
        // One successful and one failed run.
        for succeeded in [true, false] {
            observer.record_metric(&ObserverMetric::HandSuccessRate {
                hand_name: "scan".into(),
                success: succeeded,
            });
        }

        assert_contains_all(
            &observer.encode(),
            &[
                "construct_hand_duration_seconds",
                r#"construct_hand_findings_total{hand="scan"} 5"#,
                r#"construct_hand_runs_total{hand="scan",success="true"} 1"#,
                r#"construct_hand_runs_total{hand="scan",success="false"} 1"#,
            ],
        );
    }

    #[test]
    fn llm_response_without_tokens_increments_request_only() {
        let observer = PrometheusObserver::new();

        observer.record_event(&ObserverEvent::LlmResponse {
            provider: "ollama".into(),
            model: "llama3".into(),
            duration: Duration::from_millis(100),
            success: false,
            error_message: Some("timeout".into()),
            input_tokens: None,
            output_tokens: None,
        });

        let rendered = observer.encode();
        assert!(rendered.contains(
            r#"construct_llm_requests_total{model="llama3",provider="ollama",success="false"} 1"#
        ));
        // No token counts were reported, so no labelled token series may exist.
        for absent in [
            "construct_tokens_input_total{",
            "construct_tokens_output_total{",
        ] {
            assert!(!rendered.contains(absent));
        }
    }

    #[test]
    fn dora_deployment_events_track_counters() {
        let observer = PrometheusObserver::new();

        observer.record_event(&ObserverEvent::DeploymentCompleted {
            deploy_id: "d1".into(),
            commit_sha: "abc123".into(),
        });
        observer.record_event(&ObserverEvent::DeploymentCompleted {
            deploy_id: "d2".into(),
            commit_sha: "def456".into(),
        });
        observer.record_event(&ObserverEvent::DeploymentFailed {
            deploy_id: "d3".into(),
            reason: "timeout".into(),
        });

        assert_contains_all(
            &observer.encode(),
            &[
                r#"construct_deployments_total{status="success"} 2"#,
                r#"construct_deployments_total{status="failure"} 1"#,
            ],
        );
    }

    #[test]
    fn dora_failure_rate_gauge_updates() {
        let observer = PrometheusObserver::new();

        observer.record_event(&ObserverEvent::DeploymentCompleted {
            deploy_id: "d1".into(),
            commit_sha: "abc".into(),
        });
        observer.record_event(&ObserverEvent::DeploymentFailed {
            deploy_id: "d2".into(),
            reason: "error".into(),
        });

        // One success and one failure give a rate of one half.
        let rendered = observer.encode();
        assert!(rendered.contains("construct_deployment_failure_rate 0.5"));
    }

    #[test]
    fn dora_lead_time_and_recovery_metrics() {
        let observer = PrometheusObserver::new();

        let lead_time = Duration::from_secs(3600);
        let recovery = Duration::from_secs(600);
        observer.record_metric(&ObserverMetric::DeploymentLeadTime(lead_time));
        observer.record_metric(&ObserverMetric::RecoveryTime(recovery));

        assert_contains_all(
            &observer.encode(),
            &[
                "construct_deployment_lead_time_seconds",
                "construct_recovery_time_seconds",
                "construct_mttr_seconds 600",
            ],
        );
    }

    #[test]
    fn dora_started_and_recovery_events_no_panic() {
        let observer = PrometheusObserver::new();

        let started = ObserverEvent::DeploymentStarted {
            deploy_id: "d1".into(),
        };
        let recovered = ObserverEvent::RecoveryCompleted {
            deploy_id: "d1".into(),
        };
        observer.record_event(&started);
        observer.record_event(&recovered);
    }
}