1use std::sync::atomic::{AtomicI64, AtomicU64, Ordering};
9
10use dashmap::DashMap;
11
/// Upper bounds used by [`Histogram::with_defaults`].
const DEFAULT_BUCKETS: &[f64] = &[
    0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
];

/// A cumulative histogram whose state is held entirely in atomics, so it can
/// be updated through a shared reference.
#[derive(Debug)]
pub struct Histogram {
    /// Bucket upper bounds: ascending, de-duplicated, never NaN.
    buckets: Vec<f64>,
    /// `counts[i]` is the number of observations `<= buckets[i]`.
    counts: Vec<AtomicU64>,
    /// Running sum of observations, stored as the bit pattern of an `f64`.
    sum_bits: AtomicU64,
    /// Total number of observations.
    count: AtomicU64,
}

impl Histogram {
    /// Builds a histogram from the given bucket upper bounds.
    ///
    /// The bounds are sorted ascending and de-duplicated. NaN bounds are
    /// dropped: no observation can ever satisfy `value <= NaN`, and NaN makes
    /// the sort comparator inconsistent (which `sort_by` is allowed to punish
    /// with an unspecified order or a panic).
    pub fn new(mut buckets: Vec<f64>) -> Self {
        buckets.retain(|bound| !bound.is_nan());
        buckets.sort_by(|a, b| {
            a.partial_cmp(b)
                .expect("NaN bounds were removed, so every pair is comparable")
        });
        buckets.dedup();

        let counts = buckets.iter().map(|_| AtomicU64::new(0)).collect();
        Self {
            buckets,
            counts,
            sum_bits: AtomicU64::new(0.0_f64.to_bits()),
            count: AtomicU64::new(0),
        }
    }

    /// Builds a histogram using [`DEFAULT_BUCKETS`].
    pub fn with_defaults() -> Self {
        Self::new(DEFAULT_BUCKETS.to_vec())
    }

    /// Records one observation.
    ///
    /// Every bucket whose upper bound is `>= value` is incremented (buckets
    /// are cumulative), then the sum and the total count are updated. A NaN
    /// `value` matches no bucket but is still added to the sum and the count.
    ///
    /// The individual updates are atomic, but the observation as a whole is
    /// not: a concurrent reader may see some of them applied and not others.
    pub fn observe(&self, value: f64) {
        for (upper, slot) in self.buckets.iter().zip(&self.counts) {
            if value <= *upper {
                slot.fetch_add(1, Ordering::Relaxed);
            }
        }

        // There is no atomic f64, so add through the bit pattern with a
        // compare-and-swap retry loop. The closure always returns `Some`, so
        // the update cannot fail and the result carries no information.
        let _ = self
            .sum_bits
            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |bits| {
                Some((f64::from_bits(bits) + value).to_bits())
            });

        self.count.fetch_add(1, Ordering::Relaxed);
    }

    /// Returns the total number of observations.
    pub fn count(&self) -> u64 {
        self.count.load(Ordering::Relaxed)
    }

    /// Returns the sum of all observed values.
    pub fn sum(&self) -> f64 {
        f64::from_bits(self.sum_bits.load(Ordering::Relaxed))
    }

    /// Returns `(upper_bound, cumulative_count)` pairs in ascending bound order.
    pub fn bucket_counts(&self) -> Vec<(f64, u64)> {
        self.buckets
            .iter()
            .zip(&self.counts)
            .map(|(bound, slot)| (*bound, slot.load(Ordering::Relaxed)))
            .collect()
    }
}
107
108#[derive(Debug)]
120pub struct MetricsRegistry {
121 counters: DashMap<String, AtomicU64>,
123 gauges: DashMap<String, AtomicI64>,
125 histograms: DashMap<String, Histogram>,
127 help: DashMap<String, String>,
129 metric_type: DashMap<String, String>,
131}
132
133impl MetricsRegistry {
134 pub fn new() -> Self {
136 Self {
137 counters: DashMap::new(),
138 gauges: DashMap::new(),
139 histograms: DashMap::new(),
140 help: DashMap::new(),
141 metric_type: DashMap::new(),
142 }
143 }
144
145 pub fn register_counter(&self, name: &str, help: &str) {
149 self.help.insert(name.to_string(), help.to_string());
150 self.metric_type
151 .insert(name.to_string(), "counter".to_string());
152 }
153
154 pub fn register_gauge(&self, name: &str, help: &str) {
156 self.help.insert(name.to_string(), help.to_string());
157 self.metric_type
158 .insert(name.to_string(), "gauge".to_string());
159 }
160
161 pub fn register_histogram(&self, name: &str, help: &str) {
163 self.help.insert(name.to_string(), help.to_string());
164 self.metric_type
165 .insert(name.to_string(), "histogram".to_string());
166 }
167
168 pub fn counter_inc(&self, key: &str) {
172 self.counter_add(key, 1);
173 }
174
175 pub fn counter_add(&self, key: &str, n: u64) {
177 self.counters
178 .entry(key.to_string())
179 .or_insert_with(|| AtomicU64::new(0))
180 .fetch_add(n, Ordering::Relaxed);
181 }
182
183 pub fn counter_with_labels(&self, name: &str, labels: &[(&str, &str)]) {
185 let key = encode_key(name, labels);
186 self.counter_inc(&key);
187 }
188
189 pub fn counter_add_with_labels(&self, name: &str, labels: &[(&str, &str)], n: u64) {
191 let key = encode_key(name, labels);
192 self.counter_add(&key, n);
193 }
194
195 pub fn counter_get(&self, key: &str) -> u64 {
197 self.counters
198 .get(key)
199 .map(|v| v.load(Ordering::Relaxed))
200 .unwrap_or(0)
201 }
202
203 pub fn gauge_set(&self, key: &str, value: i64) {
207 self.gauges
208 .entry(key.to_string())
209 .or_insert_with(|| AtomicI64::new(0))
210 .store(value, Ordering::Relaxed);
211 }
212
213 pub fn gauge_inc(&self, key: &str) {
215 self.gauge_add(key, 1);
216 }
217
218 pub fn gauge_dec(&self, key: &str) {
220 self.gauge_add(key, -1);
221 }
222
223 pub fn gauge_add(&self, key: &str, delta: i64) {
225 self.gauges
226 .entry(key.to_string())
227 .or_insert_with(|| AtomicI64::new(0))
228 .fetch_add(delta, Ordering::Relaxed);
229 }
230
231 pub fn gauge_get(&self, key: &str) -> i64 {
233 self.gauges
234 .get(key)
235 .map(|v| v.load(Ordering::Relaxed))
236 .unwrap_or(0)
237 }
238
239 pub fn histogram_observe(&self, key: &str, value: f64) {
244 self.histograms
245 .entry(key.to_string())
246 .or_insert_with(Histogram::with_defaults)
247 .observe(value);
248 }
249
250 pub fn histogram_observe_with_labels(&self, name: &str, labels: &[(&str, &str)], value: f64) {
252 let key = encode_key(name, labels);
253 self.histogram_observe(&key, value);
254 }
255
256 #[allow(clippy::type_complexity)]
258 pub fn histogram_get(&self, key: &str) -> Option<(Vec<(f64, u64)>, f64, u64)> {
259 self.histograms
260 .get(key)
261 .map(|h| (h.bucket_counts(), h.sum(), h.count()))
262 }
263
264 pub fn export_prometheus(&self) -> String {
268 let mut out = String::new();
269
270 let counter_families: DashMap<String, Vec<(String, u64)>> = DashMap::new();
275 for entry in self.counters.iter() {
276 let full_key = entry.key().clone();
277 let value = entry.value().load(Ordering::Relaxed);
278 let (base, _labels) = split_key(&full_key);
279 counter_families
280 .entry(base.to_string())
281 .or_default()
282 .push((full_key, value));
283 }
284
285 let mut counter_bases: Vec<String> =
286 counter_families.iter().map(|e| e.key().clone()).collect();
287 counter_bases.sort();
288
289 for base in &counter_bases {
290 if let Some(help) = self.help.get(base.as_str()) {
291 out.push_str(&format!("# HELP {} {}\n", base, help.value()));
292 }
293 out.push_str(&format!("# TYPE {} counter\n", base));
294
295 if let Some(entries) = counter_families.get(base) {
296 let mut sorted: Vec<_> = entries.value().clone();
297 sorted.sort_by(|a, b| a.0.cmp(&b.0));
298 for (key, val) in &sorted {
299 out.push_str(&format!("{} {}\n", key, val));
300 }
301 }
302 out.push('\n');
303 }
304
305 let gauge_families: DashMap<String, Vec<(String, i64)>> = DashMap::new();
307 for entry in self.gauges.iter() {
308 let full_key = entry.key().clone();
309 let value = entry.value().load(Ordering::Relaxed);
310 let (base, _labels) = split_key(&full_key);
311 gauge_families
312 .entry(base.to_string())
313 .or_default()
314 .push((full_key, value));
315 }
316
317 let mut gauge_bases: Vec<String> = gauge_families.iter().map(|e| e.key().clone()).collect();
318 gauge_bases.sort();
319
320 for base in &gauge_bases {
321 if let Some(help) = self.help.get(base.as_str()) {
322 out.push_str(&format!("# HELP {} {}\n", base, help.value()));
323 }
324 out.push_str(&format!("# TYPE {} gauge\n", base));
325
326 if let Some(entries) = gauge_families.get(base) {
327 let mut sorted: Vec<_> = entries.value().clone();
328 sorted.sort_by(|a, b| a.0.cmp(&b.0));
329 for (key, val) in &sorted {
330 out.push_str(&format!("{} {}\n", key, val));
331 }
332 }
333 out.push('\n');
334 }
335
336 let histogram_families: DashMap<String, Vec<String>> = DashMap::new();
338 for entry in self.histograms.iter() {
339 let full_key = entry.key().clone();
340 let (base, _labels) = split_key(&full_key);
341 histogram_families
342 .entry(base.to_string())
343 .or_default()
344 .push(full_key);
345 }
346
347 let mut hist_bases: Vec<String> =
348 histogram_families.iter().map(|e| e.key().clone()).collect();
349 hist_bases.sort();
350
351 for base in &hist_bases {
352 if let Some(help) = self.help.get(base.as_str()) {
353 out.push_str(&format!("# HELP {} {}\n", base, help.value()));
354 }
355 out.push_str(&format!("# TYPE {} histogram\n", base));
356
357 if let Some(keys) = histogram_families.get(base) {
358 let mut sorted_keys = keys.value().clone();
359 sorted_keys.sort();
360
361 for key in &sorted_keys {
362 if let Some(h) = self.histograms.get(key.as_str()) {
363 let (_, labels_part) = split_key(key);
364 let label_prefix = if labels_part.is_empty() {
365 String::new()
366 } else {
367 let inner = &labels_part[1..labels_part.len() - 1];
369 format!("{},", inner)
370 };
371
372 for (bound, count) in h.bucket_counts() {
373 out.push_str(&format!(
374 "{}_bucket{{{}le=\"{}\"}} {}\n",
375 base,
376 label_prefix,
377 format_float(bound),
378 count
379 ));
380 }
381 out.push_str(&format!(
383 "{}_bucket{{{}le=\"+Inf\"}} {}\n",
384 base,
385 label_prefix,
386 h.count()
387 ));
388 out.push_str(&format!("{}_sum {}\n", key, format_float(h.sum())));
389 out.push_str(&format!("{}_count {}\n", key, h.count()));
390 }
391 }
392 }
393 out.push('\n');
394 }
395
396 out
397 }
398}
399
400impl Default for MetricsRegistry {
401 fn default() -> Self {
402 Self::new()
403 }
404}
405
// Counter metric names.

/// Counter name: `punch_requests_total`.
pub const REQUESTS_TOTAL: &str = "punch_requests_total";
/// Counter name: `punch_llm_calls_total`.
pub const LLM_CALLS_TOTAL: &str = "punch_llm_calls_total";
/// Counter name: `punch_tool_executions_total`.
pub const TOOL_EXECUTIONS_TOTAL: &str = "punch_tool_executions_total";
/// Counter name: `punch_fighter_spawns_total`.
pub const FIGHTER_SPAWNS_TOTAL: &str = "punch_fighter_spawns_total";
/// Counter name: `punch_gorilla_runs_total`.
pub const GORILLA_RUNS_TOTAL: &str = "punch_gorilla_runs_total";
/// Counter name: `punch_messages_total`.
pub const MESSAGES_TOTAL: &str = "punch_messages_total";
/// Counter name: `punch_errors_total`.
pub const ERRORS_TOTAL: &str = "punch_errors_total";
/// Counter name: `punch_tokens_input_total`.
pub const TOKENS_INPUT_TOTAL: &str = "punch_tokens_input_total";
/// Counter name: `punch_tokens_output_total`.
pub const TOKENS_OUTPUT_TOTAL: &str = "punch_tokens_output_total";

// Gauge metric names.

/// Gauge name: `punch_active_fighters`.
pub const ACTIVE_FIGHTERS: &str = "punch_active_fighters";
/// Gauge name: `punch_active_gorillas`.
pub const ACTIVE_GORILLAS: &str = "punch_active_gorillas";
/// Gauge name: `punch_active_bouts`.
pub const ACTIVE_BOUTS: &str = "punch_active_bouts";
/// Gauge name: `punch_memory_entries`.
pub const MEMORY_ENTRIES: &str = "punch_memory_entries";
/// Gauge name: `punch_queue_depth`.
pub const QUEUE_DEPTH: &str = "punch_queue_depth";

// Histogram metric names.

/// Histogram name: `punch_request_duration_seconds`.
pub const REQUEST_DURATION_SECONDS: &str = "punch_request_duration_seconds";
/// Histogram name: `punch_llm_latency_seconds`.
pub const LLM_LATENCY_SECONDS: &str = "punch_llm_latency_seconds";
/// Histogram name: `punch_tool_execution_seconds`.
pub const TOOL_EXECUTION_SECONDS: &str = "punch_tool_execution_seconds";
449
450pub fn register_default_metrics(registry: &MetricsRegistry) {
452 registry.register_counter(REQUESTS_TOTAL, "Total API requests");
454 registry.register_counter(LLM_CALLS_TOTAL, "Total LLM calls");
455 registry.register_counter(TOOL_EXECUTIONS_TOTAL, "Total tool executions");
456 registry.register_counter(FIGHTER_SPAWNS_TOTAL, "Total fighters spawned");
457 registry.register_counter(GORILLA_RUNS_TOTAL, "Total gorilla executions");
458 registry.register_counter(MESSAGES_TOTAL, "Total messages processed");
459 registry.register_counter(ERRORS_TOTAL, "Total errors by type");
460 registry.register_counter(TOKENS_INPUT_TOTAL, "Total input tokens consumed");
461 registry.register_counter(TOKENS_OUTPUT_TOTAL, "Total output tokens consumed");
462
463 registry.register_gauge(ACTIVE_FIGHTERS, "Currently active fighters");
465 registry.register_gauge(ACTIVE_GORILLAS, "Currently rampaging gorillas");
466 registry.register_gauge(ACTIVE_BOUTS, "Open bout sessions");
467 registry.register_gauge(MEMORY_ENTRIES, "Total memory entries");
468 registry.register_gauge(QUEUE_DEPTH, "Task queue depth");
469
470 registry.register_histogram(REQUEST_DURATION_SECONDS, "API request latency");
472 registry.register_histogram(LLM_LATENCY_SECONDS, "LLM call latency");
473 registry.register_histogram(TOOL_EXECUTION_SECONDS, "Tool execution time");
474}
475
/// Builds the storage/exposition key for a metric name plus labels.
///
/// With no labels the key is just `name`. Otherwise it is
/// `name{k1="v1",k2="v2"}` with the labels in the order given. Label values
/// are escaped the way the Prometheus text format requires (`\` -> `\\`,
/// `"` -> `\"`, line feed -> `\n`) so that a value containing those
/// characters cannot produce a malformed series line. Label names are written
/// unchanged.
fn encode_key(name: &str, labels: &[(&str, &str)]) -> String {
    if labels.is_empty() {
        return name.to_string();
    }

    let mut key = String::from(name);
    key.push('{');
    for (index, (label, value)) in labels.iter().enumerate() {
        if index > 0 {
            key.push(',');
        }
        key.push_str(label);
        key.push_str("=\"");
        for ch in value.chars() {
            match ch {
                '\\' => key.push_str("\\\\"),
                '"' => key.push_str("\\\""),
                '\n' => key.push_str("\\n"),
                other => key.push(other),
            }
        }
        key.push('"');
    }
    key.push('}');
    key
}
493
/// Splits a series key into `(base_name, label_part)` at the first `{`.
///
/// The label part keeps its braces; it is empty when the key has no `{`.
fn split_key(key: &str) -> (&str, &str) {
    // With no brace the split point is the end of the string, which yields
    // the whole key and an empty remainder.
    let brace_at = key.find('{').unwrap_or(key.len());
    key.split_at(brace_at)
}
504
/// Formats a float for the exposition output.
///
/// Infinities become `+Inf` / `-Inf`. Integral values are printed without a
/// fractional part (`1`, not `1.0`). Everything else, including NaN, uses the
/// standard `f64` display form.
fn format_float(v: f64) -> String {
    /// 2^53: below this magnitude every integral `f64` converts to `i64`
    /// exactly. Beyond the `i64` range an `as i64` cast saturates, which used
    /// to print e.g. `1e19` as `9223372036854775807`.
    const EXACT_INTEGER_LIMIT: f64 = 9_007_199_254_740_992.0;

    if v == f64::INFINITY {
        return "+Inf".to_string();
    }
    if v == f64::NEG_INFINITY {
        return "-Inf".to_string();
    }

    if v.fract() == 0.0 && v.abs() < EXACT_INTEGER_LIMIT {
        (v as i64).to_string()
    } else {
        // `Display` for f64 never prints a trailing ".0" for large integral
        // values, so huge whole numbers still come out without a fraction.
        v.to_string()
    }
}
523
/// Unit tests for the registry, the histogram and the key helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;
    use std::thread;

    #[test]
    fn test_counter_increment() {
        let reg = MetricsRegistry::new();
        reg.counter_inc("test_counter");
        assert_eq!(reg.counter_get("test_counter"), 1);
        reg.counter_inc("test_counter");
        assert_eq!(reg.counter_get("test_counter"), 2);
    }

    #[test]
    fn test_counter_add() {
        let reg = MetricsRegistry::new();
        reg.counter_add("test_counter", 10);
        assert_eq!(reg.counter_get("test_counter"), 10);
        reg.counter_add("test_counter", 5);
        assert_eq!(reg.counter_get("test_counter"), 15);
    }

    #[test]
    fn test_gauge_set_inc_dec() {
        let reg = MetricsRegistry::new();
        reg.gauge_set("test_gauge", 42);
        assert_eq!(reg.gauge_get("test_gauge"), 42);

        reg.gauge_inc("test_gauge");
        assert_eq!(reg.gauge_get("test_gauge"), 43);

        reg.gauge_dec("test_gauge");
        assert_eq!(reg.gauge_get("test_gauge"), 42);

        reg.gauge_add("test_gauge", -10);
        assert_eq!(reg.gauge_get("test_gauge"), 32);
    }

    #[test]
    fn test_histogram_observe_and_buckets() {
        let reg = MetricsRegistry::new();
        reg.register_histogram("test_hist", "test histogram");

        reg.histogram_observe("test_hist", 0.003);
        reg.histogram_observe("test_hist", 0.007);
        reg.histogram_observe("test_hist", 0.02);
        reg.histogram_observe("test_hist", 5.5);

        let (buckets, sum, count) = reg.histogram_get("test_hist").unwrap();
        assert_eq!(count, 4);

        let expected_sum = 0.003 + 0.007 + 0.02 + 5.5;
        assert!((sum - expected_sum).abs() < 1e-10);

        // Buckets are cumulative: each one counts every observation at or
        // below its upper bound.
        assert_eq!(buckets[0], (0.005, 1));
        assert_eq!(buckets[1], (0.01, 2));
        assert_eq!(buckets[2], (0.025, 3));
        // 5.5 is above the 5.0 bound, so it first appears in the 10.0 bucket.
        assert_eq!(buckets[9], (5.0, 3));
        assert_eq!(buckets[10], (10.0, 4));
    }

    #[test]
    fn test_labeled_metrics() {
        let reg = MetricsRegistry::new();
        reg.register_counter("http_requests_total", "Total HTTP requests");

        reg.counter_with_labels(
            "http_requests_total",
            &[("method", "GET"), ("status", "200")],
        );
        reg.counter_with_labels(
            "http_requests_total",
            &[("method", "POST"), ("status", "200")],
        );
        reg.counter_with_labels(
            "http_requests_total",
            &[("method", "GET"), ("status", "200")],
        );

        assert_eq!(
            reg.counter_get("http_requests_total{method=\"GET\",status=\"200\"}"),
            2
        );
        assert_eq!(
            reg.counter_get("http_requests_total{method=\"POST\",status=\"200\"}"),
            1
        );
    }

    #[test]
    fn test_prometheus_text_format_counter() {
        let reg = MetricsRegistry::new();
        reg.register_counter("punch_requests_total", "Total API requests");

        reg.counter_with_labels(
            "punch_requests_total",
            &[("method", "GET"), ("status", "200")],
        );

        let output = reg.export_prometheus();
        assert!(output.contains("# HELP punch_requests_total Total API requests"));
        assert!(output.contains("# TYPE punch_requests_total counter"));
        assert!(output.contains("punch_requests_total{method=\"GET\",status=\"200\"} 1"));
    }

    #[test]
    fn test_prometheus_text_format_gauge() {
        let reg = MetricsRegistry::new();
        reg.register_gauge("punch_active_fighters", "Currently active fighters");
        reg.gauge_set("punch_active_fighters", 5);

        let output = reg.export_prometheus();
        assert!(output.contains("# HELP punch_active_fighters Currently active fighters"));
        assert!(output.contains("# TYPE punch_active_fighters gauge"));
        assert!(output.contains("punch_active_fighters 5"));
    }

    #[test]
    fn test_prometheus_text_format_histogram() {
        let reg = MetricsRegistry::new();
        reg.register_histogram("punch_request_duration_seconds", "API request latency");

        reg.histogram_observe("punch_request_duration_seconds", 0.02);
        reg.histogram_observe("punch_request_duration_seconds", 0.08);

        let output = reg.export_prometheus();
        assert!(output.contains("# HELP punch_request_duration_seconds API request latency"));
        assert!(output.contains("# TYPE punch_request_duration_seconds histogram"));
        assert!(output.contains("punch_request_duration_seconds_bucket{le=\"0.025\"} 1"));
        assert!(output.contains("punch_request_duration_seconds_bucket{le=\"0.1\"} 2"));
        assert!(output.contains("punch_request_duration_seconds_bucket{le=\"+Inf\"} 2"));
        assert!(output.contains("punch_request_duration_seconds_sum"));
        assert!(output.contains("punch_request_duration_seconds_count 2"));
    }

    #[test]
    fn test_concurrent_access() {
        let reg = Arc::new(MetricsRegistry::new());
        let mut handles = Vec::new();

        // 10 threads x 1000 updates each must add up exactly.
        for _ in 0..10 {
            let r = Arc::clone(&reg);
            handles.push(thread::spawn(move || {
                for _ in 0..1000 {
                    r.counter_inc("concurrent_counter");
                    r.gauge_inc("concurrent_gauge");
                    r.histogram_observe("concurrent_hist", 0.1);
                }
            }));
        }

        for h in handles {
            h.join().unwrap();
        }

        assert_eq!(reg.counter_get("concurrent_counter"), 10_000);
        assert_eq!(reg.gauge_get("concurrent_gauge"), 10_000);

        let (_, _, count) = reg.histogram_get("concurrent_hist").unwrap();
        assert_eq!(count, 10_000);
    }

    #[test]
    fn test_zero_value_metrics_display() {
        let reg = MetricsRegistry::new();
        reg.register_counter("zero_counter", "A zero counter");
        // Adding zero still creates the series, so it shows up in the export.
        reg.counter_add("zero_counter", 0);

        let output = reg.export_prometheus();
        assert!(output.contains("zero_counter 0"));
    }

    #[test]
    fn test_histogram_percentile_via_buckets() {
        let reg = MetricsRegistry::new();
        let hist = Histogram::new(vec![1.0, 5.0, 10.0]);
        // Insert directly so the series uses custom bounds, not the defaults.
        reg.histograms.insert("custom_hist".to_string(), hist);

        reg.histogram_observe("custom_hist", 0.5);
        reg.histogram_observe("custom_hist", 3.0);
        reg.histogram_observe("custom_hist", 7.0);
        reg.histogram_observe("custom_hist", 7.0);

        let (buckets, sum, count) = reg.histogram_get("custom_hist").unwrap();
        assert_eq!(count, 4);
        assert!((sum - 17.5).abs() < 1e-10);

        assert_eq!(buckets[0], (1.0, 1));
        assert_eq!(buckets[1], (5.0, 2));
        assert_eq!(buckets[2], (10.0, 4));
    }

    #[test]
    fn test_encode_key_no_labels() {
        assert_eq!(encode_key("my_metric", &[]), "my_metric");
    }

    #[test]
    fn test_encode_key_with_labels() {
        let key = encode_key("my_metric", &[("a", "1"), ("b", "2")]);
        assert_eq!(key, "my_metric{a=\"1\",b=\"2\"}");
    }

    #[test]
    fn test_split_key() {
        let (base, labels) = split_key("foo{a=\"1\"}");
        assert_eq!(base, "foo");
        assert_eq!(labels, "{a=\"1\"}");

        let (base2, labels2) = split_key("bar");
        assert_eq!(base2, "bar");
        assert_eq!(labels2, "");
    }

    #[test]
    fn test_register_default_metrics() {
        let reg = MetricsRegistry::new();
        register_default_metrics(&reg);

        assert!(reg.help.contains_key(REQUESTS_TOTAL));
        assert!(reg.help.contains_key(ACTIVE_FIGHTERS));
        assert!(reg.help.contains_key(REQUEST_DURATION_SECONDS));

        assert_eq!(
            reg.metric_type
                .get(REQUESTS_TOTAL)
                .unwrap()
                .value()
                .as_str(),
            "counter"
        );
        assert_eq!(
            reg.metric_type
                .get(ACTIVE_FIGHTERS)
                .unwrap()
                .value()
                .as_str(),
            "gauge"
        );
        assert_eq!(
            reg.metric_type
                .get(REQUEST_DURATION_SECONDS)
                .unwrap()
                .value()
                .as_str(),
            "histogram"
        );
    }
}