1use prometheus_client::encoding::EncodeLabelSet;
7use prometheus_client::metrics::counter::Counter;
8use prometheus_client::metrics::family::Family;
9use prometheus_client::metrics::gauge::Gauge;
10use prometheus_client::metrics::histogram::Histogram;
11use prometheus_client::registry::Registry;
12use std::sync::atomic::AtomicI64;
13
14#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
16pub struct ExecutionLabels {
17 pub operation: String,
19}
20
21#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
23pub struct ErrorLabels {
24 pub error_kind: String,
26}
27
28pub struct ForgeMetrics {
30 pub executions_total: Family<ExecutionLabels, Counter>,
32 pub execution_duration_seconds: Family<ExecutionLabels, Histogram>,
34 pub errors_total: Family<ErrorLabels, Counter>,
36 pub pool_workers_alive: Gauge<i64, AtomicI64>,
38}
39
40impl ForgeMetrics {
41 pub fn new(registry: &mut Registry) -> Self {
43 let executions_total = Family::default();
44 registry.register(
45 "forge_executions_total",
46 "Total sandbox executions",
47 executions_total.clone(),
48 );
49
50 let execution_duration_seconds =
51 Family::<ExecutionLabels, Histogram>::new_with_constructor(|| {
52 Histogram::new(
53 [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 30.0].into_iter(),
54 )
55 });
56 registry.register(
57 "forge_execution_duration_seconds",
58 "Sandbox execution duration",
59 execution_duration_seconds.clone(),
60 );
61
62 let errors_total = Family::default();
63 registry.register(
64 "forge_errors_total",
65 "Total sandbox errors by kind",
66 errors_total.clone(),
67 );
68
69 let pool_workers_alive = Gauge::default();
70 registry.register(
71 "forge_pool_workers_alive",
72 "Current workers alive in pool",
73 pool_workers_alive.clone(),
74 );
75
76 Self {
77 executions_total,
78 execution_duration_seconds,
79 errors_total,
80 pool_workers_alive,
81 }
82 }
83
84 pub fn record_execution(&self, operation: &str, duration_secs: f64) {
86 let labels = ExecutionLabels {
87 operation: operation.to_string(),
88 };
89 self.executions_total.get_or_create(&labels).inc();
90 self.execution_duration_seconds
91 .get_or_create(&labels)
92 .observe(duration_secs);
93 }
94
95 pub fn record_error(&self, error_kind: &str) {
97 let labels = ErrorLabels {
98 error_kind: error_kind.to_string(),
99 };
100 self.errors_total.get_or_create(&labels).inc();
101 }
102}
103
#[cfg(test)]
mod tests {
    use super::*;
    use prometheus_client::encoding::text::encode;

    /// Builds a fresh registry together with the metrics registered on it.
    fn setup() -> (Registry, ForgeMetrics) {
        let mut registry = Registry::default();
        let metrics = ForgeMetrics::new(&mut registry);
        (registry, metrics)
    }

    /// Shorthand for an `ExecutionLabels` value.
    fn exec_labels(operation: &str) -> ExecutionLabels {
        ExecutionLabels {
            operation: operation.into(),
        }
    }

    /// Shorthand for an `ErrorLabels` value.
    fn err_labels(error_kind: &str) -> ErrorLabels {
        ErrorLabels {
            error_kind: error_kind.into(),
        }
    }

    /// Construction succeeds and every metric starts from zero.
    #[test]
    fn metrics_01_forge_metrics_creates_counters() {
        let (_registry, metrics) = setup();

        assert_eq!(
            metrics
                .executions_total
                .get_or_create(&exec_labels("execute"))
                .get(),
            0
        );
        assert_eq!(
            metrics
                .errors_total
                .get_or_create(&err_labels("timeout"))
                .get(),
            0
        );
        assert_eq!(metrics.pool_workers_alive.get(), 0);
    }

    /// Execution counts are tracked independently per operation.
    #[test]
    fn metrics_02_execution_counter_increments() {
        let (_registry, metrics) = setup();
        metrics.record_execution("execute", 0.5);
        metrics.record_execution("execute", 1.0);
        metrics.record_execution("search", 0.1);

        let execute_count = metrics
            .executions_total
            .get_or_create(&exec_labels("execute"))
            .get();
        assert_eq!(execute_count, 2);

        let search_count = metrics
            .executions_total
            .get_or_create(&exec_labels("search"))
            .get();
        assert_eq!(search_count, 1);
    }

    /// Error counts are tracked independently per error kind.
    #[test]
    fn metrics_03_error_counter_increments_on_failure() {
        let (_registry, metrics) = setup();
        metrics.record_error("timeout");
        metrics.record_error("timeout");
        metrics.record_error("js_error");

        let timeout_count = metrics
            .errors_total
            .get_or_create(&err_labels("timeout"))
            .get();
        assert_eq!(timeout_count, 2);

        let js_error_count = metrics
            .errors_total
            .get_or_create(&err_labels("js_error"))
            .get();
        assert_eq!(js_error_count, 1);
    }

    /// The worker gauge reports the value most recently set on it.
    #[test]
    fn metrics_04_pool_gauge_bridges_atomic_counters() {
        let (_registry, metrics) = setup();
        metrics.pool_workers_alive.set(5);
        assert_eq!(metrics.pool_workers_alive.get(), 5);
    }

    /// Both observations land in the duration histogram.
    #[test]
    fn metrics_05_duration_histogram_records() {
        let (registry, metrics) = setup();
        metrics.record_execution("execute", 0.05);
        metrics.record_execution("execute", 2.5);

        // The histogram exposes no public getter, so inspect the encoded
        // output instead. Only one label set was recorded, hence exactly one
        // `_count` sample line is expected.
        let mut buf = String::new();
        encode(&mut buf, &registry).unwrap();

        let count_line = buf
            .lines()
            .find(|line| line.starts_with("forge_execution_duration_seconds_count"))
            .unwrap_or_else(|| panic!("missing histogram count sample: {buf}"));
        assert!(
            count_line.ends_with(" 2"),
            "histogram should have recorded two observations: {count_line}"
        );
    }

    /// Recorded metrics show up in the text exposition of the registry.
    #[test]
    fn metrics_06_metrics_encode_to_text() {
        let (registry, metrics) = setup();
        metrics.record_execution("execute", 1.0);
        metrics.record_error("timeout");

        let mut buf = String::new();
        encode(&mut buf, &registry).unwrap();

        assert!(
            buf.contains("forge_executions_total"),
            "should contain execution counter: {buf}"
        );
        assert!(
            buf.contains("forge_errors_total"),
            "should contain error counter: {buf}"
        );
    }

    /// Metrics can be shared across threads and updates from each thread are
    /// visible after the threads are joined.
    #[test]
    fn metrics_08_metrics_thread_safe() {
        let (_registry, metrics) = setup();
        let metrics = std::sync::Arc::new(metrics);

        let exec_handle = {
            let shared = std::sync::Arc::clone(&metrics);
            std::thread::spawn(move || shared.record_execution("execute", 0.1))
        };
        let error_handle = {
            let shared = std::sync::Arc::clone(&metrics);
            std::thread::spawn(move || shared.record_error("js_error"))
        };

        exec_handle.join().unwrap();
        error_handle.join().unwrap();

        assert_eq!(
            metrics
                .executions_total
                .get_or_create(&exec_labels("execute"))
                .get(),
            1
        );
        assert_eq!(
            metrics
                .errors_total
                .get_or_create(&err_labels("js_error"))
                .get(),
            1
        );
    }
}