1use once_cell::sync::Lazy;
7use prometheus::{
8 register_counter_vec, register_gauge_vec, register_histogram_vec, CounterVec, GaugeVec,
9 HistogramVec, Registry,
10};
11
12pub struct ChaosMetrics {
14 pub scenarios_total: CounterVec,
16
17 pub faults_injected_total: CounterVec,
19
20 pub latency_injected: HistogramVec,
22
23 pub rate_limit_violations_total: CounterVec,
25
26 pub circuit_breaker_state: GaugeVec,
28
29 pub bulkhead_concurrent: GaugeVec,
31
32 pub orchestration_step_duration: HistogramVec,
34
35 pub orchestration_executions_total: CounterVec,
37
38 pub active_orchestrations: GaugeVec,
40
41 pub assertion_results_total: CounterVec,
43
44 pub hook_executions_total: CounterVec,
46
47 pub recommendations_total: GaugeVec,
49
50 pub chaos_impact_score: GaugeVec,
52}
53
54impl ChaosMetrics {
55 pub fn new() -> Result<Self, prometheus::Error> {
57 Ok(Self {
58 scenarios_total: register_counter_vec!(
59 "mockforge_chaos_scenarios_total",
60 "Total number of chaos scenarios executed",
61 &["scenario_type", "status"]
62 )?,
63
64 faults_injected_total: register_counter_vec!(
65 "mockforge_chaos_faults_total",
66 "Total number of faults injected",
67 &["fault_type", "endpoint"]
68 )?,
69
70 latency_injected: register_histogram_vec!(
71 "mockforge_chaos_latency_ms",
72 "Latency injected in milliseconds",
73 &["endpoint"],
74 vec![10.0, 50.0, 100.0, 250.0, 500.0, 1000.0, 2500.0, 5000.0, 10000.0]
75 )?,
76
77 rate_limit_violations_total: register_counter_vec!(
78 "mockforge_chaos_rate_limit_violations_total",
79 "Total rate limit violations",
80 &["endpoint"]
81 )?,
82
83 circuit_breaker_state: register_gauge_vec!(
84 "mockforge_chaos_circuit_breaker_state",
85 "Circuit breaker state (0=closed, 1=open, 2=half-open)",
86 &["circuit_name"]
87 )?,
88
89 bulkhead_concurrent: register_gauge_vec!(
90 "mockforge_chaos_bulkhead_concurrent_requests",
91 "Current concurrent requests in bulkhead",
92 &["bulkhead_name"]
93 )?,
94
95 orchestration_step_duration: register_histogram_vec!(
96 "mockforge_chaos_orchestration_step_duration_seconds",
97 "Duration of orchestration steps in seconds",
98 &["orchestration", "step"],
99 vec![0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0]
100 )?,
101
102 orchestration_executions_total: register_counter_vec!(
103 "mockforge_chaos_orchestration_executions_total",
104 "Total orchestration executions",
105 &["orchestration", "status"]
106 )?,
107
108 active_orchestrations: register_gauge_vec!(
109 "mockforge_chaos_active_orchestrations",
110 "Number of active orchestrations",
111 &["orchestration"]
112 )?,
113
114 assertion_results_total: register_counter_vec!(
115 "mockforge_chaos_assertion_results_total",
116 "Total assertion results",
117 &["orchestration", "result"]
118 )?,
119
120 hook_executions_total: register_counter_vec!(
121 "mockforge_chaos_hook_executions_total",
122 "Total hook executions",
123 &["hook_type", "status"]
124 )?,
125
126 recommendations_total: register_gauge_vec!(
127 "mockforge_chaos_recommendations_total",
128 "Number of AI recommendations",
129 &["category", "severity"]
130 )?,
131
132 chaos_impact_score: register_gauge_vec!(
133 "mockforge_chaos_impact_score",
134 "Overall chaos impact score (0.0-1.0)",
135 &["time_window"]
136 )?,
137 })
138 }
139
140 pub fn record_scenario(&self, scenario_type: &str, success: bool) {
142 self.scenarios_total
143 .with_label_values(&[scenario_type, if success { "success" } else { "failure" }])
144 .inc();
145 }
146
147 pub fn record_fault(&self, fault_type: &str, endpoint: &str) {
149 self.faults_injected_total.with_label_values(&[fault_type, endpoint]).inc();
150 }
151
152 pub fn record_latency(&self, endpoint: &str, latency_ms: f64) {
154 self.latency_injected.with_label_values(&[endpoint]).observe(latency_ms);
155 }
156
157 pub fn record_rate_limit_violation(&self, endpoint: &str) {
159 self.rate_limit_violations_total.with_label_values(&[endpoint]).inc();
160 }
161
162 pub fn update_circuit_breaker_state(&self, circuit_name: &str, state: f64) {
164 self.circuit_breaker_state.with_label_values(&[circuit_name]).set(state);
165 }
166
167 pub fn update_bulkhead_concurrent(&self, bulkhead_name: &str, count: f64) {
169 self.bulkhead_concurrent.with_label_values(&[bulkhead_name]).set(count);
170 }
171
172 pub fn record_step_duration(&self, orchestration: &str, step: &str, duration_secs: f64) {
174 self.orchestration_step_duration
175 .with_label_values(&[orchestration, step])
176 .observe(duration_secs);
177 }
178
179 pub fn record_orchestration_execution(&self, orchestration: &str, success: bool) {
181 self.orchestration_executions_total
182 .with_label_values(&[orchestration, if success { "success" } else { "failure" }])
183 .inc();
184 }
185
186 pub fn update_active_orchestrations(&self, orchestration: &str, active: bool) {
188 if active {
189 self.active_orchestrations.with_label_values(&[orchestration]).inc();
190 } else {
191 self.active_orchestrations.with_label_values(&[orchestration]).dec();
192 }
193 }
194
195 pub fn record_assertion(&self, orchestration: &str, passed: bool) {
197 self.assertion_results_total
198 .with_label_values(&[orchestration, if passed { "passed" } else { "failed" }])
199 .inc();
200 }
201
202 pub fn record_hook(&self, hook_type: &str, success: bool) {
204 self.hook_executions_total
205 .with_label_values(&[hook_type, if success { "success" } else { "failure" }])
206 .inc();
207 }
208
209 pub fn update_recommendations(&self, category: &str, severity: &str, count: f64) {
211 self.recommendations_total.with_label_values(&[category, severity]).set(count);
212 }
213
214 pub fn update_impact_score(&self, time_window: &str, score: f64) {
216 self.chaos_impact_score.with_label_values(&[time_window]).set(score);
217 }
218}
219
220impl Default for ChaosMetrics {
221 fn default() -> Self {
222 Self::new().expect("Failed to create chaos metrics")
223 }
224}
225
226pub static CHAOS_METRICS: Lazy<ChaosMetrics> =
228 Lazy::new(|| ChaosMetrics::new().expect("Failed to initialize chaos metrics"));
229
230pub fn registry() -> &'static Registry {
232 prometheus::default_registry()
233}
234
#[cfg(test)]
mod tests {
    use super::*;

    /// Forcing the lazy global must succeed, i.e. every metric registers
    /// without error.
    #[test]
    fn test_metrics_creation() {
        let _metrics = &*CHAOS_METRICS;
    }

    /// Recording a successful scenario must raise the matching counter.
    #[test]
    fn test_record_scenario() {
        // The child handle shares state with the vector, so it can be read
        // both before and after the call under test.
        let counter = CHAOS_METRICS
            .scenarios_total
            .with_label_values(&["test", "success"]);
        let before = counter.get();

        CHAOS_METRICS.record_scenario("test", true);

        let after = counter.get();
        assert!(after > before);
    }

    /// Recording a latency must add a sample to the endpoint's histogram.
    #[test]
    fn test_record_latency() {
        let histogram = CHAOS_METRICS
            .latency_injected
            .with_label_values(&["/api/test"]);
        let count_before = histogram.get_sample_count();
        let sum_before = histogram.get_sample_sum();

        CHAOS_METRICS.record_latency("/api/test", 100.0);

        // Inequalities rather than exact equality: the metrics are process
        // globals, so tests running in parallel may also record samples.
        assert!(histogram.get_sample_count() > count_before);
        assert!(histogram.get_sample_sum() >= sum_before + 100.0);
    }
}