1use super::{HealthManager, HealthStatus};
17use crate::Result;
18use std::fmt::Write;
19use std::sync::Arc;
20use std::time::{SystemTime, UNIX_EPOCH};
21
/// Flat snapshot of node health figures, as produced by
/// `PrometheusExporter::export_metrics`.
///
/// Every field is a plain number, so the type derives the usual value-type
/// traits; `Default` yields an all-zero snapshot.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct HealthMetrics {
    /// Node uptime in seconds (fractional).
    pub uptime_seconds: f64,
    /// Number of health checks currently reporting `Healthy`.
    pub healthy_components: u64,
    /// Number of health checks currently reporting `Degraded`.
    pub degraded_components: u64,
    /// Number of health checks currently reporting `Unhealthy`.
    pub unhealthy_components: u64,
    /// Total number of health checks in the report.
    pub total_components: u64,
    /// `peer_count` metadata of the `network` check; 0 when absent.
    pub network_peer_count: u64,
    /// `routing_table_size` metadata of the `dht` check; 0 when absent.
    pub dht_routing_table_size: u64,
    /// `active_connections` metadata of the `network` check; 0 when absent.
    pub active_connections: u64,
    /// Runtime memory usage as reported by the health manager's debug info.
    pub memory_usage_bytes: u64,
    /// CPU usage percentage. NOTE(review): `export_metrics` currently always
    /// stores 0.0 here — no CPU sampling is wired in.
    pub cpu_usage_percent: f64,
    /// `bandwidth_usage` metadata of the `resources` check; 0 when absent.
    pub bandwidth_usage_bps: u64,
    /// `free_space` metadata of the `storage` check; 0 when absent.
    pub storage_free_bytes: u64,
    /// `dht_ops_per_sec` metadata of the `resources` check; 0.0 when absent.
    pub dht_ops_per_second: f64,
}
51
52pub struct PrometheusExporter {
54 health_manager: Arc<HealthManager>,
55}
56
57impl PrometheusExporter {
58 pub fn new(health_manager: Arc<HealthManager>) -> Self {
60 Self { health_manager }
61 }
62
63 pub async fn export(&self) -> Result<String> {
65 let health = self.health_manager.get_health().await?;
66 let debug_info = self.health_manager.get_debug_info().await?;
67
68 let mut output = String::with_capacity(4096);
69
70 writeln!(
72 &mut output,
73 "# HELP p2p_node_info Node information\n# TYPE p2p_node_info gauge\np2p_node_info{{version=\"{}\",os=\"{}\",arch=\"{}\"}} 1",
74 health.version,
75 debug_info.system.os,
76 debug_info.system.arch
77 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
78
79 writeln!(
81 &mut output,
82 "\n# HELP p2p_uptime_seconds Node uptime in seconds\n# TYPE p2p_uptime_seconds counter\np2p_uptime_seconds {}",
83 health.uptime.as_secs_f64()
84 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
85
86 let mut healthy = 0u64;
88 let mut degraded = 0u64;
89 let mut unhealthy = 0u64;
90
91 for component in health.checks.values() {
92 match component.status {
93 HealthStatus::Healthy => healthy += 1,
94 HealthStatus::Degraded => degraded += 1,
95 HealthStatus::Unhealthy => unhealthy += 1,
96 }
97 }
98
99 writeln!(
100 &mut output,
101 "\n# HELP p2p_health_status Health status of components (1=healthy, 0=unhealthy)\n# TYPE p2p_health_status gauge"
102 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
103
104 for (name, component) in &health.checks {
105 let value = match component.status {
106 HealthStatus::Healthy => 1,
107 HealthStatus::Degraded => 0, HealthStatus::Unhealthy => 0,
109 };
110 writeln!(
111 &mut output,
112 "p2p_health_status{{component=\"{}\"}} {}",
113 name, value
114 )
115 .map_err(|e| {
116 crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into())
117 })?;
118 }
119
120 writeln!(
122 &mut output,
123 "\n# HELP p2p_health_check_latency_ms Health check latency in milliseconds\n# TYPE p2p_health_check_latency_ms gauge"
124 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
125
126 for (name, component) in &health.checks {
127 writeln!(
128 &mut output,
129 "p2p_health_check_latency_ms{{component=\"{}\"}} {}",
130 name, component.latency_ms
131 )
132 .map_err(|e| {
133 crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into())
134 })?;
135 }
136
137 writeln!(
139 &mut output,
140 "\n# HELP p2p_healthy_components Number of healthy components\n# TYPE p2p_healthy_components gauge\np2p_healthy_components {}",
141 healthy
142 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
143
144 writeln!(
145 &mut output,
146 "\n# HELP p2p_degraded_components Number of degraded components\n# TYPE p2p_degraded_components gauge\np2p_degraded_components {}",
147 degraded
148 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
149
150 writeln!(
151 &mut output,
152 "\n# HELP p2p_unhealthy_components Number of unhealthy components\n# TYPE p2p_unhealthy_components gauge\np2p_unhealthy_components {}",
153 unhealthy
154 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
155
156 writeln!(
158 &mut output,
159 "\n# HELP p2p_system_cpu_count Number of CPU cores\n# TYPE p2p_system_cpu_count gauge\np2p_system_cpu_count {}",
160 debug_info.system.cpu_count
161 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
162
163 writeln!(
164 &mut output,
165 "\n# HELP p2p_system_memory_total_bytes Total system memory in bytes\n# TYPE p2p_system_memory_total_bytes gauge\np2p_system_memory_total_bytes {}",
166 debug_info.system.total_memory
167 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
168
169 writeln!(
170 &mut output,
171 "\n# HELP p2p_system_memory_available_bytes Available system memory in bytes\n# TYPE p2p_system_memory_available_bytes gauge\np2p_system_memory_available_bytes {}",
172 debug_info.system.available_memory
173 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
174
175 writeln!(
177 &mut output,
178 "\n# HELP p2p_runtime_threads Number of runtime threads\n# TYPE p2p_runtime_threads gauge\np2p_runtime_threads {}",
179 debug_info.runtime.thread_count
180 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
181
182 writeln!(
183 &mut output,
184 "\n# HELP p2p_runtime_memory_usage_bytes Runtime memory usage in bytes\n# TYPE p2p_runtime_memory_usage_bytes gauge\np2p_runtime_memory_usage_bytes {}",
185 debug_info.runtime.memory_usage
186 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
187
188 for (name, component) in &health.checks {
190 for (key, value) in &component.metadata {
191 if let Some(num) = value.as_u64() {
192 writeln!(
193 &mut output,
194 "\n# HELP p2p_{}_{} Component-specific metric\n# TYPE p2p_{}_{} gauge\np2p_{}_{} {}",
195 name, key, name, key, name, key, num
196 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
197 } else if let Some(num) = value.as_f64() {
198 writeln!(
199 &mut output,
200 "\n# HELP p2p_{}_{} Component-specific metric\n# TYPE p2p_{}_{} gauge\np2p_{}_{} {}",
201 name, key, name, key, name, key, num
202 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
203 }
204 }
205 }
206
207 let timestamp = SystemTime::now()
209 .duration_since(UNIX_EPOCH)
210 .map_err(|e| {
211 crate::P2PError::Internal(format!("Failed to get timestamp: {}", e).into())
212 })?
213 .as_secs();
214
215 writeln!(
216 &mut output,
217 "\n# HELP p2p_last_scrape_timestamp_seconds Unix timestamp of last scrape\n# TYPE p2p_last_scrape_timestamp_seconds gauge\np2p_last_scrape_timestamp_seconds {}",
218 timestamp
219 ).map_err(|e| crate::P2PError::Internal(format!("Failed to write metrics: {}", e).into()))?;
220
221 Ok(output)
222 }
223
224 pub async fn export_metrics(&self) -> Result<HealthMetrics> {
226 let health = self.health_manager.get_health().await?;
227 let debug_info = self.health_manager.get_debug_info().await?;
228
229 let mut healthy = 0u64;
230 let mut degraded = 0u64;
231 let mut unhealthy = 0u64;
232
233 for component in health.checks.values() {
234 match component.status {
235 HealthStatus::Healthy => healthy += 1,
236 HealthStatus::Degraded => degraded += 1,
237 HealthStatus::Unhealthy => unhealthy += 1,
238 }
239 }
240
241 let mut network_peer_count = 0u64;
243 let mut dht_routing_table_size = 0u64;
244 let mut active_connections = 0u64;
245 let mut dht_ops_per_second = 0.0;
246 let mut bandwidth_usage_bps = 0u64;
247 let mut storage_free_bytes = 0u64;
248
249 for (name, component) in &health.checks {
250 match name.as_str() {
251 "network" => {
252 if let Some(count) = component
253 .metadata
254 .get("peer_count")
255 .and_then(|v| v.as_u64())
256 {
257 network_peer_count = count;
258 }
259 if let Some(count) = component
260 .metadata
261 .get("active_connections")
262 .and_then(|v| v.as_u64())
263 {
264 active_connections = count;
265 }
266 }
267 "dht" => {
268 if let Some(size) = component
269 .metadata
270 .get("routing_table_size")
271 .and_then(|v| v.as_u64())
272 {
273 dht_routing_table_size = size;
274 }
275 }
276 "resources" => {
277 if let Some(ops) = component
278 .metadata
279 .get("dht_ops_per_sec")
280 .and_then(|v| v.as_f64())
281 {
282 dht_ops_per_second = ops;
283 }
284
285 if let Some(bw) = component
286 .metadata
287 .get("bandwidth_usage")
288 .and_then(|v| v.as_u64())
289 {
290 bandwidth_usage_bps = bw;
291 }
292 }
293 "storage" => {
294 if let Some(free) = component
295 .metadata
296 .get("free_space")
297 .and_then(|v| v.as_u64())
298 {
299 storage_free_bytes = free;
300 }
301 }
302 _ => {}
303 }
304 }
305
306 Ok(HealthMetrics {
307 uptime_seconds: health.uptime.as_secs_f64(),
308 healthy_components: healthy,
309 degraded_components: degraded,
310 unhealthy_components: unhealthy,
311 total_components: health.checks.len() as u64,
312 network_peer_count,
313 dht_routing_table_size,
314 active_connections,
315 memory_usage_bytes: debug_info.runtime.memory_usage,
316 cpu_usage_percent: 0.0, bandwidth_usage_bps,
318 storage_free_bytes,
319 dht_ops_per_second,
320 })
321 }
322}
323
#[cfg(test)]
mod tests {
    use super::*;
    use crate::health::HealthManager;

    /// With no registered checks the export still contains the fixed metric
    /// families and their HELP/TYPE headers.
    #[tokio::test]
    async fn test_prometheus_export_basic() {
        let health_manager = Arc::new(HealthManager::new("1.0.0".to_string()));
        let exporter = PrometheusExporter::new(health_manager);

        let metrics = exporter.export().await.unwrap();

        assert!(metrics.contains("# HELP p2p_node_info"));
        assert!(metrics.contains("# TYPE p2p_node_info gauge"));
        assert!(metrics.contains("p2p_node_info{"));

        assert!(metrics.contains("# HELP p2p_uptime_seconds"));
        assert!(metrics.contains("# TYPE p2p_uptime_seconds counter"));
        assert!(metrics.contains("p2p_uptime_seconds"));

        assert!(metrics.contains("# HELP p2p_health_status"));
        assert!(metrics.contains("# TYPE p2p_health_status gauge"));

        assert!(metrics.contains("# HELP p2p_last_scrape_timestamp_seconds"));
        assert!(metrics.contains("# TYPE p2p_last_scrape_timestamp_seconds gauge"));
    }

    /// A registered healthy checker shows up in the per-component samples and
    /// in the component counts.
    #[tokio::test]
    async fn test_prometheus_export_with_components() {
        let health_manager = Arc::new(HealthManager::new("1.0.0".to_string()));

        struct MockChecker;

        #[async_trait::async_trait]
        impl crate::health::checks::ComponentChecker for MockChecker {
            async fn check(&self) -> Result<HealthStatus> {
                Ok(HealthStatus::Healthy)
            }
        }

        health_manager
            .register_checker("test_component", Box::new(MockChecker))
            .await;

        let exporter = PrometheusExporter::new(health_manager);
        let metrics = exporter.export().await.unwrap();

        assert!(metrics.contains("p2p_health_status{component=\"test_component\"}"));
        assert!(metrics.contains("p2p_health_check_latency_ms{component=\"test_component\"}"));
        assert!(metrics.contains("p2p_healthy_components 1"));
        assert!(metrics.contains("p2p_degraded_components 0"));
        assert!(metrics.contains("p2p_unhealthy_components 0"));
    }

    /// With no registered checks the structured metrics report zero components.
    #[tokio::test]
    async fn test_health_metrics_structure() {
        let health_manager = Arc::new(HealthManager::new("1.0.0".to_string()));
        let exporter = PrometheusExporter::new(health_manager);

        let metrics = exporter.export_metrics().await.unwrap();

        assert!(metrics.uptime_seconds >= 0.0);
        assert_eq!(metrics.healthy_components, 0);
        assert_eq!(metrics.degraded_components, 0);
        assert_eq!(metrics.unhealthy_components, 0);
        assert_eq!(metrics.total_components, 0);
    }

    /// Every non-empty line is either a HELP/TYPE comment or a sample whose
    /// final field parses as a number.
    #[tokio::test]
    async fn test_prometheus_format_validation() {
        let health_manager = Arc::new(HealthManager::new("1.0.0".to_string()));
        let exporter = PrometheusExporter::new(health_manager);

        let metrics = exporter.export().await.unwrap();

        for line in metrics.lines() {
            if line.is_empty() {
                continue;
            }

            if line.starts_with('#') {
                assert!(line.starts_with("# HELP") || line.starts_with("# TYPE"));
                continue;
            }

            // Split at the LAST space: a label value (e.g. an OS name) may
            // itself contain spaces, so the first space is not a reliable
            // separator between the series and its value.
            let mut fields = line.rsplitn(2, ' ');
            let value = fields.next().unwrap_or("").trim();
            let series = fields.next().unwrap_or("");
            assert!(
                !series.is_empty(),
                "Sample line has no series/value separator: {}",
                line
            );

            assert!(
                value.parse::<f64>().is_ok(),
                "Invalid metric value: {}",
                value
            );
        }
    }

    /// A checker that also provides debug info is counted as one healthy
    /// component in the structured metrics.
    #[tokio::test]
    async fn test_export_with_metadata() {
        let health_manager = Arc::new(HealthManager::new("1.0.0".to_string()));

        struct MockCheckerWithMetadata;

        #[async_trait::async_trait]
        impl crate::health::checks::ComponentChecker for MockCheckerWithMetadata {
            async fn check(&self) -> Result<HealthStatus> {
                Ok(HealthStatus::Healthy)
            }

            async fn debug_info(&self) -> Option<serde_json::Value> {
                Some(serde_json::json!({
                    "peer_count": 10,
                    "connection_rate": 5.5,
                }))
            }
        }

        health_manager
            .register_checker("network", Box::new(MockCheckerWithMetadata))
            .await;

        let exporter = PrometheusExporter::new(health_manager);
        let metrics = exporter.export_metrics().await.unwrap();

        assert_eq!(metrics.total_components, 1);
        assert_eq!(metrics.healthy_components, 1);
    }
}