1pub mod metrics_collector;
7pub mod health_checker;
8
9pub use metrics_collector::*;
10pub use health_checker::*;
11
12use std::collections::HashMap;
13use std::sync::Arc;
14use std::time::{Duration, Instant};
15use serde::{Deserialize, Serialize};
16use tokio::sync::RwLock;
17use chrono::{DateTime, Utc};
18
19#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct MonitoringConfig {
22 pub enable_metrics: bool,
24 pub enable_health_checks: bool,
26 pub collection_interval: Duration,
28 pub health_check_interval: Duration,
30 pub retention_period: Duration,
32 pub max_metrics_points: usize,
34 pub prometheus_config: PrometheusConfig,
36 pub alerting_config: AlertingConfig,
38}
39
40impl Default for MonitoringConfig {
41 fn default() -> Self {
42 Self {
43 enable_metrics: true,
44 enable_health_checks: true,
45 collection_interval: Duration::from_secs(15),
46 health_check_interval: Duration::from_secs(30),
47 retention_period: Duration::from_secs(3600), max_metrics_points: 10000,
49 prometheus_config: PrometheusConfig::default(),
50 alerting_config: AlertingConfig::default(),
51 }
52 }
53}
54
55#[derive(Debug, Clone, Serialize, Deserialize)]
57pub struct PrometheusConfig {
58 pub enabled: bool,
60 pub address: String,
62 pub port: u16,
64 pub path: String,
66 pub global_labels: HashMap<String, String>,
68}
69
70impl Default for PrometheusConfig {
71 fn default() -> Self {
72 Self {
73 enabled: true,
74 address: "127.0.0.1".to_string(),
75 port: 9090,
76 path: "/metrics".to_string(),
77 global_labels: HashMap::new(),
78 }
79 }
80}
81
82#[derive(Debug, Clone, Serialize, Deserialize)]
84pub struct AlertingConfig {
85 pub enabled: bool,
87 pub rules: Vec<AlertRule>,
89 pub notifications: Vec<NotificationConfig>,
91}
92
93impl Default for AlertingConfig {
94 fn default() -> Self {
95 Self {
96 enabled: false,
97 rules: Vec::new(),
98 notifications: Vec::new(),
99 }
100 }
101}
102
103#[derive(Debug, Clone, Serialize, Deserialize)]
105pub struct AlertRule {
106 pub name: String,
108 pub description: String,
110 pub query: String,
112 pub threshold: AlertThreshold,
114 pub evaluation_interval: Duration,
116 pub severity: AlertSeverity,
118 pub labels: HashMap<String, String>,
120}
121
122#[derive(Debug, Clone, Serialize, Deserialize)]
124pub enum AlertThreshold {
125 GreaterThan(f64),
127 LessThan(f64),
129 Equal(f64),
131 NotEqual(f64),
133}
134
135#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
137pub enum AlertSeverity {
138 Info,
139 Warning,
140 Error,
141 Critical,
142}
143
144#[derive(Debug, Clone, Serialize, Deserialize)]
146pub struct NotificationConfig {
147 pub notification_type: NotificationType,
149 pub config: HashMap<String, String>,
151}
152
153#[derive(Debug, Clone, Serialize, Deserialize)]
155pub enum NotificationType {
156 Email,
157 Slack,
158 Webhook,
159 PagerDuty,
160}
161
162#[derive(Debug, Clone)]
164pub struct MetricPoint {
165 pub name: String,
167 pub value: f64,
169 pub timestamp: DateTime<Utc>,
171 pub labels: HashMap<String, String>,
173}
174
175#[derive(Debug, Clone)]
177pub struct HealthCheckResult {
178 pub name: String,
180 pub status: HealthStatus,
182 pub message: String,
184 pub duration: Duration,
186 pub details: HashMap<String, String>,
188}
189
/// Health state of a component or of the whole system.
///
/// Statuses are totally ordered from worst to best:
/// `Unknown < Unhealthy < Degraded < Healthy`. The ordering is implemented by
/// hand rather than derived because a derived ordering follows declaration
/// order, which would rank `Healthy` lowest. With this ordering, taking the
/// minimum of several statuses yields the worst one.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HealthStatus {
    /// Fully healthy; the highest-ranked status.
    Healthy,
    /// Degraded; ranked between `Unhealthy` and `Healthy`.
    Degraded,
    /// Unhealthy; ranked just above `Unknown`.
    Unhealthy,
    /// No status known; the lowest-ranked status.
    Unknown,
}

impl Ord for HealthStatus {
    /// Compares two statuses by rank (`Unknown` lowest, `Healthy` highest).
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // Exhaustive match: adding a variant forces a decision on its rank.
        fn rank(status: HealthStatus) -> u8 {
            match status {
                HealthStatus::Unknown => 0,
                HealthStatus::Unhealthy => 1,
                HealthStatus::Degraded => 2,
                HealthStatus::Healthy => 3,
            }
        }

        rank(*self).cmp(&rank(*other))
    }
}

impl PartialOrd for HealthStatus {
    /// Delegates to [`Ord::cmp`] so both orderings always agree.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
198
199impl Default for HealthStatus {
200 fn default() -> Self {
201 HealthStatus::Unknown
202 }
203}
204
205#[derive(Debug, Clone, Default)]
207pub struct SystemHealth {
208 pub overall_status: HealthStatus,
210 pub checks: Vec<HealthCheckResult>,
212 pub uptime: Duration,
214 pub last_check: DateTime<Utc>,
216}
217
218#[derive(Debug, Clone)]
220pub struct PerformanceMetrics {
221 pub query_metrics: QueryMetrics,
223 pub storage_metrics: StorageMetrics,
225 pub system_metrics: SystemMetrics,
227 pub timestamp: DateTime<Utc>,
229}
230
/// Query-related counters and latency figures.
///
/// Units follow the field-name suffixes (`_ms` for milliseconds).
#[derive(Clone, Debug)]
pub struct QueryMetrics {
    /// Total number of queries.
    pub total_queries: u64,
    /// Query rate per second.
    pub queries_per_second: f64,
    /// Average query latency, in milliseconds.
    pub avg_query_latency_ms: f64,
    /// 95th-percentile query latency, in milliseconds.
    pub p95_query_latency_ms: f64,
    /// 99th-percentile query latency, in milliseconds.
    pub p99_query_latency_ms: f64,
    /// Number of slow queries.
    // NOTE(review): the threshold that makes a query "slow" is not defined in
    // this file — confirm with the collector.
    pub slow_queries: u64,
    /// Number of failed queries.
    pub failed_queries: u64,
}
242
/// Storage-related sizes, operation counters, throughput and latency figures.
///
/// Units follow the field-name suffixes (`_bytes`, `_per_sec`, `_ms`).
#[derive(Clone, Debug)]
pub struct StorageMetrics {
    /// Total storage size, in bytes.
    pub total_size_bytes: u64,
    /// Used storage size, in bytes.
    pub used_size_bytes: u64,
    /// Number of read operations.
    pub read_operations: u64,
    /// Number of write operations.
    pub write_operations: u64,
    /// Read throughput, in bytes per second.
    pub read_bytes_per_sec: f64,
    /// Write throughput, in bytes per second.
    pub write_bytes_per_sec: f64,
    /// Cache hit rate.
    // NOTE(review): scale (0–1 fraction vs. 0–100 percent) is not established
    // in this file — confirm with the producer.
    pub cache_hit_rate: f64,
    /// I/O latency, in milliseconds.
    pub io_latency_ms: f64,
}
255
/// Host-level CPU, memory, disk and network figures.
///
/// Units follow the field-name suffixes (`_percent`, `_bytes`).
#[derive(Clone, Debug)]
pub struct SystemMetrics {
    /// CPU usage, as a percentage.
    pub cpu_usage_percent: f64,
    /// Memory usage, in bytes.
    pub memory_usage_bytes: u64,
    /// Memory usage, as a percentage.
    pub memory_usage_percent: f64,
    /// Disk usage, in bytes.
    pub disk_usage_bytes: u64,
    /// Disk usage, as a percentage.
    pub disk_usage_percent: f64,
    /// Network bytes received.
    // NOTE(review): whether this is a cumulative counter or a per-interval
    // figure is not established in this file — confirm with the collector.
    pub network_rx_bytes: u64,
    /// Network bytes transmitted (same caveat as `network_rx_bytes`).
    pub network_tx_bytes: u64,
}
267
268#[derive(Debug, thiserror::Error)]
270pub enum MonitoringError {
271 #[error("Metrics collection error: {0}")]
272 MetricsCollection(String),
273
274 #[error("Health check error: {0}")]
275 HealthCheck(String),
276
277 #[error("Prometheus export error: {0}")]
278 PrometheusExport(String),
279
280 #[error("Configuration error: {0}")]
281 Configuration(String),
282
283 #[error("Storage error: {0}")]
284 Storage(String),
285
286 #[error("HTTP server error: {0}")]
287 HttpServer(String),
288}
289
#[cfg(test)]
mod tests {
    use super::*;

    /// The default monitoring config enables both features and collects
    /// every 15 seconds.
    #[test]
    fn test_monitoring_config_default() {
        let MonitoringConfig {
            enable_metrics,
            enable_health_checks,
            collection_interval,
            ..
        } = MonitoringConfig::default();

        assert!(enable_metrics);
        assert!(enable_health_checks);
        assert_eq!(collection_interval, Duration::from_secs(15));
    }

    /// The default Prometheus config is enabled on port 9090 at `/metrics`.
    #[test]
    fn test_prometheus_config_default() {
        let defaults = PrometheusConfig::default();

        assert!(defaults.enabled);
        assert_eq!(defaults.port, 9090);
        assert_eq!(defaults.path, "/metrics");
    }

    /// Statuses compare as Healthy > Degraded > Unhealthy > Unknown.
    #[test]
    fn test_health_status_ordering() {
        // Listed best-to-worst; each neighbour pair must be strictly descending.
        let descending = [
            HealthStatus::Healthy,
            HealthStatus::Degraded,
            HealthStatus::Unhealthy,
            HealthStatus::Unknown,
        ];

        for pair in descending.windows(2) {
            assert!(pair[0] > pair[1]);
        }
    }

    /// A metric point keeps the name, value and labels it was built with.
    #[test]
    fn test_metric_point_creation() {
        let labels: HashMap<String, String> =
            std::iter::once(("service".to_string(), "kotoba-db".to_string())).collect();

        let sample = MetricPoint {
            name: "query_latency".to_string(),
            value: 15.5,
            timestamp: Utc::now(),
            labels,
        };

        assert_eq!(sample.name, "query_latency");
        assert_eq!(sample.value, 15.5);
        assert!(sample.labels.contains_key("service"));
    }
}