1use std::sync::{Arc, RwLock, Mutex};
12use std::collections::{HashMap, VecDeque, BTreeMap};
13use std::time::{Duration, Instant};
14use chrono::{DateTime, Utc, NaiveDate};
15use serde::{Deserialize, Serialize};
16
/// Hourly rollups: hour bucket -> metric name -> aggregated statistics.
/// Shared across threads behind an `RwLock`.
pub type HourlyAggregations = Arc<RwLock<BTreeMap<DateTime<Utc>, HashMap<String, AggregatedMetric>>>>;

/// One metric sample as submitted by callers: (name, value, optional labels).
pub type MetricRecord = (String, f64, Option<HashMap<String, String>>);
22
23use super::tenant::TenantId;
24use super::quota::{UsagePattern, AlertType};
25use crate::error::{EventualiError, Result};
26
/// A single timestamped metric sample with optional key/value labels.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricDataPoint {
    /// Moment the sample was taken (set to `Utc::now()` by `new`).
    pub timestamp: DateTime<Utc>,
    /// Observed value.
    pub value: f64,
    /// Free-form dimension labels (e.g. host, region).
    pub labels: HashMap<String, String>,
}
34
impl MetricDataPoint {
    /// Creates a point for `value`, timestamped with the current UTC time,
    /// with no labels.
    pub fn new(value: f64) -> Self {
        MetricDataPoint {
            timestamp: Utc::now(),
            value,
            labels: HashMap::new(),
        }
    }

    /// Builder: replaces the entire label map.
    pub fn with_labels(mut self, labels: HashMap<String, String>) -> Self {
        self.labels = labels;
        self
    }

    /// Builder: inserts (or overwrites) a single label.
    pub fn with_label(mut self, key: String, value: String) -> Self {
        self.labels.insert(key, value);
        self
    }

    /// Inserts (or overwrites) a single label in place.
    pub fn add_label(&mut self, key: String, value: String) {
        self.labels.insert(key, value);
    }
}
58
/// Bounded, time-windowed series of samples for one metric.
///
/// Points are evicted when older than `retention_period` or when the series
/// exceeds `max_points` (oldest dropped first).
#[derive(Debug)]
pub struct TimeSeriesMetric {
    // Metric name; kept for identification/debugging only.
    #[allow(dead_code)] name: String,
    // Samples in insertion (and therefore chronological) order.
    data_points: VecDeque<MetricDataPoint>,
    // Hard cap on the number of retained samples.
    max_points: usize,
    // Maximum age of a retained sample.
    retention_period: Duration,
}
68
69impl TimeSeriesMetric {
70 pub fn new(name: String, max_points: usize, retention_hours: u64) -> Self {
71 TimeSeriesMetric {
72 name,
73 data_points: VecDeque::new(),
74 max_points,
75 retention_period: Duration::from_secs(retention_hours * 3600),
76 }
77 }
78
79 pub fn add_point(&mut self, point: MetricDataPoint) {
80 let cutoff_time = Utc::now() - chrono::Duration::from_std(self.retention_period).unwrap();
82 while let Some(front) = self.data_points.front() {
83 if front.timestamp < cutoff_time {
84 self.data_points.pop_front();
85 } else {
86 break;
87 }
88 }
89
90 self.data_points.push_back(point);
92
93 if self.data_points.len() > self.max_points {
95 self.data_points.pop_front();
96 }
97 }
98
99 pub fn get_latest(&self) -> Option<&MetricDataPoint> {
100 self.data_points.back()
101 }
102
103 pub fn get_points(&self) -> Vec<&MetricDataPoint> {
104 self.data_points.iter().collect()
105 }
106
107 pub fn get_points_in_range(
108 &self,
109 start: DateTime<Utc>,
110 end: DateTime<Utc>,
111 ) -> Vec<&MetricDataPoint> {
112 self.data_points
113 .iter()
114 .filter(|point| point.timestamp >= start && point.timestamp <= end)
115 .collect()
116 }
117
118 pub fn calculate_average(&self) -> f64 {
119 if self.data_points.is_empty() {
120 return 0.0;
121 }
122 let sum: f64 = self.data_points.iter().map(|p| p.value).sum();
123 sum / self.data_points.len() as f64
124 }
125
126 pub fn calculate_percentile(&self, percentile: f64) -> f64 {
127 if self.data_points.is_empty() {
128 return 0.0;
129 }
130
131 let mut values: Vec<f64> = self.data_points.iter().map(|p| p.value).collect();
132 values.sort_by(|a, b| a.partial_cmp(b).unwrap());
133
134 let index = ((values.len() - 1) as f64 * percentile / 100.0).round() as usize;
135 values[index.min(values.len() - 1)]
136 }
137
138 pub fn detect_anomalies(&self, threshold_multiplier: f64) -> Vec<&MetricDataPoint> {
139 if self.data_points.len() < 10 {
140 return Vec::new(); }
142
143 let mean = self.calculate_average();
144 let variance = self.calculate_variance();
145 let std_dev = variance.sqrt();
146 let threshold = std_dev * threshold_multiplier;
147
148 self.data_points
149 .iter()
150 .filter(|point| (point.value - mean).abs() > threshold)
151 .collect()
152 }
153
154 pub fn calculate_variance(&self) -> f64 {
155 if self.data_points.len() < 2 {
156 return 0.0;
157 }
158
159 let mean = self.calculate_average();
160 let sum_squared_diff: f64 = self.data_points
161 .iter()
162 .map(|p| (p.value - mean).powi(2))
163 .sum();
164
165 sum_squared_diff / (self.data_points.len() - 1) as f64
166 }
167
168 pub fn get_trend(&self) -> UsagePattern {
169 if self.data_points.len() < 5 {
170 return UsagePattern::Stable;
171 }
172
173 let n = self.data_points.len() as f64;
175 let points: Vec<(f64, f64)> = self.data_points
176 .iter()
177 .enumerate()
178 .map(|(i, point)| (i as f64, point.value))
179 .collect();
180
181 let sum_x: f64 = points.iter().map(|(x, _)| x).sum();
182 let sum_y: f64 = points.iter().map(|(_, y)| y).sum();
183 let sum_xy: f64 = points.iter().map(|(x, y)| x * y).sum();
184 let sum_x2: f64 = points.iter().map(|(x, _)| x * x).sum();
185
186 let slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x * sum_x);
187
188 let mean_y = sum_y / n;
190 let ss_tot: f64 = points.iter().map(|(_, y)| (y - mean_y).powi(2)).sum();
191 let ss_res: f64 = points.iter().map(|(x, y)| {
192 let predicted = slope * x + (sum_y - slope * sum_x) / n;
193 (y - predicted).powi(2)
194 }).sum();
195
196 let r_squared = 1.0 - (ss_res / ss_tot);
197
198 if r_squared < 0.5 {
200 UsagePattern::Volatile
201 } else if slope > 0.1 {
202 UsagePattern::Growing
203 } else if slope < -0.1 {
204 UsagePattern::Declining
205 } else {
206 UsagePattern::Stable
207 }
208 }
209}
210
/// Summary statistics for a set of samples over one time window.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AggregatedMetric {
    /// Metric name the aggregation was computed for.
    pub name: String,
    /// Smallest value in the window (0.0 when the window was empty).
    pub min: f64,
    /// Largest value in the window (0.0 when empty).
    pub max: f64,
    /// Arithmetic mean (0.0 when empty).
    pub avg: f64,
    /// Sum of all values.
    pub sum: f64,
    /// Number of samples aggregated.
    pub count: u64,
    /// Median (50th percentile).
    pub p50: f64,
    /// 95th percentile.
    pub p95: f64,
    /// 99th percentile.
    pub p99: f64,
    /// Earliest sample timestamp in the window (now, when empty).
    pub start_time: DateTime<Utc>,
    /// Latest sample timestamp in the window (now, when empty).
    pub end_time: DateTime<Utc>,
}
226
227impl AggregatedMetric {
228 pub fn from_points(name: String, points: &[&MetricDataPoint]) -> Self {
229 if points.is_empty() {
230 return AggregatedMetric {
231 name,
232 min: 0.0,
233 max: 0.0,
234 avg: 0.0,
235 sum: 0.0,
236 count: 0,
237 p50: 0.0,
238 p95: 0.0,
239 p99: 0.0,
240 start_time: Utc::now(),
241 end_time: Utc::now(),
242 };
243 }
244
245 let mut values: Vec<f64> = points.iter().map(|p| p.value).collect();
246 values.sort_by(|a, b| a.partial_cmp(b).unwrap());
247
248 let min = values.first().copied().unwrap_or(0.0);
249 let max = values.last().copied().unwrap_or(0.0);
250 let sum: f64 = values.iter().sum();
251 let count = values.len() as u64;
252 let avg = sum / count as f64;
253
254 let p50_idx = (count as f64 * 0.5).round() as usize;
255 let p95_idx = (count as f64 * 0.95).round() as usize;
256 let p99_idx = (count as f64 * 0.99).round() as usize;
257
258 let p50 = values.get(p50_idx.min(values.len() - 1)).copied().unwrap_or(0.0);
259 let p95 = values.get(p95_idx.min(values.len() - 1)).copied().unwrap_or(0.0);
260 let p99 = values.get(p99_idx.min(values.len() - 1)).copied().unwrap_or(0.0);
261
262 let start_time = points.iter().map(|p| p.timestamp).min().unwrap_or_else(Utc::now);
263 let end_time = points.iter().map(|p| p.timestamp).max().unwrap_or_else(Utc::now);
264
265 AggregatedMetric {
266 name,
267 min,
268 max,
269 avg,
270 sum,
271 count,
272 p50,
273 p95,
274 p99,
275 start_time,
276 end_time,
277 }
278 }
279}
280
/// Declarative service-level agreement on a single metric.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SlaDefinition {
    /// Human-readable SLA name, echoed into `SlaResult`.
    pub name: String,
    /// Metric the SLA is evaluated against.
    pub metric_name: String,
    /// Value each sample is compared to via `operator`.
    pub threshold: f64,
    /// Condition samples must satisfy to count as compliant.
    pub operator: SlaOperator,
    /// Minimum percentage of compliant samples for the SLA to pass.
    pub target_percentage: f64,
    /// Trailing window over which compliance is measured.
    pub measurement_window: Duration,
}
291
/// Comparison operator shared by SLA targets and alert-rule triggers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SlaOperator {
    LessThan,
    LessThanOrEqual,
    GreaterThan,
    GreaterThanOrEqual,
    /// Equality within `f64::EPSILON`.
    Equal,
}
300
/// Outcome of evaluating one `SlaDefinition` over its measurement window.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SlaResult {
    pub sla_name: String,
    pub measurement_period_start: DateTime<Utc>,
    pub measurement_period_end: DateTime<Utc>,
    /// Percentage of samples that satisfied the SLA (100.0 if no samples).
    pub compliance_percentage: f64,
    /// Number of samples that violated the SLA condition.
    pub violations_count: u64,
    /// Total samples found in the window.
    pub total_measurements: u64,
    /// True when `compliance_percentage >= target_percentage`.
    pub is_compliant: bool,
    /// Estimated time in breach (approximated from violation count).
    pub breach_duration: Duration,
}
313
/// Rule evaluated on every recorded sample of `metric_name`; fires a
/// `MetricAlert` when the trigger condition holds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricAlertRule {
    pub name: String,
    pub metric_name: String,
    pub threshold: f64,
    /// Trigger condition (holds -> alert fires).
    pub operator: SlaOperator,
    pub severity: AlertType,
    /// Window the rule is meant to be evaluated over.
    pub evaluation_window: Duration,
    /// Minimum time between consecutive alerts from this rule.
    pub cooldown_period: Duration,
    /// Disabled rules are skipped entirely.
    pub enabled: bool,
}
326
/// A fired alert instance, created when a `MetricAlertRule` triggers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricAlert {
    /// Unique alert id (UUID v4 string).
    pub id: String,
    /// Name of the rule that fired.
    pub rule_name: String,
    pub tenant_id: TenantId,
    pub metric_name: String,
    /// Value that triggered the alert.
    pub current_value: f64,
    pub threshold: f64,
    pub severity: AlertType,
    /// Human-readable description of the trigger.
    pub message: String,
    pub triggered_at: DateTime<Utc>,
    /// Set when the alert is resolved; `None` while active.
    pub resolved_at: Option<DateTime<Utc>>,
    pub acknowledged: bool,
}
342
/// A named collection of widgets rendered for one tenant.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TenantDashboard {
    pub name: String,
    pub description: String,
    pub widgets: Vec<DashboardWidget>,
    /// Suggested client refresh cadence.
    pub refresh_interval: Duration,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
}
353
/// One visual element of a dashboard, bound to one or more metrics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DashboardWidget {
    pub id: String,
    pub widget_type: WidgetType,
    pub title: String,
    /// Metrics plotted by this widget.
    pub metric_names: Vec<String>,
    /// Trailing window of data the widget displays.
    pub time_range: Duration,
    pub position: WidgetPosition,
    pub size: WidgetSize,
    /// Renderer-specific settings (free-form key/value pairs).
    pub configuration: HashMap<String, String>,
}
365
/// Supported dashboard widget visualizations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum WidgetType {
    LineChart,
    AreaChart,
    BarChart,
    Gauge,
    SingleValue,
    Table,
    Heatmap,
}
376
/// Grid position of a widget (top-left origin).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WidgetPosition {
    pub x: u32,
    pub y: u32,
}
382
/// Grid size of a widget in layout cells.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WidgetSize {
    pub width: u32,
    pub height: u32,
}
388
/// Per-tenant metrics engine: bounded time-series storage, SLA evaluation,
/// threshold alerting, dashboards, export, and a composite health score.
///
/// All interior state is behind `Arc<RwLock<_>>`/`Arc<Mutex<_>>` so the
/// collector can be shared across threads with `&self` methods.
pub struct TenantMetricsCollector {
    // Tenant this collector belongs to; stamped onto generated alerts.
    tenant_id: TenantId,
    // Metric name -> bounded time series.
    metrics: Arc<RwLock<HashMap<String, TimeSeriesMetric>>>,
    sla_definitions: Arc<RwLock<Vec<SlaDefinition>>>,
    alert_rules: Arc<RwLock<Vec<MetricAlertRule>>>,
    // Alerts raised so far (resolved ones stay until pruned externally).
    active_alerts: Arc<RwLock<Vec<MetricAlert>>>,
    dashboards: Arc<RwLock<Vec<TenantDashboard>>>,
    // Reserved for a background collection loop; not read yet.
    #[allow(dead_code)] collection_interval: Duration,
    #[allow(dead_code)] last_collection: Arc<Mutex<Instant>>,

    // Reserved rollup buffers; written by no visible code path yet.
    #[allow(dead_code)] hourly_aggregations: HourlyAggregations,
    #[allow(dead_code)] daily_aggregations: Arc<RwLock<BTreeMap<NaiveDate, HashMap<String, AggregatedMetric>>>>,
}
408
409impl TenantMetricsCollector {
410 pub fn new(tenant_id: TenantId) -> Self {
411 TenantMetricsCollector {
412 tenant_id,
413 metrics: Arc::new(RwLock::new(HashMap::new())),
414 sla_definitions: Arc::new(RwLock::new(Vec::new())),
415 alert_rules: Arc::new(RwLock::new(Vec::new())),
416 active_alerts: Arc::new(RwLock::new(Vec::new())),
417 dashboards: Arc::new(RwLock::new(Vec::new())),
418 collection_interval: Duration::from_secs(60), last_collection: Arc::new(Mutex::new(Instant::now())),
420 hourly_aggregations: Arc::new(RwLock::new(BTreeMap::new())),
421 daily_aggregations: Arc::new(RwLock::new(BTreeMap::new())),
422 }
423 }
424
425 pub fn record_metric(&self, name: String, value: f64, labels: Option<HashMap<String, String>>) {
427 let point = if let Some(labels) = labels {
428 MetricDataPoint::new(value).with_labels(labels)
429 } else {
430 MetricDataPoint::new(value)
431 };
432
433 let mut metrics = self.metrics.write().unwrap();
434 let metric = metrics.entry(name.clone()).or_insert_with(|| {
435 TimeSeriesMetric::new(name.clone(), 10000, 24) });
437
438 metric.add_point(point);
439
440 drop(metrics); self.check_alert_rules(&name, value);
443 }
444
445 pub fn record_metrics(&self, metrics: Vec<MetricRecord>) {
447 for (name, value, labels) in metrics {
448 self.record_metric(name, value, labels);
449 }
450 }
451
452 pub fn get_current_metric_value(&self, name: &str) -> Option<f64> {
454 let metrics = self.metrics.read().unwrap();
455 metrics.get(name)?.get_latest().map(|point| point.value)
456 }
457
458 pub fn get_metric_timeseries(
460 &self,
461 name: &str,
462 start: Option<DateTime<Utc>>,
463 end: Option<DateTime<Utc>>,
464 ) -> Option<Vec<MetricDataPoint>> {
465 let metrics = self.metrics.read().unwrap();
466 let metric = metrics.get(name)?;
467
468 let points = if let (Some(start), Some(end)) = (start, end) {
469 metric.get_points_in_range(start, end)
470 } else {
471 metric.get_points()
472 };
473
474 Some(points.into_iter().cloned().collect())
475 }
476
477 pub fn get_aggregated_metrics(
479 &self,
480 names: &[String],
481 window: Duration,
482 ) -> HashMap<String, AggregatedMetric> {
483 let end_time = Utc::now();
484 let start_time = end_time - chrono::Duration::from_std(window).unwrap();
485
486 let metrics = self.metrics.read().unwrap();
487 let mut result = HashMap::new();
488
489 for name in names {
490 if let Some(metric) = metrics.get(name) {
491 let points = metric.get_points_in_range(start_time, end_time);
492 let aggregated = AggregatedMetric::from_points(name.clone(), &points);
493 result.insert(name.clone(), aggregated);
494 }
495 }
496
497 result
498 }
499
500 pub fn detect_anomalies(&self, threshold_multiplier: f64) -> HashMap<String, Vec<MetricDataPoint>> {
502 let metrics = self.metrics.read().unwrap();
503 let mut anomalies = HashMap::new();
504
505 for (name, metric) in metrics.iter() {
506 let anomalous_points = metric.detect_anomalies(threshold_multiplier);
507 if !anomalous_points.is_empty() {
508 anomalies.insert(
509 name.clone(),
510 anomalous_points.into_iter().cloned().collect()
511 );
512 }
513 }
514
515 anomalies
516 }
517
518 pub fn get_usage_patterns(&self) -> HashMap<String, UsagePattern> {
520 let metrics = self.metrics.read().unwrap();
521 let mut patterns = HashMap::new();
522
523 for (name, metric) in metrics.iter() {
524 patterns.insert(name.clone(), metric.get_trend());
525 }
526
527 patterns
528 }
529
530 pub fn add_sla_definition(&self, sla: SlaDefinition) {
532 let mut slas = self.sla_definitions.write().unwrap();
533 slas.push(sla);
534 }
535
    /// Evaluates every registered SLA over its trailing measurement window,
    /// returning one `SlaResult` per SLA whose metric exists.
    pub fn check_sla_compliance(&self) -> Vec<SlaResult> {
        let slas = self.sla_definitions.read().unwrap();
        let metrics = self.metrics.read().unwrap();
        let mut results = Vec::new();

        for sla in slas.iter() {
            if let Some(metric) = metrics.get(&sla.metric_name) {
                let end_time = Utc::now();
                let start_time = end_time - chrono::Duration::from_std(sla.measurement_window).unwrap();
                let points = metric.get_points_in_range(start_time, end_time);

                let mut violations = 0u64;
                let total_measurements = points.len() as u64;

                for point in &points {
                    // Each arm is the NEGATION of the SLA target: the
                    // operator states the condition samples must satisfy,
                    // so a violation is any point where the inverse holds.
                    let violates = match sla.operator {
                        SlaOperator::LessThan => point.value >= sla.threshold,
                        SlaOperator::LessThanOrEqual => point.value > sla.threshold,
                        SlaOperator::GreaterThan => point.value <= sla.threshold,
                        SlaOperator::GreaterThanOrEqual => point.value < sla.threshold,
                        SlaOperator::Equal => (point.value - sla.threshold).abs() > f64::EPSILON,
                    };

                    if violates {
                        violations += 1;
                    }
                }

                // An empty window counts as fully compliant.
                let compliance_percentage = if total_measurements > 0 {
                    100.0 * (total_measurements - violations) as f64 / total_measurements as f64
                } else {
                    100.0
                };

                results.push(SlaResult {
                    sla_name: sla.name.clone(),
                    measurement_period_start: start_time,
                    measurement_period_end: end_time,
                    compliance_percentage,
                    violations_count: violations,
                    total_measurements,
                    is_compliant: compliance_percentage >= sla.target_percentage,
                    // Rough estimate: assumes about one measurement per
                    // minute, so each violation contributes 60 s of breach.
                    breach_duration: Duration::from_secs(violations * 60),
                });
            }
        }

        results
    }
586
587 pub fn add_alert_rule(&self, rule: MetricAlertRule) {
589 let mut rules = self.alert_rules.write().unwrap();
590 rules.push(rule);
591 }
592
    /// Evaluates all enabled alert rules for `metric_name` against a newly
    /// recorded value, appending a `MetricAlert` for each rule that fires
    /// and has no unresolved alert still inside its cooldown window.
    fn check_alert_rules(&self, metric_name: &str, current_value: f64) {
        let rules = self.alert_rules.read().unwrap();
        let mut active_alerts = self.active_alerts.write().unwrap();

        for rule in rules.iter() {
            if !rule.enabled || rule.metric_name != metric_name {
                continue;
            }

            // Unlike SLA compliance checking, the operator here expresses
            // the TRIGGER condition directly (not its negation).
            let should_trigger = match rule.operator {
                SlaOperator::LessThan => current_value < rule.threshold,
                SlaOperator::LessThanOrEqual => current_value <= rule.threshold,
                SlaOperator::GreaterThan => current_value > rule.threshold,
                SlaOperator::GreaterThanOrEqual => current_value >= rule.threshold,
                SlaOperator::Equal => (current_value - rule.threshold).abs() < f64::EPSILON,
            };

            if should_trigger {
                // Dedup: suppress re-firing while an unresolved alert from
                // the same rule is still younger than the cooldown period.
                let existing_alert = active_alerts.iter().any(|alert| {
                    alert.rule_name == rule.name &&
                    alert.resolved_at.is_none() &&
                    (Utc::now() - alert.triggered_at).to_std().unwrap_or(Duration::ZERO) < rule.cooldown_period
                });

                if !existing_alert {
                    let alert = MetricAlert {
                        id: uuid::Uuid::new_v4().to_string(),
                        rule_name: rule.name.clone(),
                        tenant_id: self.tenant_id.clone(),
                        metric_name: metric_name.to_string(),
                        current_value,
                        threshold: rule.threshold,
                        severity: rule.severity.clone(),
                        message: format!(
                            "Metric {} {} {} (current: {}, threshold: {})",
                            metric_name,
                            match rule.operator {
                                SlaOperator::LessThan => "is less than",
                                SlaOperator::LessThanOrEqual => "is less than or equal to",
                                SlaOperator::GreaterThan => "is greater than",
                                SlaOperator::GreaterThanOrEqual => "is greater than or equal to",
                                SlaOperator::Equal => "equals",
                            },
                            rule.threshold,
                            current_value,
                            rule.threshold
                        ),
                        triggered_at: Utc::now(),
                        resolved_at: None,
                        acknowledged: false,
                    };

                    active_alerts.push(alert);
                }
            }
        }
    }
652
653 pub fn get_active_alerts(&self) -> Vec<MetricAlert> {
655 let active_alerts = self.active_alerts.read().unwrap();
656 active_alerts.clone()
657 }
658
659 pub fn acknowledge_alert(&self, alert_id: &str) -> Result<()> {
661 let mut active_alerts = self.active_alerts.write().unwrap();
662 if let Some(alert) = active_alerts.iter_mut().find(|a| a.id == alert_id) {
663 alert.acknowledged = true;
664 Ok(())
665 } else {
666 Err(EventualiError::Tenant(format!("Alert not found: {alert_id}")))
667 }
668 }
669
670 pub fn resolve_alert(&self, alert_id: &str) -> Result<()> {
672 let mut active_alerts = self.active_alerts.write().unwrap();
673 if let Some(alert) = active_alerts.iter_mut().find(|a| a.id == alert_id) {
674 alert.resolved_at = Some(Utc::now());
675 Ok(())
676 } else {
677 Err(EventualiError::Tenant(format!("Alert not found: {alert_id}")))
678 }
679 }
680
681 pub fn create_dashboard(&self, dashboard: TenantDashboard) {
683 let mut dashboards = self.dashboards.write().unwrap();
684 dashboards.push(dashboard);
685 }
686
687 pub fn get_dashboards(&self) -> Vec<TenantDashboard> {
689 let dashboards = self.dashboards.read().unwrap();
690 dashboards.clone()
691 }
692
693 pub fn generate_dashboard_data(&self, dashboard_name: &str) -> Option<DashboardData> {
695 let dashboards = self.dashboards.read().unwrap();
696 let dashboard = dashboards.iter().find(|d| d.name == dashboard_name)?;
697
698 let mut widget_data = HashMap::new();
699
700 for widget in &dashboard.widgets {
701 let time_range = chrono::Duration::from_std(widget.time_range).ok()?;
702 let start_time = Utc::now() - time_range;
703 let end_time = Utc::now();
704
705 let mut data = Vec::new();
706 for metric_name in &widget.metric_names {
707 if let Some(timeseries) = self.get_metric_timeseries(metric_name, Some(start_time), Some(end_time)) {
708 data.push((metric_name.clone(), timeseries));
709 }
710 }
711
712 widget_data.insert(widget.id.clone(), data);
713 }
714
715 Some(DashboardData {
716 dashboard_name: dashboard_name.to_string(),
717 generated_at: Utc::now(),
718 widget_data,
719 })
720 }
721
722 pub fn export_metrics(&self, format: ExportFormat, time_range: Option<(DateTime<Utc>, DateTime<Utc>)>) -> Result<String> {
724 let metrics = self.metrics.read().unwrap();
725
726 match format {
727 ExportFormat::Json => {
728 let mut export_data = HashMap::new();
729
730 for (name, metric) in metrics.iter() {
731 let points = if let Some((start, end)) = time_range {
732 metric.get_points_in_range(start, end)
733 } else {
734 metric.get_points()
735 };
736
737 export_data.insert(name, points.into_iter().cloned().collect::<Vec<_>>());
738 }
739
740 Ok(serde_json::to_string_pretty(&export_data)?)
741 },
742 ExportFormat::Csv => {
743 let mut csv_data = String::new();
744 csv_data.push_str("metric_name,timestamp,value,labels\n");
745
746 for (name, metric) in metrics.iter() {
747 let points = if let Some((start, end)) = time_range {
748 metric.get_points_in_range(start, end)
749 } else {
750 metric.get_points()
751 };
752
753 for point in points {
754 let labels_str = if point.labels.is_empty() {
755 String::new()
756 } else {
757 serde_json::to_string(&point.labels).unwrap_or_default()
758 };
759
760 csv_data.push_str(&format!(
761 "{},{},{},{}\n",
762 name,
763 point.timestamp.to_rfc3339(),
764 point.value,
765 labels_str
766 ));
767 }
768 }
769
770 Ok(csv_data)
771 },
772 ExportFormat::Prometheus => {
773 let mut prom_data = String::new();
774
775 for (name, metric) in metrics.iter() {
776 if let Some(latest) = metric.get_latest() {
777 let metric_name = name.replace(['-', ' '], "_");
778
779 if latest.labels.is_empty() {
780 prom_data.push_str(&format!("{} {}\n", metric_name, latest.value));
781 } else {
782 let labels: Vec<String> = latest.labels.iter()
783 .map(|(k, v)| format!("{k}=\"{v}\""))
784 .collect();
785 prom_data.push_str(&format!(
786 "{}{{{}}} {}\n",
787 metric_name,
788 labels.join(","),
789 latest.value
790 ));
791 }
792 }
793 }
794
795 Ok(prom_data)
796 },
797 }
798 }
799
    /// Computes a 0-100 composite health score from the latest values of a
    /// fixed set of well-known metrics, SLA compliance, and active alerts.
    ///
    /// Missing metrics default to 0.0, which scores as fully healthy for
    /// rate/usage components — NOTE(review): absent data is therefore
    /// indistinguishable from perfect health; confirm this is intended.
    pub fn calculate_health_score(&self) -> TenantHealthScore {
        let now = Utc::now();
        let _last_hour = now - chrono::Duration::hours(1);

        // Latest observed values of the canonical health metrics.
        let error_rate = self.get_current_metric_value("error_rate").unwrap_or(0.0);
        let response_time = self.get_current_metric_value("response_time_ms").unwrap_or(0.0);
        let cpu_usage = self.get_current_metric_value("cpu_usage_percent").unwrap_or(0.0);
        let memory_usage = self.get_current_metric_value("memory_usage_percent").unwrap_or(0.0);
        let storage_usage = self.get_current_metric_value("storage_usage_percent").unwrap_or(0.0);

        // error_rate is a fraction (0.0-1.0); converted to a 0-100 score.
        let error_score = (100.0 - (error_rate * 100.0)).clamp(0.0, 100.0);
        // Full marks up to 1 s; degrades proportionally beyond that.
        let performance_score = if response_time > 1000.0 {
            (1000.0 / response_time * 100.0).min(100.0)
        } else {
            100.0
        };
        let cpu_score = (100.0 - cpu_usage).clamp(0.0, 100.0);
        let memory_score = (100.0 - memory_usage).clamp(0.0, 100.0);
        let storage_score = (100.0 - storage_usage).clamp(0.0, 100.0);

        // Mean SLA compliance; no SLAs defined counts as fully compliant.
        let sla_results = self.check_sla_compliance();
        let sla_score = if sla_results.is_empty() {
            100.0
        } else {
            sla_results.iter()
                .map(|r| r.compliance_percentage)
                .sum::<f64>() / sla_results.len() as f64
        };

        // Flat penalty per active alert, weighted by severity.
        let active_alerts = self.get_active_alerts();
        let alert_penalty = active_alerts.iter()
            .map(|alert| match alert.severity {
                AlertType::Critical => 20.0,
                AlertType::Exceeded => 15.0,
                AlertType::Warning => 5.0,
                AlertType::Violation => 25.0,
            })
            .sum::<f64>();

        // Weighted blend; weights sum to 1.0.
        let base_score = error_score * 0.25 +
            performance_score * 0.20 +
            cpu_score * 0.15 +
            memory_score * 0.15 +
            storage_score * 0.10 +
            sla_score * 0.15;

        let overall_score = (base_score - alert_penalty).clamp(0.0, 100.0);

        // Score bands -> categorical status.
        let status = if overall_score >= 90.0 {
            HealthStatus::Excellent
        } else if overall_score >= 75.0 {
            HealthStatus::Good
        } else if overall_score >= 60.0 {
            HealthStatus::Fair
        } else if overall_score >= 40.0 {
            HealthStatus::Poor
        } else {
            HealthStatus::Critical
        };

        TenantHealthScore {
            overall_score,
            status,
            component_scores: HashMap::from([
                ("error_rate".to_string(), error_score),
                ("performance".to_string(), performance_score),
                ("cpu_usage".to_string(), cpu_score),
                ("memory_usage".to_string(), memory_score),
                ("storage_usage".to_string(), storage_score),
                ("sla_compliance".to_string(), sla_score),
            ]),
            active_alerts_count: active_alerts.len(),
            critical_alerts_count: active_alerts.iter().filter(|a| matches!(a.severity, AlertType::Critical | AlertType::Violation)).count(),
            calculated_at: now,
            recommendations: self.generate_health_recommendations(overall_score, &active_alerts),
        }
    }
884
    /// Builds human-readable remediation hints from the overall score,
    /// active alerts, and the latest values of the canonical health metrics.
    fn generate_health_recommendations(&self, score: f64, alerts: &[MetricAlert]) -> Vec<String> {
        let mut recommendations = Vec::new();

        if score < 60.0 {
            recommendations.push("🚨 Critical: Immediate attention required - system health is below acceptable levels".to_string());
        }

        if alerts.iter().any(|a| matches!(a.severity, AlertType::Critical)) {
            recommendations.push("🔴 Address critical alerts immediately to prevent service degradation".to_string());
        }

        // error_rate is a fraction; 0.05 == 5% errors.
        if self.get_current_metric_value("error_rate").unwrap_or(0.0) > 0.05 {
            recommendations.push("📈 High error rate detected - investigate failing operations".to_string());
        }

        if self.get_current_metric_value("response_time_ms").unwrap_or(0.0) > 1000.0 {
            recommendations.push("🐌 Slow response times detected - consider performance optimization".to_string());
        }

        let cpu_usage = self.get_current_metric_value("cpu_usage_percent").unwrap_or(0.0);
        if cpu_usage > 80.0 {
            recommendations.push("💻 High CPU usage - consider scaling up or optimizing workload".to_string());
        }

        let memory_usage = self.get_current_metric_value("memory_usage_percent").unwrap_or(0.0);
        if memory_usage > 85.0 {
            recommendations.push("🧠 High memory usage - check for memory leaks or increase allocation".to_string());
        }

        let storage_usage = self.get_current_metric_value("storage_usage_percent").unwrap_or(0.0);
        if storage_usage > 90.0 {
            recommendations.push("💾 Storage nearly full - archive old data or increase storage capacity".to_string());
        }

        if score >= 90.0 && alerts.is_empty() {
            recommendations.push("✅ System is operating optimally - maintain current configuration".to_string());
        }

        recommendations
    }
926}
927
/// Output formats supported by `TenantMetricsCollector::export_metrics`.
#[derive(Debug, Clone)]
pub enum ExportFormat {
    Json,
    Csv,
    /// Prometheus text exposition format (latest value per metric).
    Prometheus,
}
935
/// Rendered dashboard payload: widget id -> list of (metric name, samples).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DashboardData {
    pub dashboard_name: String,
    pub generated_at: DateTime<Utc>,
    pub widget_data: HashMap<String, Vec<(String, Vec<MetricDataPoint>)>>,
}
943
/// Categorical health band derived from the 0-100 overall score
/// (>=90 Excellent, >=75 Good, >=60 Fair, >=40 Poor, else Critical).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum HealthStatus {
    Excellent,
    Good,
    Fair,
    Poor,
    Critical,
}
953
/// Composite health report produced by `calculate_health_score`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TenantHealthScore {
    /// Weighted 0-100 score after alert penalties.
    pub overall_score: f64,
    pub status: HealthStatus,
    /// Individual 0-100 component scores keyed by component name.
    pub component_scores: HashMap<String, f64>,
    pub active_alerts_count: usize,
    /// Alerts with `Critical` or `Violation` severity.
    pub critical_alerts_count: usize,
    pub calculated_at: DateTime<Utc>,
    pub recommendations: Vec<String>,
}
965
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_time_series_metric() {
        let mut metric = TimeSeriesMetric::new("test_metric".to_string(), 100, 1);

        metric.add_point(MetricDataPoint::new(10.0));
        metric.add_point(MetricDataPoint::new(20.0));
        metric.add_point(MetricDataPoint::new(15.0));

        assert_eq!(metric.calculate_average(), 15.0);
        assert_eq!(metric.get_points().len(), 3);
    }

    #[test]
    fn test_tenant_metrics_collector() {
        let tenant_id = TenantId::new("test-tenant".to_string()).unwrap();
        let collector = TenantMetricsCollector::new(tenant_id);

        collector.record_metric("cpu_usage".to_string(), 45.0, None);
        collector.record_metric("memory_usage".to_string(), 60.0, None);

        assert_eq!(collector.get_current_metric_value("cpu_usage"), Some(45.0));
        assert_eq!(collector.get_current_metric_value("memory_usage"), Some(60.0));
    }

    #[test]
    fn test_aggregated_metric() {
        // Own the points first: `vec![&MetricDataPoint::new(..)]` borrows
        // temporaries that are dropped at the end of the statement (E0716)
        // and does not compile.
        let owned = [
            MetricDataPoint::new(10.0),
            MetricDataPoint::new(20.0),
            MetricDataPoint::new(30.0),
            MetricDataPoint::new(40.0),
        ];
        let points: Vec<&MetricDataPoint> = owned.iter().collect();

        let agg = AggregatedMetric::from_points("test".to_string(), &points);
        assert_eq!(agg.min, 10.0);
        assert_eq!(agg.max, 40.0);
        assert_eq!(agg.avg, 25.0);
        assert_eq!(agg.count, 4);
    }
}