eventuali_core/tenancy/
metrics.rs

1//! Advanced tenant metrics and observability
2//!
3//! This module provides comprehensive tenant metrics, analytics, and real-time monitoring:
4//! - Real-time tenant performance monitoring
5//! - Advanced analytics with trend detection
6//! - Custom dashboards and alerting
7//! - Multi-dimensional metrics collection
8//! - Historical data analysis and reporting
9//! - SLA monitoring and compliance tracking
10
11use std::sync::{Arc, RwLock, Mutex};
12use std::collections::{HashMap, VecDeque, BTreeMap};
13use std::time::{Duration, Instant};
14use chrono::{DateTime, Utc, NaiveDate};
15use serde::{Deserialize, Serialize};
16
17/// Type alias for hourly aggregation storage
18pub type HourlyAggregations = Arc<RwLock<BTreeMap<DateTime<Utc>, HashMap<String, AggregatedMetric>>>>;
19
/// One metric sample in tuple form: `(metric name, value, optional labels)`.
///
/// This is the element type accepted by `TenantMetricsCollector::record_metrics`.
pub type MetricRecord = (String, f64, Option<HashMap<String, String>>);
22
23use super::tenant::TenantId;
24use super::quota::{UsagePattern, AlertType};
25use crate::error::{EventualiError, Result};
26
27/// Time-series data point for metrics
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct MetricDataPoint {
30    pub timestamp: DateTime<Utc>,
31    pub value: f64,
32    pub labels: HashMap<String, String>,
33}
34
35impl MetricDataPoint {
36    pub fn new(value: f64) -> Self {
37        MetricDataPoint {
38            timestamp: Utc::now(),
39            value,
40            labels: HashMap::new(),
41        }
42    }
43
44    pub fn with_labels(mut self, labels: HashMap<String, String>) -> Self {
45        self.labels = labels;
46        self
47    }
48
49    pub fn with_label(mut self, key: String, value: String) -> Self {
50        self.labels.insert(key, value);
51        self
52    }
53
54    pub fn add_label(&mut self, key: String, value: String) {
55        self.labels.insert(key, value);
56    }
57}
58
59/// Time-series metric with rolling window
60#[derive(Debug)]
61pub struct TimeSeriesMetric {
62    #[allow(dead_code)] // Metric name for identification (stored but not currently accessed in implementation)
63    name: String,
64    data_points: VecDeque<MetricDataPoint>,
65    max_points: usize,
66    retention_period: Duration,
67}
68
69impl TimeSeriesMetric {
70    pub fn new(name: String, max_points: usize, retention_hours: u64) -> Self {
71        TimeSeriesMetric {
72            name,
73            data_points: VecDeque::new(),
74            max_points,
75            retention_period: Duration::from_secs(retention_hours * 3600),
76        }
77    }
78
79    pub fn add_point(&mut self, point: MetricDataPoint) {
80        // Remove expired points
81        let cutoff_time = Utc::now() - chrono::Duration::from_std(self.retention_period).unwrap();
82        while let Some(front) = self.data_points.front() {
83            if front.timestamp < cutoff_time {
84                self.data_points.pop_front();
85            } else {
86                break;
87            }
88        }
89
90        // Add new point
91        self.data_points.push_back(point);
92
93        // Maintain max points limit
94        if self.data_points.len() > self.max_points {
95            self.data_points.pop_front();
96        }
97    }
98
99    pub fn get_latest(&self) -> Option<&MetricDataPoint> {
100        self.data_points.back()
101    }
102
103    pub fn get_points(&self) -> Vec<&MetricDataPoint> {
104        self.data_points.iter().collect()
105    }
106
107    pub fn get_points_in_range(
108        &self,
109        start: DateTime<Utc>,
110        end: DateTime<Utc>,
111    ) -> Vec<&MetricDataPoint> {
112        self.data_points
113            .iter()
114            .filter(|point| point.timestamp >= start && point.timestamp <= end)
115            .collect()
116    }
117
118    pub fn calculate_average(&self) -> f64 {
119        if self.data_points.is_empty() {
120            return 0.0;
121        }
122        let sum: f64 = self.data_points.iter().map(|p| p.value).sum();
123        sum / self.data_points.len() as f64
124    }
125
126    pub fn calculate_percentile(&self, percentile: f64) -> f64 {
127        if self.data_points.is_empty() {
128            return 0.0;
129        }
130
131        let mut values: Vec<f64> = self.data_points.iter().map(|p| p.value).collect();
132        values.sort_by(|a, b| a.partial_cmp(b).unwrap());
133
134        let index = ((values.len() - 1) as f64 * percentile / 100.0).round() as usize;
135        values[index.min(values.len() - 1)]
136    }
137
138    pub fn detect_anomalies(&self, threshold_multiplier: f64) -> Vec<&MetricDataPoint> {
139        if self.data_points.len() < 10 {
140            return Vec::new(); // Need enough data for anomaly detection
141        }
142
143        let mean = self.calculate_average();
144        let variance = self.calculate_variance();
145        let std_dev = variance.sqrt();
146        let threshold = std_dev * threshold_multiplier;
147
148        self.data_points
149            .iter()
150            .filter(|point| (point.value - mean).abs() > threshold)
151            .collect()
152    }
153
154    pub fn calculate_variance(&self) -> f64 {
155        if self.data_points.len() < 2 {
156            return 0.0;
157        }
158
159        let mean = self.calculate_average();
160        let sum_squared_diff: f64 = self.data_points
161            .iter()
162            .map(|p| (p.value - mean).powi(2))
163            .sum();
164        
165        sum_squared_diff / (self.data_points.len() - 1) as f64
166    }
167
168    pub fn get_trend(&self) -> UsagePattern {
169        if self.data_points.len() < 5 {
170            return UsagePattern::Stable;
171        }
172
173        // Simple linear regression to detect trend
174        let n = self.data_points.len() as f64;
175        let points: Vec<(f64, f64)> = self.data_points
176            .iter()
177            .enumerate()
178            .map(|(i, point)| (i as f64, point.value))
179            .collect();
180
181        let sum_x: f64 = points.iter().map(|(x, _)| x).sum();
182        let sum_y: f64 = points.iter().map(|(_, y)| y).sum();
183        let sum_xy: f64 = points.iter().map(|(x, y)| x * y).sum();
184        let sum_x2: f64 = points.iter().map(|(x, _)| x * x).sum();
185
186        let slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x * sum_x);
187        
188        // Calculate R² for trend strength
189        let mean_y = sum_y / n;
190        let ss_tot: f64 = points.iter().map(|(_, y)| (y - mean_y).powi(2)).sum();
191        let ss_res: f64 = points.iter().map(|(x, y)| {
192            let predicted = slope * x + (sum_y - slope * sum_x) / n;
193            (y - predicted).powi(2)
194        }).sum();
195        
196        let r_squared = 1.0 - (ss_res / ss_tot);
197
198        // Determine pattern based on slope and correlation
199        if r_squared < 0.5 {
200            UsagePattern::Volatile
201        } else if slope > 0.1 {
202            UsagePattern::Growing
203        } else if slope < -0.1 {
204            UsagePattern::Declining
205        } else {
206            UsagePattern::Stable
207        }
208    }
209}
210
211/// Aggregated metric for different time windows
212#[derive(Debug, Clone, Serialize, Deserialize)]
213pub struct AggregatedMetric {
214    pub name: String,
215    pub min: f64,
216    pub max: f64,
217    pub avg: f64,
218    pub sum: f64,
219    pub count: u64,
220    pub p50: f64,
221    pub p95: f64,
222    pub p99: f64,
223    pub start_time: DateTime<Utc>,
224    pub end_time: DateTime<Utc>,
225}
226
227impl AggregatedMetric {
228    pub fn from_points(name: String, points: &[&MetricDataPoint]) -> Self {
229        if points.is_empty() {
230            return AggregatedMetric {
231                name,
232                min: 0.0,
233                max: 0.0,
234                avg: 0.0,
235                sum: 0.0,
236                count: 0,
237                p50: 0.0,
238                p95: 0.0,
239                p99: 0.0,
240                start_time: Utc::now(),
241                end_time: Utc::now(),
242            };
243        }
244
245        let mut values: Vec<f64> = points.iter().map(|p| p.value).collect();
246        values.sort_by(|a, b| a.partial_cmp(b).unwrap());
247
248        let min = values.first().copied().unwrap_or(0.0);
249        let max = values.last().copied().unwrap_or(0.0);
250        let sum: f64 = values.iter().sum();
251        let count = values.len() as u64;
252        let avg = sum / count as f64;
253
254        let p50_idx = (count as f64 * 0.5).round() as usize;
255        let p95_idx = (count as f64 * 0.95).round() as usize;
256        let p99_idx = (count as f64 * 0.99).round() as usize;
257
258        let p50 = values.get(p50_idx.min(values.len() - 1)).copied().unwrap_or(0.0);
259        let p95 = values.get(p95_idx.min(values.len() - 1)).copied().unwrap_or(0.0);
260        let p99 = values.get(p99_idx.min(values.len() - 1)).copied().unwrap_or(0.0);
261
262        let start_time = points.iter().map(|p| p.timestamp).min().unwrap_or_else(Utc::now);
263        let end_time = points.iter().map(|p| p.timestamp).max().unwrap_or_else(Utc::now);
264
265        AggregatedMetric {
266            name,
267            min,
268            max,
269            avg,
270            sum,
271            count,
272            p50,
273            p95,
274            p99,
275            start_time,
276            end_time,
277        }
278    }
279}
280
281/// SLA (Service Level Agreement) definition and tracking
282#[derive(Debug, Clone, Serialize, Deserialize)]
283pub struct SlaDefinition {
284    pub name: String,
285    pub metric_name: String,
286    pub threshold: f64,
287    pub operator: SlaOperator,
288    pub target_percentage: f64, // e.g., 99.9% uptime
289    pub measurement_window: Duration,
290}
291
292#[derive(Debug, Clone, Serialize, Deserialize)]
293pub enum SlaOperator {
294    LessThan,
295    LessThanOrEqual,
296    GreaterThan,
297    GreaterThanOrEqual,
298    Equal,
299}
300
301/// SLA measurement result
302#[derive(Debug, Clone, Serialize, Deserialize)]
303pub struct SlaResult {
304    pub sla_name: String,
305    pub measurement_period_start: DateTime<Utc>,
306    pub measurement_period_end: DateTime<Utc>,
307    pub compliance_percentage: f64,
308    pub violations_count: u64,
309    pub total_measurements: u64,
310    pub is_compliant: bool,
311    pub breach_duration: Duration,
312}
313
314/// Alert rule for metrics
315#[derive(Debug, Clone, Serialize, Deserialize)]
316pub struct MetricAlertRule {
317    pub name: String,
318    pub metric_name: String,
319    pub threshold: f64,
320    pub operator: SlaOperator,
321    pub severity: AlertType,
322    pub evaluation_window: Duration,
323    pub cooldown_period: Duration,
324    pub enabled: bool,
325}
326
327/// Alert triggered by metric rule
328#[derive(Debug, Clone, Serialize, Deserialize)]
329pub struct MetricAlert {
330    pub id: String,
331    pub rule_name: String,
332    pub tenant_id: TenantId,
333    pub metric_name: String,
334    pub current_value: f64,
335    pub threshold: f64,
336    pub severity: AlertType,
337    pub message: String,
338    pub triggered_at: DateTime<Utc>,
339    pub resolved_at: Option<DateTime<Utc>>,
340    pub acknowledged: bool,
341}
342
343/// Dashboard configuration
344#[derive(Debug, Clone, Serialize, Deserialize)]
345pub struct TenantDashboard {
346    pub name: String,
347    pub description: String,
348    pub widgets: Vec<DashboardWidget>,
349    pub refresh_interval: Duration,
350    pub created_at: DateTime<Utc>,
351    pub updated_at: DateTime<Utc>,
352}
353
354#[derive(Debug, Clone, Serialize, Deserialize)]
355pub struct DashboardWidget {
356    pub id: String,
357    pub widget_type: WidgetType,
358    pub title: String,
359    pub metric_names: Vec<String>,
360    pub time_range: Duration,
361    pub position: WidgetPosition,
362    pub size: WidgetSize,
363    pub configuration: HashMap<String, String>,
364}
365
366#[derive(Debug, Clone, Serialize, Deserialize)]
367pub enum WidgetType {
368    LineChart,
369    AreaChart,
370    BarChart,
371    Gauge,
372    SingleValue,
373    Table,
374    Heatmap,
375}
376
377#[derive(Debug, Clone, Serialize, Deserialize)]
378pub struct WidgetPosition {
379    pub x: u32,
380    pub y: u32,
381}
382
383#[derive(Debug, Clone, Serialize, Deserialize)]
384pub struct WidgetSize {
385    pub width: u32,
386    pub height: u32,
387}
388
389/// Advanced tenant metrics collector and analyzer
390pub struct TenantMetricsCollector {
391    tenant_id: TenantId,
392    metrics: Arc<RwLock<HashMap<String, TimeSeriesMetric>>>,
393    sla_definitions: Arc<RwLock<Vec<SlaDefinition>>>,
394    alert_rules: Arc<RwLock<Vec<MetricAlertRule>>>,
395    active_alerts: Arc<RwLock<Vec<MetricAlert>>>,
396    dashboards: Arc<RwLock<Vec<TenantDashboard>>>,
397    #[allow(dead_code)] // Collection interval for automated metric collection (configured but not actively used in current implementation)
398    collection_interval: Duration,
399    #[allow(dead_code)] // Last collection timestamp for scheduling (tracked but not currently utilized)
400    last_collection: Arc<Mutex<Instant>>,
401    
402    // Pre-computed aggregations for performance
403    #[allow(dead_code)] // Hourly aggregations for performance optimization (stored but not currently queried)
404    hourly_aggregations: HourlyAggregations,
405    #[allow(dead_code)] // Daily aggregations for long-term analytics (stored but not currently accessed)
406    daily_aggregations: Arc<RwLock<BTreeMap<NaiveDate, HashMap<String, AggregatedMetric>>>>,
407}
408
409impl TenantMetricsCollector {
410    pub fn new(tenant_id: TenantId) -> Self {
411        TenantMetricsCollector {
412            tenant_id,
413            metrics: Arc::new(RwLock::new(HashMap::new())),
414            sla_definitions: Arc::new(RwLock::new(Vec::new())),
415            alert_rules: Arc::new(RwLock::new(Vec::new())),
416            active_alerts: Arc::new(RwLock::new(Vec::new())),
417            dashboards: Arc::new(RwLock::new(Vec::new())),
418            collection_interval: Duration::from_secs(60), // 1 minute default
419            last_collection: Arc::new(Mutex::new(Instant::now())),
420            hourly_aggregations: Arc::new(RwLock::new(BTreeMap::new())),
421            daily_aggregations: Arc::new(RwLock::new(BTreeMap::new())),
422        }
423    }
424
425    /// Record a metric data point
426    pub fn record_metric(&self, name: String, value: f64, labels: Option<HashMap<String, String>>) {
427        let point = if let Some(labels) = labels {
428            MetricDataPoint::new(value).with_labels(labels)
429        } else {
430            MetricDataPoint::new(value)
431        };
432
433        let mut metrics = self.metrics.write().unwrap();
434        let metric = metrics.entry(name.clone()).or_insert_with(|| {
435            TimeSeriesMetric::new(name.clone(), 10000, 24) // 10k points, 24h retention
436        });
437
438        metric.add_point(point);
439
440        // Check alert rules
441        drop(metrics); // Release the lock before checking alerts
442        self.check_alert_rules(&name, value);
443    }
444
445    /// Record multiple metrics at once
446    pub fn record_metrics(&self, metrics: Vec<MetricRecord>) {
447        for (name, value, labels) in metrics {
448            self.record_metric(name, value, labels);
449        }
450    }
451
452    /// Get current value of a metric
453    pub fn get_current_metric_value(&self, name: &str) -> Option<f64> {
454        let metrics = self.metrics.read().unwrap();
455        metrics.get(name)?.get_latest().map(|point| point.value)
456    }
457
458    /// Get metric time series data
459    pub fn get_metric_timeseries(
460        &self,
461        name: &str,
462        start: Option<DateTime<Utc>>,
463        end: Option<DateTime<Utc>>,
464    ) -> Option<Vec<MetricDataPoint>> {
465        let metrics = self.metrics.read().unwrap();
466        let metric = metrics.get(name)?;
467
468        let points = if let (Some(start), Some(end)) = (start, end) {
469            metric.get_points_in_range(start, end)
470        } else {
471            metric.get_points()
472        };
473
474        Some(points.into_iter().cloned().collect())
475    }
476
477    /// Get aggregated metrics for time window
478    pub fn get_aggregated_metrics(
479        &self,
480        names: &[String],
481        window: Duration,
482    ) -> HashMap<String, AggregatedMetric> {
483        let end_time = Utc::now();
484        let start_time = end_time - chrono::Duration::from_std(window).unwrap();
485        
486        let metrics = self.metrics.read().unwrap();
487        let mut result = HashMap::new();
488
489        for name in names {
490            if let Some(metric) = metrics.get(name) {
491                let points = metric.get_points_in_range(start_time, end_time);
492                let aggregated = AggregatedMetric::from_points(name.clone(), &points);
493                result.insert(name.clone(), aggregated);
494            }
495        }
496
497        result
498    }
499
500    /// Detect anomalies across all metrics
501    pub fn detect_anomalies(&self, threshold_multiplier: f64) -> HashMap<String, Vec<MetricDataPoint>> {
502        let metrics = self.metrics.read().unwrap();
503        let mut anomalies = HashMap::new();
504
505        for (name, metric) in metrics.iter() {
506            let anomalous_points = metric.detect_anomalies(threshold_multiplier);
507            if !anomalous_points.is_empty() {
508                anomalies.insert(
509                    name.clone(),
510                    anomalous_points.into_iter().cloned().collect()
511                );
512            }
513        }
514
515        anomalies
516    }
517
518    /// Get usage patterns for all metrics
519    pub fn get_usage_patterns(&self) -> HashMap<String, UsagePattern> {
520        let metrics = self.metrics.read().unwrap();
521        let mut patterns = HashMap::new();
522
523        for (name, metric) in metrics.iter() {
524            patterns.insert(name.clone(), metric.get_trend());
525        }
526
527        patterns
528    }
529
530    /// Add SLA definition
531    pub fn add_sla_definition(&self, sla: SlaDefinition) {
532        let mut slas = self.sla_definitions.write().unwrap();
533        slas.push(sla);
534    }
535
536    /// Check SLA compliance
537    pub fn check_sla_compliance(&self) -> Vec<SlaResult> {
538        let slas = self.sla_definitions.read().unwrap();
539        let metrics = self.metrics.read().unwrap();
540        let mut results = Vec::new();
541
542        for sla in slas.iter() {
543            if let Some(metric) = metrics.get(&sla.metric_name) {
544                let end_time = Utc::now();
545                let start_time = end_time - chrono::Duration::from_std(sla.measurement_window).unwrap();
546                let points = metric.get_points_in_range(start_time, end_time);
547
548                let mut violations = 0u64;
549                let total_measurements = points.len() as u64;
550                
551                for point in &points {
552                    let violates = match sla.operator {
553                        SlaOperator::LessThan => point.value >= sla.threshold,
554                        SlaOperator::LessThanOrEqual => point.value > sla.threshold,
555                        SlaOperator::GreaterThan => point.value <= sla.threshold,
556                        SlaOperator::GreaterThanOrEqual => point.value < sla.threshold,
557                        SlaOperator::Equal => (point.value - sla.threshold).abs() > f64::EPSILON,
558                    };
559
560                    if violates {
561                        violations += 1;
562                    }
563                }
564
565                let compliance_percentage = if total_measurements > 0 {
566                    100.0 * (total_measurements - violations) as f64 / total_measurements as f64
567                } else {
568                    100.0
569                };
570
571                results.push(SlaResult {
572                    sla_name: sla.name.clone(),
573                    measurement_period_start: start_time,
574                    measurement_period_end: end_time,
575                    compliance_percentage,
576                    violations_count: violations,
577                    total_measurements,
578                    is_compliant: compliance_percentage >= sla.target_percentage,
579                    breach_duration: Duration::from_secs(violations * 60), // Simplified
580                });
581            }
582        }
583
584        results
585    }
586
587    /// Add alert rule
588    pub fn add_alert_rule(&self, rule: MetricAlertRule) {
589        let mut rules = self.alert_rules.write().unwrap();
590        rules.push(rule);
591    }
592
593    /// Check alert rules for a specific metric
594    fn check_alert_rules(&self, metric_name: &str, current_value: f64) {
595        let rules = self.alert_rules.read().unwrap();
596        let mut active_alerts = self.active_alerts.write().unwrap();
597
598        for rule in rules.iter() {
599            if !rule.enabled || rule.metric_name != metric_name {
600                continue;
601            }
602
603            let should_trigger = match rule.operator {
604                SlaOperator::LessThan => current_value < rule.threshold,
605                SlaOperator::LessThanOrEqual => current_value <= rule.threshold,
606                SlaOperator::GreaterThan => current_value > rule.threshold,
607                SlaOperator::GreaterThanOrEqual => current_value >= rule.threshold,
608                SlaOperator::Equal => (current_value - rule.threshold).abs() < f64::EPSILON,
609            };
610
611            if should_trigger {
612                // Check if alert already exists and is within cooldown
613                let existing_alert = active_alerts.iter().any(|alert| {
614                    alert.rule_name == rule.name && 
615                    alert.resolved_at.is_none() &&
616                    (Utc::now() - alert.triggered_at).to_std().unwrap_or(Duration::ZERO) < rule.cooldown_period
617                });
618
619                if !existing_alert {
620                    let alert = MetricAlert {
621                        id: uuid::Uuid::new_v4().to_string(),
622                        rule_name: rule.name.clone(),
623                        tenant_id: self.tenant_id.clone(),
624                        metric_name: metric_name.to_string(),
625                        current_value,
626                        threshold: rule.threshold,
627                        severity: rule.severity.clone(),
628                        message: format!(
629                            "Metric {} {} {} (current: {}, threshold: {})",
630                            metric_name,
631                            match rule.operator {
632                                SlaOperator::LessThan => "is less than",
633                                SlaOperator::LessThanOrEqual => "is less than or equal to",
634                                SlaOperator::GreaterThan => "is greater than",
635                                SlaOperator::GreaterThanOrEqual => "is greater than or equal to",
636                                SlaOperator::Equal => "equals",
637                            },
638                            rule.threshold,
639                            current_value,
640                            rule.threshold
641                        ),
642                        triggered_at: Utc::now(),
643                        resolved_at: None,
644                        acknowledged: false,
645                    };
646
647                    active_alerts.push(alert);
648                }
649            }
650        }
651    }
652
653    /// Get active alerts
654    pub fn get_active_alerts(&self) -> Vec<MetricAlert> {
655        let active_alerts = self.active_alerts.read().unwrap();
656        active_alerts.clone()
657    }
658
659    /// Acknowledge alert
660    pub fn acknowledge_alert(&self, alert_id: &str) -> Result<()> {
661        let mut active_alerts = self.active_alerts.write().unwrap();
662        if let Some(alert) = active_alerts.iter_mut().find(|a| a.id == alert_id) {
663            alert.acknowledged = true;
664            Ok(())
665        } else {
666            Err(EventualiError::Tenant(format!("Alert not found: {alert_id}")))
667        }
668    }
669
670    /// Resolve alert
671    pub fn resolve_alert(&self, alert_id: &str) -> Result<()> {
672        let mut active_alerts = self.active_alerts.write().unwrap();
673        if let Some(alert) = active_alerts.iter_mut().find(|a| a.id == alert_id) {
674            alert.resolved_at = Some(Utc::now());
675            Ok(())
676        } else {
677            Err(EventualiError::Tenant(format!("Alert not found: {alert_id}")))
678        }
679    }
680
681    /// Create dashboard
682    pub fn create_dashboard(&self, dashboard: TenantDashboard) {
683        let mut dashboards = self.dashboards.write().unwrap();
684        dashboards.push(dashboard);
685    }
686
687    /// Get dashboards
688    pub fn get_dashboards(&self) -> Vec<TenantDashboard> {
689        let dashboards = self.dashboards.read().unwrap();
690        dashboards.clone()
691    }
692
693    /// Generate dashboard data
694    pub fn generate_dashboard_data(&self, dashboard_name: &str) -> Option<DashboardData> {
695        let dashboards = self.dashboards.read().unwrap();
696        let dashboard = dashboards.iter().find(|d| d.name == dashboard_name)?;
697
698        let mut widget_data = HashMap::new();
699        
700        for widget in &dashboard.widgets {
701            let time_range = chrono::Duration::from_std(widget.time_range).ok()?;
702            let start_time = Utc::now() - time_range;
703            let end_time = Utc::now();
704
705            let mut data = Vec::new();
706            for metric_name in &widget.metric_names {
707                if let Some(timeseries) = self.get_metric_timeseries(metric_name, Some(start_time), Some(end_time)) {
708                    data.push((metric_name.clone(), timeseries));
709                }
710            }
711
712            widget_data.insert(widget.id.clone(), data);
713        }
714
715        Some(DashboardData {
716            dashboard_name: dashboard_name.to_string(),
717            generated_at: Utc::now(),
718            widget_data,
719        })
720    }
721
722    /// Export metrics data for external systems
723    pub fn export_metrics(&self, format: ExportFormat, time_range: Option<(DateTime<Utc>, DateTime<Utc>)>) -> Result<String> {
724        let metrics = self.metrics.read().unwrap();
725        
726        match format {
727            ExportFormat::Json => {
728                let mut export_data = HashMap::new();
729                
730                for (name, metric) in metrics.iter() {
731                    let points = if let Some((start, end)) = time_range {
732                        metric.get_points_in_range(start, end)
733                    } else {
734                        metric.get_points()
735                    };
736                    
737                    export_data.insert(name, points.into_iter().cloned().collect::<Vec<_>>());
738                }
739                
740                Ok(serde_json::to_string_pretty(&export_data)?)
741            },
742            ExportFormat::Csv => {
743                let mut csv_data = String::new();
744                csv_data.push_str("metric_name,timestamp,value,labels\n");
745                
746                for (name, metric) in metrics.iter() {
747                    let points = if let Some((start, end)) = time_range {
748                        metric.get_points_in_range(start, end)
749                    } else {
750                        metric.get_points()
751                    };
752                    
753                    for point in points {
754                        let labels_str = if point.labels.is_empty() {
755                            String::new()
756                        } else {
757                            serde_json::to_string(&point.labels).unwrap_or_default()
758                        };
759                        
760                        csv_data.push_str(&format!(
761                            "{},{},{},{}\n",
762                            name,
763                            point.timestamp.to_rfc3339(),
764                            point.value,
765                            labels_str
766                        ));
767                    }
768                }
769                
770                Ok(csv_data)
771            },
772            ExportFormat::Prometheus => {
773                let mut prom_data = String::new();
774                
775                for (name, metric) in metrics.iter() {
776                    if let Some(latest) = metric.get_latest() {
777                        let metric_name = name.replace(['-', ' '], "_");
778                        
779                        if latest.labels.is_empty() {
780                            prom_data.push_str(&format!("{} {}\n", metric_name, latest.value));
781                        } else {
782                            let labels: Vec<String> = latest.labels.iter()
783                                .map(|(k, v)| format!("{k}=\"{v}\""))
784                                .collect();
785                            prom_data.push_str(&format!(
786                                "{}{{{}}} {}\n",
787                                metric_name,
788                                labels.join(","),
789                                latest.value
790                            ));
791                        }
792                    }
793                }
794                
795                Ok(prom_data)
796            },
797        }
798    }
799
800    /// Calculate comprehensive tenant health score
801    pub fn calculate_health_score(&self) -> TenantHealthScore {
802        let now = Utc::now();
803        let _last_hour = now - chrono::Duration::hours(1);
804        
805        // Get key metrics for health calculation
806        let error_rate = self.get_current_metric_value("error_rate").unwrap_or(0.0);
807        let response_time = self.get_current_metric_value("response_time_ms").unwrap_or(0.0);
808        let cpu_usage = self.get_current_metric_value("cpu_usage_percent").unwrap_or(0.0);
809        let memory_usage = self.get_current_metric_value("memory_usage_percent").unwrap_or(0.0);
810        let storage_usage = self.get_current_metric_value("storage_usage_percent").unwrap_or(0.0);
811        
812        // Calculate individual component scores (0-100)
813        let error_score = (100.0 - (error_rate * 100.0)).clamp(0.0, 100.0);
814        let performance_score = if response_time > 1000.0 {
815            (1000.0 / response_time * 100.0).min(100.0)
816        } else {
817            100.0
818        };
819        let cpu_score = (100.0 - cpu_usage).clamp(0.0, 100.0);
820        let memory_score = (100.0 - memory_usage).clamp(0.0, 100.0);
821        let storage_score = (100.0 - storage_usage).clamp(0.0, 100.0);
822        
823        // Calculate SLA compliance score
824        let sla_results = self.check_sla_compliance();
825        let sla_score = if sla_results.is_empty() {
826            100.0
827        } else {
828            sla_results.iter()
829                .map(|r| r.compliance_percentage)
830                .sum::<f64>() / sla_results.len() as f64
831        };
832        
833        // Calculate active alerts impact
834        let active_alerts = self.get_active_alerts();
835        let alert_penalty = active_alerts.iter()
836            .map(|alert| match alert.severity {
837                AlertType::Critical => 20.0,
838                AlertType::Exceeded => 15.0,
839                AlertType::Warning => 5.0,
840                AlertType::Violation => 25.0,
841            })
842            .sum::<f64>();
843        
844        // Weighted overall score
845        let base_score = error_score * 0.25 +
846            performance_score * 0.20 +
847            cpu_score * 0.15 +
848            memory_score * 0.15 +
849            storage_score * 0.10 +
850            sla_score * 0.15;
851        
852        let overall_score = (base_score - alert_penalty).clamp(0.0, 100.0);
853        
854        // Determine health status
855        let status = if overall_score >= 90.0 {
856            HealthStatus::Excellent
857        } else if overall_score >= 75.0 {
858            HealthStatus::Good
859        } else if overall_score >= 60.0 {
860            HealthStatus::Fair
861        } else if overall_score >= 40.0 {
862            HealthStatus::Poor
863        } else {
864            HealthStatus::Critical
865        };
866        
867        TenantHealthScore {
868            overall_score,
869            status,
870            component_scores: HashMap::from([
871                ("error_rate".to_string(), error_score),
872                ("performance".to_string(), performance_score),
873                ("cpu_usage".to_string(), cpu_score),
874                ("memory_usage".to_string(), memory_score),
875                ("storage_usage".to_string(), storage_score),
876                ("sla_compliance".to_string(), sla_score),
877            ]),
878            active_alerts_count: active_alerts.len(),
879            critical_alerts_count: active_alerts.iter().filter(|a| matches!(a.severity, AlertType::Critical | AlertType::Violation)).count(),
880            calculated_at: now,
881            recommendations: self.generate_health_recommendations(overall_score, &active_alerts),
882        }
883    }
884
885    /// Generate health recommendations based on current state
886    fn generate_health_recommendations(&self, score: f64, alerts: &[MetricAlert]) -> Vec<String> {
887        let mut recommendations = Vec::new();
888        
889        if score < 60.0 {
890            recommendations.push("🚨 Critical: Immediate attention required - system health is below acceptable levels".to_string());
891        }
892        
893        if alerts.iter().any(|a| matches!(a.severity, AlertType::Critical)) {
894            recommendations.push("🔴 Address critical alerts immediately to prevent service degradation".to_string());
895        }
896        
897        if self.get_current_metric_value("error_rate").unwrap_or(0.0) > 0.05 {
898            recommendations.push("📈 High error rate detected - investigate failing operations".to_string());
899        }
900        
901        if self.get_current_metric_value("response_time_ms").unwrap_or(0.0) > 1000.0 {
902            recommendations.push("🐌 Slow response times detected - consider performance optimization".to_string());
903        }
904        
905        let cpu_usage = self.get_current_metric_value("cpu_usage_percent").unwrap_or(0.0);
906        if cpu_usage > 80.0 {
907            recommendations.push("💻 High CPU usage - consider scaling up or optimizing workload".to_string());
908        }
909        
910        let memory_usage = self.get_current_metric_value("memory_usage_percent").unwrap_or(0.0);
911        if memory_usage > 85.0 {
912            recommendations.push("🧠 High memory usage - check for memory leaks or increase allocation".to_string());
913        }
914        
915        let storage_usage = self.get_current_metric_value("storage_usage_percent").unwrap_or(0.0);
916        if storage_usage > 90.0 {
917            recommendations.push("💾 Storage nearly full - archive old data or increase storage capacity".to_string());
918        }
919        
920        if score >= 90.0 && alerts.is_empty() {
921            recommendations.push("✅ System is operating optimally - maintain current configuration".to_string());
922        }
923        
924        recommendations
925    }
926}
927
/// Export formats for metrics data
///
/// Derives `PartialEq`/`Eq` so a requested format can be compared directly
/// (for example in assertions or `==` checks).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExportFormat {
    /// JSON output.
    Json,
    /// Comma-separated values output.
    Csv,
    /// Prometheus output (presumably the text exposition format — confirm against the exporter).
    Prometheus,
}
935
936/// Dashboard data structure
937#[derive(Debug, Clone, Serialize, Deserialize)]
938pub struct DashboardData {
939    pub dashboard_name: String,
940    pub generated_at: DateTime<Utc>,
941    pub widget_data: HashMap<String, Vec<(String, Vec<MetricDataPoint>)>>,
942}
943
944/// Health status levels
945#[derive(Debug, Clone, Serialize, Deserialize)]
946pub enum HealthStatus {
947    Excellent,
948    Good,
949    Fair,
950    Poor,
951    Critical,
952}
953
954/// Comprehensive tenant health score
955#[derive(Debug, Clone, Serialize, Deserialize)]
956pub struct TenantHealthScore {
957    pub overall_score: f64,
958    pub status: HealthStatus,
959    pub component_scores: HashMap<String, f64>,
960    pub active_alerts_count: usize,
961    pub critical_alerts_count: usize,
962    pub calculated_at: DateTime<Utc>,
963    pub recommendations: Vec<String>,
964}
965
#[cfg(test)]
mod tests {
    use super::*;

    /// Points added to a series are all retained and averaged.
    #[test]
    fn test_time_series_metric() {
        let mut metric = TimeSeriesMetric::new("test_metric".to_string(), 100, 1);

        metric.add_point(MetricDataPoint::new(10.0));
        metric.add_point(MetricDataPoint::new(20.0));
        metric.add_point(MetricDataPoint::new(15.0));

        assert_eq!(metric.calculate_average(), 15.0);
        assert_eq!(metric.get_points().len(), 3);
    }

    /// A recorded metric is reported back as that metric's current value.
    #[test]
    fn test_tenant_metrics_collector() {
        let tenant_id = TenantId::new("test-tenant".to_string()).unwrap();
        let collector = TenantMetricsCollector::new(tenant_id);

        collector.record_metric("cpu_usage".to_string(), 45.0, None);
        collector.record_metric("memory_usage".to_string(), 60.0, None);

        assert_eq!(collector.get_current_metric_value("cpu_usage"), Some(45.0));
        assert_eq!(collector.get_current_metric_value("memory_usage"), Some(60.0));
    }

    /// Aggregation over four points yields the expected min, max, average and count.
    #[test]
    fn test_aggregated_metric() {
        // Own the data points in a named binding so the references handed to
        // `from_points` stay valid for the rest of the test (references to
        // temporaries created inside `vec![...]` would not outlive the `let`).
        let owned_points: Vec<MetricDataPoint> = [10.0, 20.0, 30.0, 40.0]
            .iter()
            .map(|&value| MetricDataPoint::new(value))
            .collect();
        let points: Vec<&MetricDataPoint> = owned_points.iter().collect();

        let agg = AggregatedMetric::from_points("test".to_string(), &points);
        assert_eq!(agg.min, 10.0);
        assert_eq!(agg.max, 40.0);
        assert_eq!(agg.avg, 25.0);
        assert_eq!(agg.count, 4);
    }
}