oxirs_vec/enhanced_performance_monitoring.rs

//! Enhanced Performance Monitoring and Analytics System
//!
//! This module provides comprehensive performance monitoring, analytics dashboard,
//! and quality assurance metrics for the vector search engine.
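//!
//! # Example
//!
//! A minimal usage sketch, assuming default settings except for a custom
//! latency threshold (paths and values are illustrative):
//!
//! ```ignore
//! let config = MonitoringConfig {
//!     alert_thresholds: AlertThresholds {
//!         max_query_latency: 250.0, // alert on queries slower than 250 ms
//!         ..AlertThresholds::default()
//!     },
//!     ..MonitoringConfig::default()
//! };
//! let monitor = EnhancedPerformanceMonitor::new(config);
//!
//! // ... record_query / record_system_metrics / record_quality_metrics ...
//!
//! // Export the aggregated report in the configured format (JSON by default).
//! let exported = monitor.export_metrics().expect("export should succeed");
//! ```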

use anyhow::{anyhow, Result};
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, VecDeque};
use std::sync::Arc;
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};

/// Configuration for performance monitoring
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MonitoringConfig {
    /// Enable real-time monitoring
    pub enable_real_time: bool,
    /// Enable detailed query logging
    pub enable_query_logging: bool,
    /// Enable system resource monitoring
    pub enable_system_monitoring: bool,
    /// Enable quality metrics collection
    pub enable_quality_metrics: bool,
    /// Metrics retention period
    pub retention_period: Duration,
    /// Sampling rate for metrics (0.0 to 1.0)
    pub sampling_rate: f32,
    /// Alert thresholds
    pub alert_thresholds: AlertThresholds,
    /// Export configuration
    pub export_config: ExportConfig,
}

impl Default for MonitoringConfig {
    fn default() -> Self {
        Self {
            enable_real_time: true,
            enable_query_logging: true,
            enable_system_monitoring: true,
            enable_quality_metrics: true,
            retention_period: Duration::from_secs(24 * 60 * 60), // 24 hours
            sampling_rate: 1.0,
            alert_thresholds: AlertThresholds::default(),
            export_config: ExportConfig::default(),
        }
    }
}

/// Alert threshold configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertThresholds {
    /// Maximum acceptable query latency (ms)
    pub max_query_latency: f64,
    /// Maximum acceptable error rate (0.0 to 1.0)
    pub max_error_rate: f32,
    /// Minimum acceptable recall@10
    pub min_recall_at_10: f32,
    /// Maximum memory usage (MB)
    pub max_memory_usage: u64,
    /// Maximum CPU usage (0.0 to 1.0)
    pub max_cpu_usage: f32,
}

impl Default for AlertThresholds {
    fn default() -> Self {
        Self {
            max_query_latency: 1000.0, // 1 second
            max_error_rate: 0.05,      // 5%
            min_recall_at_10: 0.90,    // 90%
            max_memory_usage: 8192,    // 8GB
            max_cpu_usage: 0.80,       // 80%
        }
    }
}

/// Export configuration for metrics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportConfig {
    /// Export format
    pub format: ExportFormat,
    /// Export interval
    pub export_interval: Duration,
    /// Export destination
    pub destination: ExportDestination,
    /// Include detailed metrics
    pub include_detailed: bool,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ExportFormat {
    JSON,
    CSV,
    Prometheus,
    InfluxDB,
    ElasticSearch,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ExportDestination {
    File(String),
    Http(String),
    Database(String),
    Console,
}

impl Default for ExportConfig {
    fn default() -> Self {
        Self {
            format: ExportFormat::JSON,
            export_interval: Duration::from_secs(60),
            destination: ExportDestination::Console,
            include_detailed: false,
        }
    }
}

/// Comprehensive performance monitor
pub struct EnhancedPerformanceMonitor {
    config: MonitoringConfig,
    query_metrics: Arc<RwLock<QueryMetricsCollector>>,
    system_metrics: Arc<RwLock<SystemMetricsCollector>>,
    quality_metrics: Arc<RwLock<QualityMetricsCollector>>,
    alert_manager: AlertManager,
    analytics_engine: AnalyticsEngine,
    dashboard_data: Arc<RwLock<DashboardData>>,
}

impl EnhancedPerformanceMonitor {
    /// Create new performance monitor
    pub fn new(config: MonitoringConfig) -> Self {
        Self {
            config: config.clone(),
            query_metrics: Arc::new(RwLock::new(QueryMetricsCollector::new())),
            system_metrics: Arc::new(RwLock::new(SystemMetricsCollector::new())),
            quality_metrics: Arc::new(RwLock::new(QualityMetricsCollector::new())),
            alert_manager: AlertManager::new(config.alert_thresholds.clone()),
            analytics_engine: AnalyticsEngine::new(),
            dashboard_data: Arc::new(RwLock::new(DashboardData::new())),
        }
    }

    /// Record a query execution
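    ///
    /// # Example
    ///
    /// A sketch of recording a single k-NN query (field values are illustrative;
    /// `monitor` is an `EnhancedPerformanceMonitor` created as in the module-level
    /// example):
    ///
    /// ```ignore
    /// use std::time::Instant;
    ///
    /// let started = Instant::now();
    /// // ... execute the search ...
    /// monitor.record_query(QueryInfo {
    ///     query_id: "q-42".to_string(),
    ///     query_type: QueryType::KNNSearch,
    ///     query_text: Some("similar documents".to_string()),
    ///     vector_dimensions: Some(384),
    ///     k_value: Some(10),
    ///     threshold: None,
    ///     start_time: started,
    ///     end_time: Instant::now(),
    ///     success: true,
    ///     error_message: None,
    ///     results_count: 10,
    ///     index_used: Some("hnsw".to_string()),
    ///     cache_hit: false,
    /// });
    /// ```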
    pub fn record_query(&self, query_info: QueryInfo) {
        if !self.config.enable_real_time {
            return;
        }

        // Check sampling rate: skip a random fraction of queries when
        // sampling_rate < 1.0. The RNG is seeded from the clock rather than a
        // fixed constant so that successive calls draw different values.
        if self.config.sampling_rate < 1.0 {
            #[allow(unused_imports)]
            use scirs2_core::random::{Random, Rng};
            let seed = SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .map(|d| d.subsec_nanos() as u64)
                .unwrap_or(0);
            let mut rng = Random::seed(seed);
            if rng.gen_range(0.0..1.0) > self.config.sampling_rate {
                return;
            }
        }

        // Record query metrics
        {
            let mut metrics = self.query_metrics.write();
            metrics.record_query(query_info.clone());
        }

        // Check for alerts
        self.alert_manager.check_query_alerts(&query_info);

        // Update dashboard data
        {
            let mut dashboard = self.dashboard_data.write();
            dashboard.update_query_stats(&query_info);
        }

        // Run analytics
        self.analytics_engine.analyze_query(&query_info);
    }

    /// Record system metrics
    pub fn record_system_metrics(&self, metrics: SystemMetrics) {
        if !self.config.enable_system_monitoring {
            return;
        }

        {
            let mut collector = self.system_metrics.write();
            collector.record_metrics(metrics.clone());
        }

        // Check for system alerts
        self.alert_manager.check_system_alerts(&metrics);

        // Update dashboard
        {
            let mut dashboard = self.dashboard_data.write();
            dashboard.update_system_stats(&metrics);
        }
    }

    /// Record quality metrics
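    ///
    /// # Example
    ///
    /// A sketch with illustrative evaluation scores (these would normally be
    /// computed against a labelled ground-truth set; `monitor` is created as in
    /// the module-level example):
    ///
    /// ```ignore
    /// use std::time::SystemTime;
    ///
    /// monitor.record_quality_metrics(QualityMetrics {
    ///     timestamp: SystemTime::now(),
    ///     precision_at_1: 0.95,
    ///     precision_at_5: 0.92,
    ///     precision_at_10: 0.90,
    ///     recall_at_1: 0.40,
    ///     recall_at_5: 0.78,
    ///     recall_at_10: 0.93,
    ///     f1_score: 0.91,
    ///     mrr: 0.88,
    ///     ndcg: 0.90,
    ///     query_coverage: 0.99,
    ///     result_diversity: 0.65,
    /// });
    /// ```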
    pub fn record_quality_metrics(&self, metrics: QualityMetrics) {
        if !self.config.enable_quality_metrics {
            return;
        }

        {
            let mut collector = self.quality_metrics.write();
            collector.record_metrics(metrics.clone());
        }

        // Check for quality alerts
        self.alert_manager.check_quality_alerts(&metrics);

        // Update dashboard
        {
            let mut dashboard = self.dashboard_data.write();
            dashboard.update_quality_stats(&metrics);
        }
    }

    /// Get current dashboard data
    pub fn get_dashboard_data(&self) -> DashboardData {
        self.dashboard_data.read().clone()
    }

    /// Generate comprehensive analytics report
    pub fn generate_analytics_report(&self) -> AnalyticsReport {
        let query_stats = self.query_metrics.read().get_statistics();
        let system_stats = self.system_metrics.read().get_statistics();
        let quality_stats = self.quality_metrics.read().get_statistics();
        let alerts = self.alert_manager.get_active_alerts();

        AnalyticsReport {
            timestamp: SystemTime::now(),
            query_statistics: query_stats,
            system_statistics: system_stats,
            quality_statistics: quality_stats,
            active_alerts: alerts,
            trends: self.analytics_engine.get_trends(),
            recommendations: self.analytics_engine.get_recommendations(),
        }
    }

    /// Export metrics in the configured format.
    ///
    /// JSON, CSV, and Prometheus exports are currently implemented; other
    /// formats return an error.
    pub fn export_metrics(&self) -> Result<String> {
        let report = self.generate_analytics_report();

        match self.config.export_config.format {
            ExportFormat::JSON => serde_json::to_string_pretty(&report)
                .map_err(|e| anyhow!("JSON serialization error: {}", e)),
            ExportFormat::CSV => self.generate_csv_export(&report),
            ExportFormat::Prometheus => self.generate_prometheus_export(&report),
            _ => Err(anyhow!("Export format not yet implemented")),
        }
    }

    /// Generate CSV export
    fn generate_csv_export(&self, report: &AnalyticsReport) -> Result<String> {
        let mut csv = String::new();
        csv.push_str("metric,value,timestamp\n");

        csv.push_str(&format!(
            "total_queries,{},{}\n",
            report.query_statistics.total_queries,
            report
                .timestamp
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs()
        ));

        csv.push_str(&format!(
            "avg_latency_ms,{:.2},{}\n",
            report.query_statistics.average_latency.as_secs_f64() * 1000.0,
            report
                .timestamp
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs()
        ));

        Ok(csv)
    }

    /// Generate Prometheus export
    fn generate_prometheus_export(&self, report: &AnalyticsReport) -> Result<String> {
        let mut prometheus = String::new();

        prometheus.push_str("# HELP vector_search_queries_total Total number of queries\n");
        prometheus.push_str("# TYPE vector_search_queries_total counter\n");
        prometheus.push_str(&format!(
            "vector_search_queries_total {}\n",
            report.query_statistics.total_queries
        ));

        // Only a single average value is exported, so the metric is declared as
        // a gauge (a true histogram would require _bucket/_sum/_count series).
        prometheus.push_str("# HELP vector_search_latency_seconds Average query latency in seconds\n");
        prometheus.push_str("# TYPE vector_search_latency_seconds gauge\n");
        prometheus.push_str(&format!(
            "vector_search_latency_seconds {:.6}\n",
            report.query_statistics.average_latency.as_secs_f64()
        ));

        Ok(prometheus)
    }

    /// Start background monitoring
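    ///
    /// The current implementation is a stub. A minimal sketch of what a caller
    /// (or a future implementation) could do instead, assuming the monitor is
    /// shared behind an `Arc` and system values are gathered elsewhere:
    ///
    /// ```ignore
    /// use std::{sync::Arc, thread, time::Duration};
    ///
    /// let monitor = Arc::new(EnhancedPerformanceMonitor::new(MonitoringConfig::default()));
    /// let poller = {
    ///     let monitor = Arc::clone(&monitor);
    ///     thread::spawn(move || loop {
    ///         // Placeholder values; a real poller would read them from the OS.
    ///         monitor.record_system_metrics(SystemMetrics {
    ///             timestamp: std::time::SystemTime::now(),
    ///             cpu_usage: 0.0,
    ///             memory_usage: 0,
    ///             memory_total: 0,
    ///             disk_usage: 0,
    ///             disk_total: 0,
    ///             network_in: 0,
    ///             network_out: 0,
    ///             open_file_descriptors: 0,
    ///             thread_count: 0,
    ///         });
    ///         thread::sleep(Duration::from_secs(60));
    ///     })
    /// };
    /// ```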
    pub fn start_background_monitoring(&self) {
        // In a real implementation, this would start background threads
        // for continuous system monitoring, metric collection, etc.
    }

    /// Stop monitoring
    pub fn stop_monitoring(&self) {
        // Clean shutdown of monitoring systems
    }
}

/// Query information for monitoring
#[derive(Debug, Clone)]
pub struct QueryInfo {
    pub query_id: String,
    pub query_type: QueryType,
    pub query_text: Option<String>,
    pub vector_dimensions: Option<usize>,
    pub k_value: Option<usize>,
    pub threshold: Option<f32>,
    pub start_time: Instant,
    pub end_time: Instant,
    pub success: bool,
    pub error_message: Option<String>,
    pub results_count: usize,
    pub index_used: Option<String>,
    pub cache_hit: bool,
}

#[derive(Debug, Clone)]
pub enum QueryType {
    KNNSearch,
    ThresholdSearch,
    SimilarityCalculation,
    TextEmbedding,
    IndexUpdate,
    Custom(String),
}

/// Query metrics collector
#[derive(Debug)]
pub struct QueryMetricsCollector {
    queries: VecDeque<QueryInfo>,
    statistics: QueryStatistics,
    max_retention: usize,
}

impl Default for QueryMetricsCollector {
    fn default() -> Self {
        Self::new()
    }
}

impl QueryMetricsCollector {
    pub fn new() -> Self {
        Self {
            queries: VecDeque::new(),
            statistics: QueryStatistics::default(),
            max_retention: 10000, // Keep last 10k queries
        }
    }

    pub fn record_query(&mut self, query: QueryInfo) {
        let latency = query.end_time.duration_since(query.start_time);

        // Update statistics
        self.statistics.total_queries += 1;
        if query.success {
            self.statistics.successful_queries += 1;
        } else {
            self.statistics.failed_queries += 1;
        }

        // Update latency statistics
        if self.statistics.total_queries == 1 {
            self.statistics.average_latency = latency;
            self.statistics.min_latency = latency;
            self.statistics.max_latency = latency;
        } else {
            // Update running average
            let total_time = self
                .statistics
                .average_latency
                .mul_f64(self.statistics.total_queries as f64 - 1.0)
                + latency;
            self.statistics.average_latency =
                total_time.div_f64(self.statistics.total_queries as f64);

            if latency < self.statistics.min_latency {
                self.statistics.min_latency = latency;
            }
            if latency > self.statistics.max_latency {
                self.statistics.max_latency = latency;
            }
        }

        // Update latency distribution
        let latency_ms = latency.as_millis() as f64;
        if latency_ms < 10.0 {
            self.statistics.latency_distribution.p10 += 1;
        } else if latency_ms < 50.0 {
            self.statistics.latency_distribution.p50 += 1;
        } else if latency_ms < 100.0 {
            self.statistics.latency_distribution.p90 += 1;
        } else if latency_ms < 500.0 {
            self.statistics.latency_distribution.p95 += 1;
        } else {
            self.statistics.latency_distribution.p99 += 1;
        }

        // Cache hit rate
        if query.cache_hit {
            self.statistics.cache_hit_rate =
                (self.statistics.cache_hit_rate * (self.statistics.total_queries - 1) as f32 + 1.0)
                    / self.statistics.total_queries as f32;
        } else {
            self.statistics.cache_hit_rate = (self.statistics.cache_hit_rate
                * (self.statistics.total_queries - 1) as f32)
                / self.statistics.total_queries as f32;
        }

        // Store query for retention
        self.queries.push_back(query);
        if self.queries.len() > self.max_retention {
            self.queries.pop_front();
        }
    }

    pub fn get_statistics(&self) -> QueryStatistics {
        self.statistics.clone()
    }

    pub fn get_recent_queries(&self, count: usize) -> Vec<&QueryInfo> {
        self.queries.iter().rev().take(count).collect()
    }
}

/// Query statistics
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct QueryStatistics {
    pub total_queries: u64,
    pub successful_queries: u64,
    pub failed_queries: u64,
    pub average_latency: Duration,
    pub min_latency: Duration,
    pub max_latency: Duration,
    pub latency_distribution: LatencyDistribution,
    pub cache_hit_rate: f32,
    pub throughput_qps: f32,
}

/// Latency histogram: each field counts queries whose latency fell in the
/// corresponding band (bucket counters, not computed percentiles).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct LatencyDistribution {
    pub p10: u64, // < 10ms
    pub p50: u64, // < 50ms
    pub p90: u64, // < 100ms
    pub p95: u64, // < 500ms
    pub p99: u64, // >= 500ms
}

/// System metrics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemMetrics {
    pub timestamp: SystemTime,
    pub cpu_usage: f32,
    pub memory_usage: u64, // bytes
    pub memory_total: u64, // bytes
    pub disk_usage: u64,   // bytes
    pub disk_total: u64,   // bytes
    pub network_in: u64,   // bytes/sec
    pub network_out: u64,  // bytes/sec
    pub open_file_descriptors: u32,
    pub thread_count: u32,
}

/// System metrics collector
#[derive(Debug)]
pub struct SystemMetricsCollector {
    metrics_history: VecDeque<SystemMetrics>,
    statistics: SystemStatistics,
    max_retention: usize,
}

impl Default for SystemMetricsCollector {
    fn default() -> Self {
        Self::new()
    }
}

impl SystemMetricsCollector {
    pub fn new() -> Self {
        Self {
            metrics_history: VecDeque::new(),
            statistics: SystemStatistics::default(),
            max_retention: 1440, // 24 hours of minute-by-minute data
        }
    }

    pub fn record_metrics(&mut self, metrics: SystemMetrics) {
        // Update statistics
        self.statistics.current_cpu_usage = metrics.cpu_usage;
        self.statistics.current_memory_usage = metrics.memory_usage;

        if metrics.cpu_usage > self.statistics.peak_cpu_usage {
            self.statistics.peak_cpu_usage = metrics.cpu_usage;
        }

        if metrics.memory_usage > self.statistics.peak_memory_usage {
            self.statistics.peak_memory_usage = metrics.memory_usage;
        }

        // Store metrics
        self.metrics_history.push_back(metrics);
        if self.metrics_history.len() > self.max_retention {
            self.metrics_history.pop_front();
        }
    }

    pub fn get_statistics(&self) -> SystemStatistics {
        self.statistics.clone()
    }

    pub fn get_recent_metrics(&self, count: usize) -> Vec<&SystemMetrics> {
        self.metrics_history.iter().rev().take(count).collect()
    }
}

/// System statistics
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SystemStatistics {
    pub current_cpu_usage: f32,
    pub peak_cpu_usage: f32,
    pub current_memory_usage: u64,
    pub peak_memory_usage: u64,
    pub average_cpu_usage: f32,
    pub average_memory_usage: u64,
}

/// Quality metrics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityMetrics {
    pub timestamp: SystemTime,
    pub precision_at_1: f32,
    pub precision_at_5: f32,
    pub precision_at_10: f32,
    pub recall_at_1: f32,
    pub recall_at_5: f32,
    pub recall_at_10: f32,
    pub f1_score: f32,
    pub mrr: f32,  // Mean Reciprocal Rank
    pub ndcg: f32, // Normalized Discounted Cumulative Gain
    pub query_coverage: f32,
    pub result_diversity: f32,
}

/// Quality metrics collector
#[derive(Debug)]
pub struct QualityMetricsCollector {
    metrics_history: VecDeque<QualityMetrics>,
    statistics: QualityStatistics,
    max_retention: usize,
}

impl Default for QualityMetricsCollector {
    fn default() -> Self {
        Self::new()
    }
}

impl QualityMetricsCollector {
    pub fn new() -> Self {
        Self {
            metrics_history: VecDeque::new(),
            statistics: QualityStatistics::default(),
            max_retention: 1000,
        }
    }

    pub fn record_metrics(&mut self, metrics: QualityMetrics) {
        // Update running statistics
        let count = self.metrics_history.len() as f32;
        if count > 0.0 {
            self.statistics.average_precision_at_10 =
                (self.statistics.average_precision_at_10 * count + metrics.precision_at_10)
                    / (count + 1.0);
            self.statistics.average_recall_at_10 = (self.statistics.average_recall_at_10 * count
                + metrics.recall_at_10)
                / (count + 1.0);
            self.statistics.average_f1_score =
                (self.statistics.average_f1_score * count + metrics.f1_score) / (count + 1.0);
        } else {
            self.statistics.average_precision_at_10 = metrics.precision_at_10;
            self.statistics.average_recall_at_10 = metrics.recall_at_10;
            self.statistics.average_f1_score = metrics.f1_score;
        }

        // Store metrics
        self.metrics_history.push_back(metrics);
        if self.metrics_history.len() > self.max_retention {
            self.metrics_history.pop_front();
        }
    }

    pub fn get_statistics(&self) -> QualityStatistics {
        self.statistics.clone()
    }
}

/// Quality statistics
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct QualityStatistics {
    pub average_precision_at_10: f32,
    pub average_recall_at_10: f32,
    pub average_f1_score: f32,
    pub trend_precision: f32,
    pub trend_recall: f32,
}

/// Alert manager
pub struct AlertManager {
    thresholds: AlertThresholds,
    active_alerts: Arc<RwLock<Vec<Alert>>>,
}

impl AlertManager {
    pub fn new(thresholds: AlertThresholds) -> Self {
        Self {
            thresholds,
            active_alerts: Arc::new(RwLock::new(Vec::new())),
        }
    }

    pub fn check_query_alerts(&self, query: &QueryInfo) {
        let latency_ms = query.end_time.duration_since(query.start_time).as_millis() as f64;

        if latency_ms > self.thresholds.max_query_latency {
            self.add_alert(Alert {
                alert_type: AlertType::HighLatency,
                severity: AlertSeverity::Warning,
                message: format!(
                    "Query latency {}ms exceeds threshold {}ms",
                    latency_ms, self.thresholds.max_query_latency
                ),
                timestamp: SystemTime::now(),
                source: "query_monitor".to_string(),
            });
        }
    }

    pub fn check_system_alerts(&self, metrics: &SystemMetrics) {
        if metrics.cpu_usage > self.thresholds.max_cpu_usage {
            self.add_alert(Alert {
                alert_type: AlertType::HighCpuUsage,
                severity: AlertSeverity::Warning,
                message: format!(
                    "CPU usage {:.1}% exceeds threshold {:.1}%",
                    metrics.cpu_usage * 100.0,
                    self.thresholds.max_cpu_usage * 100.0
                ),
                timestamp: SystemTime::now(),
                source: "system_monitor".to_string(),
            });
        }

        let memory_mb = metrics.memory_usage / (1024 * 1024);
        if memory_mb > self.thresholds.max_memory_usage {
            self.add_alert(Alert {
                alert_type: AlertType::HighMemoryUsage,
                severity: AlertSeverity::Critical,
                message: format!(
                    "Memory usage {}MB exceeds threshold {}MB",
                    memory_mb, self.thresholds.max_memory_usage
                ),
                timestamp: SystemTime::now(),
                source: "system_monitor".to_string(),
            });
        }
    }

    pub fn check_quality_alerts(&self, metrics: &QualityMetrics) {
        if metrics.recall_at_10 < self.thresholds.min_recall_at_10 {
            self.add_alert(Alert {
                alert_type: AlertType::LowRecall,
                severity: AlertSeverity::Warning,
                message: format!(
                    "Recall@10 {:.3} below threshold {:.3}",
                    metrics.recall_at_10, self.thresholds.min_recall_at_10
                ),
                timestamp: SystemTime::now(),
                source: "quality_monitor".to_string(),
            });
        }
    }

    fn add_alert(&self, alert: Alert) {
        let mut alerts = self.active_alerts.write();
        alerts.push(alert);

        // Keep only recent alerts (last hour)
        let cutoff = SystemTime::now() - Duration::from_secs(3600);
        alerts.retain(|a| a.timestamp > cutoff);
    }

    pub fn get_active_alerts(&self) -> Vec<Alert> {
        self.active_alerts.read().clone()
    }
}

/// Alert information
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Alert {
    pub alert_type: AlertType,
    pub severity: AlertSeverity,
    pub message: String,
    pub timestamp: SystemTime,
    pub source: String,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AlertType {
    HighLatency,
    HighCpuUsage,
    HighMemoryUsage,
    LowRecall,
    HighErrorRate,
    IndexCorruption,
    ServiceDown,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AlertSeverity {
    Info,
    Warning,
    Critical,
    Emergency,
}

/// Analytics engine for trend analysis and recommendations
pub struct AnalyticsEngine {
    trends: Arc<RwLock<HashMap<String, TrendData>>>,
    recommendations: Arc<RwLock<Vec<Recommendation>>>,
}

impl Default for AnalyticsEngine {
    fn default() -> Self {
        Self::new()
    }
}

impl AnalyticsEngine {
    pub fn new() -> Self {
        Self {
            trends: Arc::new(RwLock::new(HashMap::new())),
            recommendations: Arc::new(RwLock::new(Vec::new())),
        }
    }

    pub fn analyze_query(&self, _query: &QueryInfo) {
        // Placeholder for trend analysis
        // In a full implementation, this would update trend data
    }

    pub fn get_trends(&self) -> HashMap<String, TrendData> {
        self.trends.read().clone()
    }

    pub fn get_recommendations(&self) -> Vec<Recommendation> {
        self.recommendations.read().clone()
    }
}

/// Trend data for analytics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrendData {
    pub metric_name: String,
    pub values: Vec<f64>,
    pub timestamps: Vec<SystemTime>,
    pub trend_direction: TrendDirection,
    pub confidence: f32,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TrendDirection {
    Increasing,
    Decreasing,
    Stable,
    Volatile,
}

/// Recommendation for system optimization
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Recommendation {
    pub category: RecommendationCategory,
    pub priority: RecommendationPriority,
    pub title: String,
    pub description: String,
    pub estimated_impact: String,
    pub implementation_effort: String,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RecommendationCategory {
    Performance,
    Quality,
    ResourceOptimization,
    Configuration,
    Maintenance,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RecommendationPriority {
    Low,
    Medium,
    High,
    Critical,
}

/// Dashboard data structure
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DashboardData {
    pub last_updated: SystemTime,
    pub query_stats: QueryStatistics,
    pub system_stats: SystemStatistics,
    pub quality_stats: QualityStatistics,
    pub alerts_count: usize,
    pub trends: HashMap<String, Vec<f64>>,
}

impl DashboardData {
    pub fn new() -> Self {
        Self {
            last_updated: SystemTime::now(),
            query_stats: QueryStatistics::default(),
            system_stats: SystemStatistics::default(),
            quality_stats: QualityStatistics::default(),
            alerts_count: 0,
            trends: HashMap::new(),
        }
    }

    pub fn update_query_stats(&mut self, _query: &QueryInfo) {
        // Update query statistics
        self.last_updated = SystemTime::now();
        // Additional updates would be implemented here
    }

    pub fn update_system_stats(&mut self, _metrics: &SystemMetrics) {
        // Update system statistics
        self.last_updated = SystemTime::now();
        // Additional updates would be implemented here
    }

    pub fn update_quality_stats(&mut self, _metrics: &QualityMetrics) {
        // Update quality statistics
        self.last_updated = SystemTime::now();
        // Additional updates would be implemented here
    }
}

impl Default for DashboardData {
    fn default() -> Self {
        Self::new()
    }
}

/// Complete analytics report
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalyticsReport {
    pub timestamp: SystemTime,
    pub query_statistics: QueryStatistics,
    pub system_statistics: SystemStatistics,
    pub quality_statistics: QualityStatistics,
    pub active_alerts: Vec<Alert>,
    pub trends: HashMap<String, TrendData>,
    pub recommendations: Vec<Recommendation>,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_query_metrics_collection() {
        let mut collector = QueryMetricsCollector::new();

        let query = QueryInfo {
            query_id: "test_query".to_string(),
            query_type: QueryType::KNNSearch,
            query_text: Some("test query".to_string()),
            vector_dimensions: Some(384),
            k_value: Some(10),
            threshold: None,
            start_time: Instant::now() - Duration::from_millis(50),
            end_time: Instant::now(),
            success: true,
            error_message: None,
            results_count: 5,
            index_used: Some("hnsw".to_string()),
            cache_hit: false,
        };

        collector.record_query(query);

        let stats = collector.get_statistics();
        assert_eq!(stats.total_queries, 1);
        assert_eq!(stats.successful_queries, 1);
        assert_eq!(stats.failed_queries, 0);
    }

    #[test]
    fn test_alert_generation() {
        let thresholds = AlertThresholds {
            max_query_latency: 100.0,
            max_error_rate: 0.05,
            min_recall_at_10: 0.90,
            max_memory_usage: 1024,
            max_cpu_usage: 0.80,
        };

        let alert_manager = AlertManager::new(thresholds);

        let query = QueryInfo {
            query_id: "slow_query".to_string(),
            query_type: QueryType::KNNSearch,
            query_text: None,
            vector_dimensions: None,
            k_value: None,
            threshold: None,
            start_time: Instant::now() - Duration::from_millis(200), // Exceeds threshold
            end_time: Instant::now(),
            success: true,
            error_message: None,
            results_count: 5,
            index_used: None,
            cache_hit: false,
        };

        alert_manager.check_query_alerts(&query);

        let alerts = alert_manager.get_active_alerts();
        assert_eq!(alerts.len(), 1);
        assert!(matches!(alerts[0].alert_type, AlertType::HighLatency));
    }

    #[test]
    fn test_performance_monitor() {
        let config = MonitoringConfig::default();
        let monitor = EnhancedPerformanceMonitor::new(config);

        let query = QueryInfo {
            query_id: "test".to_string(),
            query_type: QueryType::KNNSearch,
            query_text: None,
            vector_dimensions: Some(384),
            k_value: Some(10),
            threshold: None,
            start_time: Instant::now() - Duration::from_millis(25),
            end_time: Instant::now(),
            success: true,
            error_message: None,
            results_count: 8,
            index_used: Some("hnsw".to_string()),
            cache_hit: true,
        };

        monitor.record_query(query);

        let dashboard = monitor.get_dashboard_data();
        assert!(dashboard.last_updated <= SystemTime::now());
    }
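
    #[test]
    fn test_prometheus_export_format() {
        // Added sketch test: exercise the Prometheus export path end-to-end
        // using default settings apart from the export format.
        let config = MonitoringConfig {
            export_config: ExportConfig {
                format: ExportFormat::Prometheus,
                ..ExportConfig::default()
            },
            ..MonitoringConfig::default()
        };
        let monitor = EnhancedPerformanceMonitor::new(config);

        let exported = monitor
            .export_metrics()
            .expect("Prometheus export should succeed");
        assert!(exported.contains("vector_search_queries_total"));
        assert!(exported.contains("vector_search_latency_seconds"));
    }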
}