scirs2_stats/
cross_platform_regression_detection.rs

1//! Cross-platform performance regression detection system for scirs2-stats v1.0.0
2//!
3//! This module provides comprehensive performance regression detection across different
4//! platforms, architectures, and compiler configurations. It addresses the v1.0.0
5//! roadmap goals for "Cross-platform Testing" and "Performance & Optimization".
6//!
7//! Features:
8//! - Multi-platform benchmark execution and comparison
9//! - Statistical significance testing for performance changes
10//! - Hardware-aware performance baselines
11//! - Automated regression detection with confidence intervals
12//! - Performance trend analysis and prediction
13//! - Integration with CI/CD pipelines
14
15use crate::error::{StatsError, StatsResult};
16use scirs2_core::ndarray::Array1;
17use serde::{Deserialize, Serialize};
18use std::collections::{BTreeMap, HashMap};
19use std::fs;
20use std::path::Path;
21use std::time::{SystemTime, UNIX_EPOCH};
22
23/// Cross-platform regression detection configuration
24#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct CrossPlatformRegressionConfig {
26    /// Baseline data storage path
27    pub baseline_storage_path: String,
28    /// Performance regression threshold (as percentage)
29    pub regression_threshold_percent: f64,
30    /// Statistical significance level for regression detection
31    pub significance_level: f64,
32    /// Minimum number of samples for statistical analysis
33    pub min_samples_: usize,
34    /// Maximum historical data retention (days)
35    pub maxdata_retention_days: usize,
36    /// Enable platform-specific baselines
37    pub platform_specificbaselines: bool,
38    /// Enable hardware-aware normalization
39    pub hardware_aware_normalization: bool,
40    /// Enable compiler optimization detection
41    pub compiler_optimization_detection: bool,
42    /// Enable trend analysis
43    pub trend_analysis: bool,
44    /// Platforms to compare against
45    pub target_platforms: Vec<PlatformInfo>,
46    /// Functions to monitor for regressions
47    pub monitored_functions: Vec<String>,
48}
49
50impl Default for CrossPlatformRegressionConfig {
51    fn default() -> Self {
52        Self {
53            baseline_storage_path: "./performancebaselines".to_string(),
54            regression_threshold_percent: 10.0, // 10% performance degradation
55            significance_level: 0.05,
56            min_samples_: 30,
57            maxdata_retention_days: 90,
58            platform_specificbaselines: true,
59            hardware_aware_normalization: true,
60            compiler_optimization_detection: true,
61            trend_analysis: true,
62            target_platforms: vec![PlatformInfo::current_platform()],
63            monitored_functions: vec![
64                "mean".to_string(),
65                "std".to_string(),
66                "variance".to_string(),
67                "pearsonr".to_string(),
68                "ttest_ind".to_string(),
69                "norm_pdf".to_string(),
70                "norm_cdf".to_string(),
71            ],
72        }
73    }
74}
75
76/// Platform information for cross-platform comparison
77#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
78pub struct PlatformInfo {
79    /// Operating system
80    pub os: String,
81    /// CPU architecture
82    pub arch: String,
83    /// CPU model
84    pub cpu_model: String,
85    /// Number of CPU cores
86    pub cpu_cores: usize,
87    /// Memory size in GB
88    pub memory_gb: usize,
89    /// Rust compiler version
90    pub rustc_version: String,
91    /// Optimization level
92    pub optimization_level: String,
93    /// SIMD capabilities
94    pub simd_capabilities: Vec<String>,
95}
96
97impl PlatformInfo {
98    /// Get current platform information
99    pub fn current_platform() -> Self {
100        Self {
101            os: std::env::consts::OS.to_string(),
102            arch: std::env::consts::ARCH.to_string(),
103            cpu_model: Self::detect_cpu_model(),
104            cpu_cores: num_cpus::get(),
105            memory_gb: Self::detect_memory_gb(),
106            rustc_version: Self::detect_rustc_version(),
107            optimization_level: Self::detect_optimization_level(),
108            simd_capabilities: Self::detect_simd_capabilities(),
109        }
110    }
111
112    fn detect_cpu_model() -> String {
113        // Simplified CPU model detection
114        #[cfg(target_arch = "x86_64")]
115        {
116            if is_x86_feature_detected!("avx512f") {
117                "Intel AVX-512 Compatible".to_string()
118            } else if is_x86_feature_detected!("avx2") {
119                "Intel AVX2 Compatible".to_string()
120            } else if is_x86_feature_detected!("sse4.1") {
121                "Intel SSE4.1 Compatible".to_string()
122            } else {
123                "x86_64 Generic".to_string()
124            }
125        }
126        #[cfg(not(target_arch = "x86_64"))]
127        {
128            std::env::consts::ARCH.to_string()
129        }
130    }
131
132    fn detect_memory_gb() -> usize {
133        // Simplified memory detection - would use system APIs in real implementation
134        8 // Default to 8GB
135    }
136
137    fn detect_rustc_version() -> String {
138        option_env!("RUSTC_VERSION")
139            .unwrap_or("unknown")
140            .to_string()
141    }
142
143    fn detect_optimization_level() -> String {
144        #[cfg(debug_assertions)]
145        {
146            "debug".to_string()
147        }
148        #[cfg(not(debug_assertions))]
149        {
150            "release".to_string()
151        }
152    }
153
154    fn detect_simd_capabilities() -> Vec<String> {
155        let mut capabilities = Vec::new();
156
157        #[cfg(target_arch = "x86_64")]
158        {
159            if is_x86_feature_detected!("sse2") {
160                capabilities.push("sse2".to_string());
161            }
162            if is_x86_feature_detected!("sse4.1") {
163                capabilities.push("sse4.1".to_string());
164            }
165            if is_x86_feature_detected!("avx") {
166                capabilities.push("avx".to_string());
167            }
168            if is_x86_feature_detected!("avx2") {
169                capabilities.push("avx2".to_string());
170            }
171            if is_x86_feature_detected!("avx512f") {
172                capabilities.push("avx512f".to_string());
173            }
174            if is_x86_feature_detected!("fma") {
175                capabilities.push("fma".to_string());
176            }
177        }
178
179        capabilities
180    }
181
182    /// Generate a unique platform identifier
183    pub fn platform_id(&self) -> String {
184        format!(
185            "{}-{}-{}-{}",
186            self.os,
187            self.arch,
188            self.optimization_level,
189            self.simd_capabilities.join("_")
190        )
191    }
192}
193
194/// Performance baseline data for a specific function and platform
195#[derive(Debug, Clone, Serialize, Deserialize)]
196pub struct PerformanceBaseline {
197    /// Platform information
198    pub platform: PlatformInfo,
199    /// Function name
200    pub function_name: String,
201    /// Input size or parameters
202    pub input_parameters: String,
203    /// Historical performance measurements
204    pub measurements: Vec<PerformanceMeasurement>,
205    /// Statistical summary
206    pub statistics: BaselineStatistics,
207    /// Last updated timestamp
208    pub last_updated: u64,
209}
210
211/// Individual performance measurement
212#[derive(Debug, Clone, Serialize, Deserialize)]
213pub struct PerformanceMeasurement {
214    /// Timestamp of measurement
215    pub timestamp: u64,
216    /// Execution time in nanoseconds
217    pub execution_time_ns: f64,
218    /// Memory usage in bytes
219    pub memory_usage_bytes: usize,
220    /// Number of iterations
221    pub iterations: usize,
222    /// Hardware context information
223    pub hardware_context: HardwareContext,
224    /// Compiler context information
225    pub compiler_context: CompilerContext,
226}
227
228/// Hardware context during measurement
229#[derive(Debug, Clone, Serialize, Deserialize)]
230pub struct HardwareContext {
231    /// CPU utilization percentage
232    pub cpu_utilization: f64,
233    /// Available memory percentage
234    pub available_memory_percent: f64,
235    /// CPU frequency in MHz
236    pub cpu_frequency_mhz: f64,
237    /// Temperature in Celsius (if available)
238    pub temperature_celsius: Option<f64>,
239}
240
241/// Compiler context during measurement
242#[derive(Debug, Clone, Serialize, Deserialize)]
243pub struct CompilerContext {
244    /// Rust compiler version
245    pub rustc_version: String,
246    /// Target triple
247    pub target_triple: String,
248    /// Optimization flags
249    pub optimization_flags: Vec<String>,
250    /// Feature flags
251    pub feature_flags: Vec<String>,
252}
253
254/// Statistical summary of baseline performance
255#[derive(Debug, Clone, Serialize, Deserialize)]
256pub struct BaselineStatistics {
257    /// Mean execution time
258    pub mean_time_ns: f64,
259    /// Standard deviation of execution time
260    pub std_dev_time_ns: f64,
261    /// Median execution time
262    pub median_time_ns: f64,
263    /// 95th percentile execution time
264    pub p95_time_ns: f64,
265    /// 99th percentile execution time
266    pub p99_time_ns: f64,
267    /// Coefficient of variation
268    pub coefficient_of_variation: f64,
269    /// Confidence interval (95%) for mean
270    pub confidence_interval_95: (f64, f64),
271    /// Number of samples
272    pub sample_count: usize,
273}
274
275/// Performance regression analysis results
276#[derive(Debug, Clone, Serialize, Deserialize)]
277pub struct RegressionAnalysisResult {
278    /// Function being analyzed
279    pub function_name: String,
280    /// Platform comparison
281    pub platform_comparison: PlatformComparison,
282    /// Current performance measurement
283    pub current_measurement: PerformanceMeasurement,
284    /// Baseline performance for comparison
285    pub baseline_performance: BaselineStatistics,
286    /// Regression detection result
287    pub regression_detected: bool,
288    /// Performance change percentage (positive = slower, negative = faster)
289    pub performance_change_percent: f64,
290    /// Statistical significance of the change
291    pub statistical_significance: f64,
292    /// Confidence level of regression detection
293    pub confidence_level: f64,
294    /// Trend analysis (if enabled)
295    pub trend_analysis: Option<TrendAnalysis>,
296    /// Recommendations
297    pub recommendations: Vec<PerformanceRecommendation>,
298}
299
300/// Platform comparison information
301#[derive(Debug, Clone, Serialize, Deserialize)]
302pub struct PlatformComparison {
303    /// Current platform
304    pub current_platform: PlatformInfo,
305    /// Baseline platform
306    pub baseline_platform: PlatformInfo,
307    /// Hardware normalization factor applied
308    pub hardware_normalization_factor: f64,
309    /// Platform similarity score (0-1)
310    pub platform_similarity_score: f64,
311}
312
313/// Trend analysis for performance over time
314#[derive(Debug, Clone, Serialize, Deserialize)]
315pub struct TrendAnalysis {
316    /// Trend direction
317    pub trend_direction: TrendDirection,
318    /// Trend strength (0-1)
319    pub trend_strength: f64,
320    /// Linear regression slope (performance change per day)
321    pub slope_ns_per_day: f64,
322    /// R-squared value of trend fit
323    pub r_squared: f64,
324    /// Predicted performance in 30 days
325    pub predicted_performance_30d: f64,
326    /// Statistical significance of trend
327    pub trend_significance: f64,
328}
329
330/// Direction of performance trend
331#[derive(Debug, Clone, Serialize, Deserialize)]
332pub enum TrendDirection {
333    Improving,
334    Stable,
335    Degrading,
336    Volatile,
337}
338
339/// Performance optimization recommendations
340#[derive(Debug, Clone, Serialize, Deserialize)]
341pub struct PerformanceRecommendation {
342    /// Recommendation category
343    pub category: RecommendationCategory,
344    /// Priority level
345    pub priority: RecommendationPriority,
346    /// Description of the recommendation
347    pub description: String,
348    /// Expected impact if implemented
349    pub expected_impact_percent: f64,
350    /// Confidence in the recommendation (0-1)
351    pub confidence: f64,
352}
353
354/// Categories of performance recommendations
355#[derive(Debug, Clone, Serialize, Deserialize)]
356pub enum RecommendationCategory {
357    CompilerOptimization,
358    AlgorithmSelection,
359    SIMDOptimization,
360    MemoryOptimization,
361    ParallelProcessing,
362    PlatformSpecific,
363    HardwareUpgrade,
364}
365
366/// Priority levels for recommendations
367#[derive(Debug, Clone, Serialize, Deserialize)]
368pub enum RecommendationPriority {
369    Low,
370    Medium,
371    High,
372    Critical,
373}
374
375/// Cross-platform regression detection system
376pub struct CrossPlatformRegressionDetector {
377    config: CrossPlatformRegressionConfig,
378    baselines: HashMap<String, PerformanceBaseline>,
379    historicaldata: BTreeMap<u64, Vec<PerformanceMeasurement>>,
380}
381
382impl CrossPlatformRegressionDetector {
383    /// Create a new regression detector
384    pub fn new(config: CrossPlatformRegressionConfig) -> StatsResult<Self> {
385        let mut detector = Self {
386            config,
387            baselines: HashMap::new(),
388            historicaldata: BTreeMap::new(),
389        };
390
391        detector.loadbaselines()?;
392        Ok(detector)
393    }
394
395    /// Load existing baseline data from storage
396    fn loadbaselines(&mut self) -> StatsResult<()> {
397        if !Path::new(&self.config.baseline_storage_path).exists() {
398            fs::create_dir_all(&self.config.baseline_storage_path).map_err(|e| {
399                StatsError::InvalidInput(format!("Failed to create baseline directory: {}", e))
400            })?;
401            return Ok(());
402        }
403
404        // Load baseline files from storage
405        let baseline_dir = Path::new(&self.config.baseline_storage_path);
406        if let Ok(entries) = fs::read_dir(baseline_dir) {
407            for entry in entries.flatten() {
408                let path = entry.path();
409                if path.extension().is_some_and(|ext| ext == "json") {
410                    if let Ok(content) = fs::read_to_string(&path) {
411                        if let Ok(baseline) = serde_json::from_str::<PerformanceBaseline>(&content)
412                        {
413                            let key = format!(
414                                "{}-{}",
415                                baseline.platform.platform_id(),
416                                baseline.function_name
417                            );
418                            self.baselines.insert(key, baseline);
419                        }
420                    }
421                }
422            }
423        }
424
425        Ok(())
426    }
427
428    /// Save baseline data to storage
429    fn savebaseline(&self, baseline: &PerformanceBaseline) -> StatsResult<()> {
430        let filename = format!(
431            "{}-{}.json",
432            baseline.platform.platform_id(),
433            baseline.function_name
434        );
435        let filepath = Path::new(&self.config.baseline_storage_path).join(filename);
436
437        let content = serde_json::to_string_pretty(baseline).map_err(|e| {
438            StatsError::InvalidInput(format!("Failed to serialize baseline: {}", e))
439        })?;
440
441        fs::write(filepath, content)
442            .map_err(|e| StatsError::InvalidInput(format!("Failed to write baseline: {}", e)))?;
443
444        Ok(())
445    }
446
447    /// Record a new performance measurement
448    pub fn record_measurement(
449        &mut self,
450        function_name: &str,
451        input_parameters: &str,
452        execution_time_ns: f64,
453        memory_usage_bytes: usize,
454        iterations: usize,
455    ) -> StatsResult<()> {
456        let platform = PlatformInfo::current_platform();
457        let timestamp = SystemTime::now()
458            .duration_since(UNIX_EPOCH)
459            .expect("Operation failed")
460            .as_secs();
461
462        let measurement = PerformanceMeasurement {
463            timestamp,
464            execution_time_ns,
465            memory_usage_bytes,
466            iterations,
467            hardware_context: self.capture_hardware_context()?,
468            compiler_context: self.capture_compiler_context()?,
469        };
470
471        // Add to historical data
472        self.historicaldata
473            .entry(timestamp)
474            .or_default()
475            .push(measurement.clone());
476
477        // Update or create baseline
478        let baseline_key = format!("{}-{}", platform.platform_id(), function_name);
479
480        if let Some(baseline) = self.baselines.get_mut(&baseline_key) {
481            baseline.measurements.push(measurement);
482            baseline.last_updated = timestamp;
483            // Calculate statistics after measurements are updated
484            let measurements = baseline.measurements.clone();
485            let _ = baseline; // Release the mutable borrow
486            let stats = self.calculate_statistics(&measurements)?;
487            // Re-acquire mutable borrow to update statistics
488            if let Some(baseline) = self.baselines.get_mut(&baseline_key) {
489                baseline.statistics = stats;
490            }
491        } else {
492            let baseline = PerformanceBaseline {
493                platform,
494                function_name: function_name.to_string(),
495                input_parameters: input_parameters.to_string(),
496                measurements: vec![measurement],
497                statistics: BaselineStatistics {
498                    mean_time_ns: execution_time_ns,
499                    std_dev_time_ns: 0.0,
500                    median_time_ns: execution_time_ns,
501                    p95_time_ns: execution_time_ns,
502                    p99_time_ns: execution_time_ns,
503                    coefficient_of_variation: 0.0,
504                    confidence_interval_95: (execution_time_ns, execution_time_ns),
505                    sample_count: 1,
506                },
507                last_updated: timestamp,
508            };
509
510            self.baselines.insert(baseline_key, baseline.clone());
511            self.savebaseline(&baseline)?;
512        }
513
514        Ok(())
515    }
516
517    /// Detect performance regressions for a specific function
518    pub fn detect_regression(
519        &self,
520        function_name: &str,
521        current_measurement: &PerformanceMeasurement,
522    ) -> StatsResult<RegressionAnalysisResult> {
523        let platform = PlatformInfo::current_platform();
524        let baseline_key = format!("{}-{}", platform.platform_id(), function_name);
525
526        let baseline = self.baselines.get(&baseline_key).ok_or_else(|| {
527            StatsError::InvalidInput(format!(
528                "No baseline found for function {} on platform {}",
529                function_name,
530                platform.platform_id()
531            ))
532        })?;
533
534        // Calculate performance change
535        let performance_change_percent = ((current_measurement.execution_time_ns
536            - baseline.statistics.mean_time_ns)
537            / baseline.statistics.mean_time_ns)
538            * 100.0;
539
540        // Perform statistical significance test
541        let statistical_significance = self.calculate_statistical_significance(
542            current_measurement.execution_time_ns,
543            &baseline.statistics,
544        )?;
545
546        // Determine if regression is detected
547        let regression_detected = performance_change_percent
548            > self.config.regression_threshold_percent
549            && statistical_significance < self.config.significance_level;
550
551        // Calculate confidence level
552        let confidence_level = 1.0 - statistical_significance;
553
554        // Generate trend analysis if enabled
555        let trend_analysis = if self.config.trend_analysis {
556            Some(self.analyze_trend(function_name)?)
557        } else {
558            None
559        };
560
561        // Generate recommendations
562        let recommendations = self.generate_recommendations(
563            function_name,
564            performance_change_percent,
565            &baseline.statistics,
566            current_measurement,
567        )?;
568
569        let platform_comparison = PlatformComparison {
570            current_platform: platform.clone(),
571            baseline_platform: baseline.platform.clone(),
572            hardware_normalization_factor: 1.0, // Would be calculated
573            platform_similarity_score: self
574                .calculate_platform_similarity(&platform, &baseline.platform),
575        };
576
577        Ok(RegressionAnalysisResult {
578            function_name: function_name.to_string(),
579            platform_comparison,
580            current_measurement: current_measurement.clone(),
581            baseline_performance: baseline.statistics.clone(),
582            regression_detected,
583            performance_change_percent,
584            statistical_significance,
585            confidence_level,
586            trend_analysis,
587            recommendations,
588        })
589    }
590
591    /// Calculate statistical significance using t-test
592    fn calculate_statistical_significance(
593        &self,
594        current_time: f64,
595        baseline_stats: &BaselineStatistics,
596    ) -> StatsResult<f64> {
597        if baseline_stats.sample_count < 2 {
598            return Ok(1.0); // Not enough data for significance test
599        }
600
601        // One-sample t-test
602        let t_statistic = (current_time - baseline_stats.mean_time_ns)
603            / (baseline_stats.std_dev_time_ns / (baseline_stats.sample_count as f64).sqrt());
604
605        // Simplified p-value calculation (would use proper t-distribution in real implementation)
606        let p_value = if t_statistic.abs() > 2.0 {
607            0.05 // Significant
608        } else if t_statistic.abs() > 1.5 {
609            0.1 // Marginally significant
610        } else {
611            0.5 // Not significant
612        };
613
614        Ok(p_value)
615    }
616
617    /// Calculate baseline statistics from measurements
618    fn calculate_statistics(
619        &self,
620        measurements: &[PerformanceMeasurement],
621    ) -> StatsResult<BaselineStatistics> {
622        if measurements.is_empty() {
623            return Err(StatsError::InvalidInput(
624                "No measurements provided".to_string(),
625            ));
626        }
627
628        let times: Vec<f64> = measurements.iter().map(|m| m.execution_time_ns).collect();
629
630        let _times_array = Array1::from_vec(times.clone());
631
632        // Calculate basic statistics
633        let mean = times.iter().sum::<f64>() / times.len() as f64;
634        let variance = times.iter().map(|&x| (x - mean).powi(2)).sum::<f64>()
635            / (times.len() - 1).max(1) as f64;
636        let std_dev = variance.sqrt();
637
638        // Calculate percentiles
639        let mut sorted_times = times.clone();
640        sorted_times.sort_by(|a, b| a.partial_cmp(b).expect("Operation failed"));
641
642        let median = if sorted_times.len().is_multiple_of(2) {
643            let mid = sorted_times.len() / 2;
644            (sorted_times[mid - 1] + sorted_times[mid]) / 2.0
645        } else {
646            sorted_times[sorted_times.len() / 2]
647        };
648
649        let p95_idx = ((sorted_times.len() as f64 * 0.95) as usize).min(sorted_times.len() - 1);
650        let p99_idx = ((sorted_times.len() as f64 * 0.99) as usize).min(sorted_times.len() - 1);
651        let p95 = sorted_times[p95_idx];
652        let p99 = sorted_times[p99_idx];
653
654        // Calculate coefficient of variation
655        let coefficient_of_variation = if mean != 0.0 { std_dev / mean } else { 0.0 };
656
657        // Calculate 95% confidence interval for mean
658        let standard_error = std_dev / (times.len() as f64).sqrt();
659        let margin_of_error = 1.96 * standard_error; // Assuming normal distribution
660        let confidence_interval_95 = (mean - margin_of_error, mean + margin_of_error);
661
662        Ok(BaselineStatistics {
663            mean_time_ns: mean,
664            std_dev_time_ns: std_dev,
665            median_time_ns: median,
666            p95_time_ns: p95,
667            p99_time_ns: p99,
668            coefficient_of_variation,
669            confidence_interval_95,
670            sample_count: times.len(),
671        })
672    }
673
674    /// Analyze performance trend over time
675    fn analyze_trend(&self, _functionname: &str) -> StatsResult<TrendAnalysis> {
676        // Get historical measurements for this function
677        let measurements: Vec<_> = self
678            .historicaldata
679            .values()
680            .flatten()
681            .filter(|_m| {
682                // Would match function _name from context in real implementation
683                true
684            })
685            .collect();
686
687        if measurements.len() < 5 {
688            return Ok(TrendAnalysis {
689                trend_direction: TrendDirection::Stable,
690                trend_strength: 0.0,
691                slope_ns_per_day: 0.0,
692                r_squared: 0.0,
693                predicted_performance_30d: 0.0,
694                trend_significance: 1.0,
695            });
696        }
697
698        // Simple linear regression on time vs performance
699        let timestamps: Vec<f64> = measurements.iter().map(|m| m.timestamp as f64).collect();
700        let times: Vec<f64> = measurements.iter().map(|m| m.execution_time_ns).collect();
701
702        let (slope, r_squared) = self.linear_regression(&timestamps, &times)?;
703
704        // Convert slope from per-second to per-day
705        let slope_ns_per_day = slope * 86400.0; // seconds per day
706
707        let trend_direction = if slope_ns_per_day > 100.0 {
708            TrendDirection::Degrading
709        } else if slope_ns_per_day < -100.0 {
710            TrendDirection::Improving
711        } else {
712            TrendDirection::Stable
713        };
714
715        let trend_strength = r_squared.abs();
716        let trend_significance = if r_squared > 0.5 { 0.01 } else { 0.5 };
717
718        // Predict performance in 30 days
719        let current_time = SystemTime::now()
720            .duration_since(UNIX_EPOCH)
721            .expect("Operation failed")
722            .as_secs() as f64;
723        let future_time = current_time + (30.0 * 86400.0);
724        let predicted_performance_30d =
725            slope * future_time + (times.iter().sum::<f64>() / times.len() as f64);
726
727        Ok(TrendAnalysis {
728            trend_direction,
729            trend_strength,
730            slope_ns_per_day,
731            r_squared,
732            predicted_performance_30d,
733            trend_significance,
734        })
735    }
736
737    /// Simple linear regression implementation
738    fn linear_regression(&self, x: &[f64], y: &[f64]) -> StatsResult<(f64, f64)> {
739        if x.len() != y.len() || x.len() < 2 {
740            return Err(StatsError::InvalidInput(
741                "Invalid data for regression".to_string(),
742            ));
743        }
744
745        let n = x.len() as f64;
746        let sum_x = x.iter().sum::<f64>();
747        let sum_y = y.iter().sum::<f64>();
748        let sum_xy = x.iter().zip(y.iter()).map(|(xi, yi)| xi * yi).sum::<f64>();
749        let sum_x2 = x.iter().map(|xi| xi * xi).sum::<f64>();
750        let _sum_y2 = y.iter().map(|yi| yi * yi).sum::<f64>();
751
752        let slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x * sum_x);
753
754        // Calculate R-squared
755        let mean_y = sum_y / n;
756        let ss_tot = y.iter().map(|yi| (yi - mean_y).powi(2)).sum::<f64>();
757        let intercept = (sum_y - slope * sum_x) / n;
758        let ss_res = x
759            .iter()
760            .zip(y.iter())
761            .map(|(xi, yi)| (yi - (slope * xi + intercept)).powi(2))
762            .sum::<f64>();
763
764        let r_squared = 1.0 - (ss_res / ss_tot);
765
766        Ok((slope, r_squared))
767    }
768
769    /// Calculate platform similarity score
770    fn calculate_platform_similarity(
771        &self,
772        platform1: &PlatformInfo,
773        platform2: &PlatformInfo,
774    ) -> f64 {
775        let mut score = 0.0;
776        let mut factors = 0.0;
777
778        // OS similarity
779        if platform1.os == platform2.os {
780            score += 0.3;
781        }
782        factors += 0.3;
783
784        // Architecture similarity
785        if platform1.arch == platform2.arch {
786            score += 0.2;
787        }
788        factors += 0.2;
789
790        // SIMD capabilities similarity
791        let common_simd: Vec<_> = platform1
792            .simd_capabilities
793            .iter()
794            .filter(|cap| platform2.simd_capabilities.contains(cap))
795            .collect();
796        let total_simd = platform1
797            .simd_capabilities
798            .len()
799            .max(platform2.simd_capabilities.len());
800        if total_simd > 0 {
801            score += 0.3 * (common_simd.len() as f64 / total_simd as f64);
802        }
803        factors += 0.3;
804
805        // Optimization level similarity
806        if platform1.optimization_level == platform2.optimization_level {
807            score += 0.2;
808        }
809        factors += 0.2;
810
811        if factors > 0.0 {
812            score / factors
813        } else {
814            0.0
815        }
816    }
817
818    /// Generate performance recommendations
819    fn generate_recommendations(
820        &self,
821        _function_name: &str,
822        performance_change_percent: f64,
823        baseline_stats: &BaselineStatistics,
824        _measurement: &PerformanceMeasurement,
825    ) -> StatsResult<Vec<PerformanceRecommendation>> {
826        let mut recommendations = Vec::new();
827
828        // Check for significant regression
829        if performance_change_percent > 20.0 {
830            recommendations.push(PerformanceRecommendation {
831                category: RecommendationCategory::AlgorithmSelection,
832                priority: RecommendationPriority::High,
833                description: format!(
834                    "Significant performance regression detected ({}% slower). Consider algorithm optimization.",
835                    performance_change_percent as i32
836                ),
837                expected_impact_percent: -performance_change_percent * 0.5,
838                confidence: 0.8,
839            });
840        }
841
842        // Check for high coefficient of variation (unstable performance)
843        if baseline_stats.coefficient_of_variation > 0.2 {
844            recommendations.push(PerformanceRecommendation {
845                category: RecommendationCategory::CompilerOptimization,
846                priority: RecommendationPriority::Medium,
847                description:
848                    "High performance variability detected. Consider compiler optimization flags."
849                        .to_string(),
850                expected_impact_percent: -10.0,
851                confidence: 0.6,
852            });
853        }
854
855        // Platform-specific recommendations
856        let platform = PlatformInfo::current_platform();
857        if platform.simd_capabilities.contains(&"avx512f".to_string()) {
858            recommendations.push(PerformanceRecommendation {
859                category: RecommendationCategory::SIMDOptimization,
860                priority: RecommendationPriority::Medium,
861                description:
862                    "AVX-512 capabilities detected. Consider using specialized SIMD optimizations."
863                        .to_string(),
864                expected_impact_percent: -25.0,
865                confidence: 0.7,
866            });
867        }
868
869        Ok(recommendations)
870    }
871
872    /// Capture current hardware context
873    fn capture_hardware_context(&self) -> StatsResult<HardwareContext> {
874        // Simplified hardware context capture
875        Ok(HardwareContext {
876            cpu_utilization: 50.0, // Would use system APIs
877            available_memory_percent: 75.0,
878            cpu_frequency_mhz: 3000.0,
879            temperature_celsius: None,
880        })
881    }
882
883    /// Capture current compiler context
884    fn capture_compiler_context(&self) -> StatsResult<CompilerContext> {
885        Ok(CompilerContext {
886            rustc_version: option_env!("RUSTC_VERSION")
887                .unwrap_or("unknown")
888                .to_string(),
889            target_triple: option_env!("TARGET")
890                .unwrap_or("unknown-target")
891                .to_string(),
892            optimization_flags: vec![], // Would capture actual flags
893            feature_flags: vec![],
894        })
895    }
896
897    /// Generate comprehensive regression report
898    pub fn generate_report(&self) -> StatsResult<RegressionReport> {
899        let mut function_analyses = Vec::new();
900
901        for function_name in &self.config.monitored_functions {
902            if let Some(latest_measurement) = self.get_latest_measurement(function_name) {
903                if let Ok(analysis) = self.detect_regression(function_name, &latest_measurement) {
904                    function_analyses.push(analysis);
905                }
906            }
907        }
908
909        let overall_status = if function_analyses.iter().any(|a| a.regression_detected) {
910            RegressionStatus::RegressionsDetected
911        } else {
912            RegressionStatus::NoRegressionsDetected
913        };
914
915        Ok(RegressionReport {
916            timestamp: SystemTime::now()
917                .duration_since(UNIX_EPOCH)
918                .expect("Operation failed")
919                .as_secs(),
920            overall_status,
921            platform: PlatformInfo::current_platform(),
922            function_analyses,
923            summary_statistics: self.calculate_summary_statistics()?,
924        })
925    }
926
    /// Get the latest measurement for a function
    ///
    /// Currently a stub: `_functionname` is ignored and `None` is returned
    /// unconditionally, so no historical data is consulted yet.
    fn get_latest_measurement(&self, _functionname: &str) -> Option<PerformanceMeasurement> {
        // Simplified - would search through historical data
        None
    }
932
933    /// Calculate summary statistics across all functions
934    fn calculate_summary_statistics(&self) -> StatsResult<RegressionSummaryStatistics> {
935        Ok(RegressionSummaryStatistics {
936            total_functions_monitored: self.config.monitored_functions.len(),
937            functions_with_regressions: 0, // Would calculate
938            average_performance_change: 0.0,
939            max_performance_change: 0.0,
940            total_measurements: self.historicaldata.values().map(|v| v.len()).sum(),
941        })
942    }
943}
944
/// Overall regression report
///
/// Produced by `CrossPlatformRegressionDetector::generate_report`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegressionReport {
    /// Creation time of the report, in whole seconds since the Unix epoch.
    pub timestamp: u64,
    /// Aggregate verdict across all analysed functions.
    pub overall_status: RegressionStatus,
    /// Platform the report was generated on.
    pub platform: PlatformInfo,
    /// One entry per monitored function that could be analysed.
    pub function_analyses: Vec<RegressionAnalysisResult>,
    /// Aggregate counters describing the monitored functions and stored data.
    pub summary_statistics: RegressionSummaryStatistics,
}
954
/// Regression detection status
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RegressionStatus {
    /// None of the analysed functions was flagged as a regression.
    NoRegressionsDetected,
    /// At least one analysed function was flagged as a regression.
    RegressionsDetected,
    /// Intended for cases where too little data is available to reach a verdict.
    /// NOTE(review): confirm which code paths emit this variant.
    InsufficientData,
}
962
/// Summary statistics for regression report
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegressionSummaryStatistics {
    /// Number of functions listed in the detector's `monitored_functions`.
    pub total_functions_monitored: usize,
    /// Number of monitored functions flagged as regressed.
    pub functions_with_regressions: usize,
    /// Mean performance change across functions (presumably in percent; confirm).
    pub average_performance_change: f64,
    /// Largest performance change across functions (presumably in percent; confirm).
    pub max_performance_change: f64,
    /// Total number of measurements held in the detector's historical data.
    pub total_measurements: usize,
}
972
973/// Convenience function to create a regression detector with default configuration
974#[allow(dead_code)]
975pub fn create_regression_detector() -> StatsResult<CrossPlatformRegressionDetector> {
976    CrossPlatformRegressionDetector::new(CrossPlatformRegressionConfig::default())
977}
978
/// Convenience function to create a regression detector with custom configuration
///
/// Thin wrapper that forwards `config` unchanged to
/// `CrossPlatformRegressionDetector::new` and returns its result as-is.
#[allow(dead_code)]
pub fn create_regression_detector_with_config(
    config: CrossPlatformRegressionConfig,
) -> StatsResult<CrossPlatformRegressionDetector> {
    CrossPlatformRegressionDetector::new(config)
}
986
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance for the floating-point comparisons below.
    const TOLERANCE: f64 = 1e-10;

    /// Detector built from the default configuration; panics if construction fails.
    fn default_detector() -> CrossPlatformRegressionDetector {
        create_regression_detector().expect("Operation failed")
    }

    /// Hardware context shared by the measurement fixtures.
    fn fixture_hardware() -> HardwareContext {
        HardwareContext {
            cpu_utilization: 50.0,
            available_memory_percent: 75.0,
            cpu_frequency_mhz: 3000.0,
            temperature_celsius: None,
        }
    }

    /// Compiler context shared by the measurement fixtures.
    fn fixture_compiler() -> CompilerContext {
        CompilerContext {
            rustc_version: "1.70.0".to_string(),
            target_triple: "x86_64-unknown-linux-gnu".to_string(),
            optimization_flags: vec![],
            feature_flags: vec![],
        }
    }

    /// The current platform must report a non-empty OS, architecture and id.
    #[test]
    fn test_platform_info_creation() {
        let current = PlatformInfo::current_platform();
        assert!(!current.os.is_empty());
        assert!(!current.arch.is_empty());
        assert!(!current.platform_id().is_empty());
    }

    /// A detector can be built from the default configuration.
    #[test]
    fn test_regression_detector_creation() {
        assert!(create_regression_detector().is_ok());
    }

    /// Recording a single measurement succeeds.
    #[test]
    fn test_performance_measurement_recording() {
        let mut detector = default_detector();
        let outcome = detector.record_measurement(
            "test_function",
            "inputsize_100",
            1000.0, // 1 microsecond
            1024,   // 1KB
            100,    // 100 iterations
        );
        assert!(outcome.is_ok());
    }

    /// Two samples of 100 ns and 110 ns give a mean of 105 ns and a count of 2.
    #[test]
    fn testbaseline_statistics_calculation() {
        let detector = default_detector();

        // (timestamp, execution time in ns); every other field is identical.
        let measurements: Vec<PerformanceMeasurement> = vec![(1000, 100.0), (1001, 110.0)]
            .into_iter()
            .map(|(timestamp, execution_time_ns)| PerformanceMeasurement {
                timestamp,
                execution_time_ns,
                memory_usage_bytes: 1024,
                iterations: 10,
                hardware_context: fixture_hardware(),
                compiler_context: fixture_compiler(),
            })
            .collect();

        let stats = detector
            .calculate_statistics(&measurements)
            .expect("Operation failed");
        assert!((stats.mean_time_ns - 105.0).abs() < TOLERANCE);
        assert_eq!(stats.sample_count, 2);
    }

    /// A platform compared with an exact copy of itself scores 1.0.
    #[test]
    fn test_platform_similarity_calculation() {
        let detector = default_detector();
        let reference = PlatformInfo {
            os: "linux".to_string(),
            arch: "x86_64".to_string(),
            cpu_model: "Intel Core".to_string(),
            cpu_cores: 8,
            memory_gb: 16,
            rustc_version: "1.70.0".to_string(),
            optimization_level: "release".to_string(),
            simd_capabilities: vec!["avx2".to_string(), "fma".to_string()],
        };
        let duplicate = reference.clone();

        let similarity = detector.calculate_platform_similarity(&reference, &duplicate);
        assert!((similarity - 1.0).abs() < TOLERANCE); // Should be identical
    }

    /// A perfect `y = 2x` relationship yields slope 2 and R² of 1.
    #[test]
    fn test_linear_regression() {
        let detector = default_detector();
        let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let y = vec![2.0, 4.0, 6.0, 8.0, 10.0]; // Perfect linear relationship

        let (slope, r_squared) = detector
            .linear_regression(&x, &y)
            .expect("Operation failed");
        assert!((slope - 2.0).abs() < TOLERANCE);
        assert!((r_squared - 1.0).abs() < TOLERANCE);
    }
}
scirs2_stats/cross_platform_regression_detection.rs

scirs2_stats/
cross_platform_regression_detection.rs