scirs2-stats 0.4.2

Statistical functions module for SciRS2 (scirs2-stats)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
//! Advanced Error Handling Enhancements
//!
//! Advanced error handling specifically designed for Advanced mode operations,
//! providing intelligent diagnostics, performance-aware suggestions, and
//! adaptive recovery strategies.

use crate::error::StatsError;
use std::time::{Duration, Instant};

/// Advanced-specific error context with performance tracking
///
/// Describes the operation that was running when an error or warning was
/// raised. The message and recovery helpers in this module read these fields
/// to choose which suggestion to emit.
#[derive(Debug, Clone)]
pub struct AdvancedErrorContext {
    /// Timestamp associated with the operation; `AdvancedContextBuilder::new`
    /// fills it with `Instant::now()` at builder creation time.
    pub operation_start: Instant,
    /// Size of the input, reported as "elements" in messages and compared
    /// against the 1000 / 10_000 / 1_000_000 thresholds by the helpers.
    pub datasize: usize,
    /// Memory usage in megabytes; values above 1000.0 trigger the streaming
    /// suggestion in `AdvancedErrorRecovery::generate_suggestions`.
    pub memory_usage_mb: f64,
    /// Whether SIMD is already enabled (suppresses "enable SIMD" advice).
    pub simd_enabled: bool,
    /// Whether parallel processing is already enabled (suppresses
    /// "enable parallel" advice).
    pub parallel_enabled: bool,
    /// Optional optimization hint attached to the context;
    /// `AdvancedContextBuilder::build` always leaves it as `None`.
    pub suggested_optimization: Option<OptimizationSuggestion>,
}

/// Performance-aware optimization suggestions
///
/// Each variant carries the data that
/// `AdvancedErrorMessages::optimization_opportunity` interpolates into its
/// human-readable message.
#[derive(Debug, Clone)]
pub enum OptimizationSuggestion {
    /// Recommend turning on SIMD.
    EnableSIMD {
        /// Free-form explanation inserted into the message.
        reason: String,
        /// Speedup factor, rendered as `{:.1}x` in the message.
        expected_speedup: f64,
    },
    /// Recommend turning on parallel processing.
    EnableParallel {
        /// Free-form explanation inserted into the message.
        reason: String,
        /// Rendered as "Minimum data size" in the message.
        mindatasize: usize,
    },
    /// Recommend lowering memory consumption.
    ReduceMemoryUsage {
        /// Current usage in megabytes (rendered with one decimal).
        current_mb: f64,
        /// Target usage in megabytes (rendered with one decimal).
        suggested_mb: f64,
        /// Name or description of the technique to use.
        strategy: String,
    },
    /// Recommend processing the data in chunks.
    ChunkProcessing {
        /// Chunk size, reported in elements.
        chunksize: usize,
        /// Free-form explanation appended to the message.
        reason: String,
    },
    /// Recommend switching to a different algorithm.
    AlgorithmChange {
        /// Name of the algorithm currently in use.
        current: String,
        /// Name of the algorithm to switch to.
        suggested: String,
        /// Free-form explanation appended to the message.
        reason: String,
    },
}

/// Enhanced error messages for Advanced mode
///
/// Stateless namespace: every associated function formats a diagnostic
/// message (including a context-dependent suggestion) and wraps it with
/// `StatsError::computation`.
pub struct AdvancedErrorMessages;

impl AdvancedErrorMessages {
    /// Memory exhaustion with intelligent suggestions
    ///
    /// # Arguments
    ///
    /// * `required_mb` - Memory the operation requires, in megabytes
    /// * `available_mb` - Memory that is available, in megabytes
    /// * `datasize` - Number of elements being processed
    ///
    /// The suggestion is chosen in priority order: more than 10,000,000
    /// elements, then a requirement above 80% of the available memory, then a
    /// generic fallback.
    pub fn memory_exhaustion(required_mb: f64, available_mb: f64, datasize: usize) -> StatsError {
        let suggestion = if datasize > 10_000_000 {
            "Consider using chunked processing or streaming algorithms for large datasets."
        } else if required_mb > available_mb * 0.8 {
            "Use memory-efficient algorithms or increase available memory."
        } else {
            "Enable garbage collection or reduce concurrent operations."
        };

        StatsError::computation(format!(
            "Memory exhaustion: operation requires {:.1}MB but only {:.1}MB available. \
             Data size: {} elements. Suggestion: {}",
            required_mb, available_mb, datasize, suggestion
        ))
    }

    /// Performance degradation warnings
    ///
    /// # Arguments
    ///
    /// * `operation` - Name of the operation that ran slowly
    /// * `expected_duration` - How long the operation was expected to take
    /// * `actual_duration` - How long it actually took
    /// * `context` - Execution context used to tailor the suggestion
    ///
    /// The slowdown factor is `actual / expected`. When `expected_duration`
    /// is zero the factor is reported as infinite if any time elapsed, and as
    /// `1.0` if both durations are zero, so the message never contains `NaN`.
    pub fn performance_degradation(
        operation: &str,
        expected_duration: Duration,
        actual_duration: Duration,
        context: &AdvancedErrorContext,
    ) -> StatsError {
        let expected_secs = expected_duration.as_secs_f64();
        let actual_secs = actual_duration.as_secs_f64();

        // Guard the division: 0/0 would be NaN, which silently falls through
        // every comparison below and prints "NaNx slower".
        let slowdown_factor = if expected_secs > 0.0 {
            actual_secs / expected_secs
        } else if actual_secs > 0.0 {
            f64::INFINITY
        } else {
            1.0
        };

        let suggestion = match slowdown_factor {
            x if x > 10.0 => {
                if !context.simd_enabled && context.datasize > 1000 {
                    "Enable SIMD operations for significant performance improvement."
                } else if !context.parallel_enabled && context.datasize > 10_000 {
                    "Enable parallel processing for better performance on large datasets."
                } else {
                    "Check for memory pressure or system resource contention."
                }
            }
            x if x > 3.0 => "Consider optimizing data layout or using more efficient algorithms.",
            _ => "Performance is within acceptable range but could be optimized.",
        };

        // The factor is printed with one decimal, matching the other
        // precision-limited numbers in this message.
        StatsError::computation(format!(
            "Performance degradation in {}: expected {:.3}s, actual {:.3}s ({:.1}x slower). \
             Data size: {} elements, SIMD: {}, Parallel: {}. Suggestion: {}",
            operation,
            expected_secs,
            actual_secs,
            slowdown_factor,
            context.datasize,
            context.simd_enabled,
            context.parallel_enabled,
            suggestion
        ))
    }

    /// Numerical precision warnings
    ///
    /// # Arguments
    ///
    /// * `operation` - Name of the operation in which precision was lost
    /// * `precision_loss` - Estimated error; values above `1e-6` and `1e-12`
    ///   select progressively stronger suggestions
    /// * `data_characteristics` - Free-form description of the input data
    pub fn precision_warning(
        operation: &str,
        precision_loss: f64,
        data_characteristics: &str,
    ) -> StatsError {
        let suggestion = match precision_loss {
            x if x > 1e-6 => {
                "Use higher precision arithmetic or reorder operations to minimize error accumulation."
            }
            x if x > 1e-12 => {
                "Consider using numerically stable algorithms or regularization."
            }
            _ => "Precision loss is minimal but monitor for accumulation in iterative algorithms."
        };

        StatsError::computation(format!(
            "Precision loss detected in {}: estimated error {:.2e}. Data: {}. Suggestion: {}",
            operation, precision_loss, data_characteristics, suggestion
        ))
    }

    /// SIMD/parallel optimization recommendations
    ///
    /// # Arguments
    ///
    /// * `operation` - Name of the operation that could be optimized
    /// * `datasize` - Number of elements being processed
    /// * `current_performance` - Time the operation currently takes
    /// * `suggestion` - The optimization to describe; consumed by this call
    pub fn optimization_opportunity(
        operation: &str,
        datasize: usize,
        current_performance: Duration,
        suggestion: OptimizationSuggestion,
    ) -> StatsError {
        let message = match suggestion {
            OptimizationSuggestion::EnableSIMD {
                reason,
                expected_speedup,
            } => {
                format!(
                    "SIMD optimization available for {}: {} Expected speedup: {:.1}x",
                    operation, reason, expected_speedup
                )
            }
            OptimizationSuggestion::EnableParallel {
                reason,
                mindatasize,
            } => {
                format!(
                    "Parallel processing recommended for {}: {} Minimum data size: {}",
                    operation, reason, mindatasize
                )
            }
            OptimizationSuggestion::ReduceMemoryUsage {
                current_mb,
                suggested_mb,
                strategy,
            } => {
                format!(
                    "Memory optimization for {}: reduce from {:.1}MB to {:.1}MB using {}",
                    operation, current_mb, suggested_mb, strategy
                )
            }
            OptimizationSuggestion::ChunkProcessing { chunksize, reason } => {
                format!(
                    "Chunked processing recommended for {}: use chunks of {} elements. {}",
                    operation, chunksize, reason
                )
            }
            OptimizationSuggestion::AlgorithmChange {
                current,
                suggested,
                reason,
            } => {
                format!(
                    "Algorithm optimization for {}: change from {} to {}. {}",
                    operation, current, suggested, reason
                )
            }
        };

        StatsError::computation(format!(
            "Optimization opportunity: {} Data size: {} elements, Current time: {:.3}s",
            message,
            datasize,
            current_performance.as_secs_f64()
        ))
    }

    /// Resource contention warnings
    ///
    /// # Arguments
    ///
    /// * `operation` - Name of the affected operation
    /// * `resource_type` - Resource label; `"cpu"`, `"memory"` and `"cache"`
    ///   (exact, lowercase) select dedicated suggestions
    /// * `utilization` - Utilization as a fraction (`0.9` is printed as 90.0%)
    /// * `impact` - Free-form description of the observed impact
    pub fn resource_contention(
        operation: &str,
        resource_type: &str,
        utilization: f64,
        impact: &str,
    ) -> StatsError {
        let suggestion = match resource_type {
            "cpu" if utilization > 0.9 => {
                "High CPU utilization detected. Consider reducing parallel thread count or scheduling operations."
            }
            "memory" if utilization > 0.8 => {
                "High memory utilization detected. Use streaming algorithms or reduce batch sizes."
            }
            "cache" => {
                "Cache pressure detected. Optimize data access patterns or reduce working set size."
            }
            _ => "Resource contention detected. Monitor system resources and adjust accordingly."
        };

        StatsError::computation(format!(
            "Resource contention in {}: {} utilization at {:.1}%. Impact: {}. Suggestion: {}",
            operation,
            resource_type,
            utilization * 100.0,
            impact,
            suggestion
        ))
    }
}

/// Adaptive error recovery system for Advanced mode
///
/// Stateless namespace for mapping a `StatsError` plus its execution context
/// to a recovery strategy or to a list of textual suggestions.
pub struct AdvancedErrorRecovery;

impl AdvancedErrorRecovery {
    /// Attempt automatic error recovery with performance optimization
    ///
    /// Only `StatsError::ComputationError` is handled. Its message is searched
    /// case-insensitively for the keywords `memory`, `performance` and
    /// `precision`, in that priority order, so messages that start with a
    /// capitalized keyword (as the ones built by `AdvancedErrorMessages` do)
    /// are recognized.
    ///
    /// # Arguments
    ///
    /// * `error` - The error to recover from
    /// * `context` - Execution context used to parameterize the strategy
    /// * `_operation` - Name of the failing operation (currently unused)
    ///
    /// # Returns
    ///
    /// `Some(strategy)` when a keyword matches, otherwise `None`.
    pub fn attempt_recovery(
        error: &StatsError,
        context: &AdvancedErrorContext,
        _operation: &str,
    ) -> Option<RecoveryStrategy> {
        // Keywords are ASCII, so ASCII lowercasing is enough for matching.
        let message = match error {
            StatsError::ComputationError(msg) => msg.to_ascii_lowercase(),
            _ => return None,
        };

        if message.contains("memory") {
            Some(RecoveryStrategy::ReduceMemoryFootprint {
                // A quarter of the data, but never an unusable zero-sized chunk.
                chunksize: (context.datasize / 4).max(1),
                streaming: true,
            })
        } else if message.contains("performance") {
            if !context.simd_enabled && context.datasize > 1000 {
                Some(RecoveryStrategy::EnableOptimizations {
                    simd: true,
                    parallel: context.datasize > 10_000,
                })
            } else {
                Some(RecoveryStrategy::AlgorithmFallback {
                    from: "optimized",
                    to: "stable",
                })
            }
        } else if message.contains("precision") {
            Some(RecoveryStrategy::IncreasePrecision {
                use_f64: true,
                use_stable_algorithms: true,
            })
        } else {
            None
        }
    }

    /// Generate context-aware recovery suggestions
    ///
    /// Suggestions are appended in a fixed order: data-size advice, SIMD and
    /// parallelism advice, memory advice, and finally one general hint that
    /// depends on the error variant (none for variants not listed below).
    ///
    /// # Arguments
    ///
    /// * `error` - The error for which suggestions are requested
    /// * `context` - Execution context whose size, memory and feature flags
    ///   drive the context-based suggestions
    pub fn generate_suggestions(
        error: &StatsError,
        context: &AdvancedErrorContext,
    ) -> Vec<String> {
        let mut suggestions = Vec::new();

        // Data size-based suggestions
        if context.datasize > 1_000_000 {
            suggestions.push("Consider using chunked processing for large datasets".to_string());
        }

        // Performance-based suggestions
        if !context.simd_enabled && context.datasize > 1000 {
            suggestions.push("Enable SIMD operations for better performance".to_string());
        }

        if !context.parallel_enabled && context.datasize > 10_000 {
            suggestions.push("Enable parallel processing for large datasets".to_string());
        }

        // Memory-based suggestions
        if context.memory_usage_mb > 1000.0 {
            suggestions.push("Reduce memory usage with streaming algorithms".to_string());
        }

        // General suggestions
        match error {
            StatsError::ComputationError(_) => {
                suggestions.push("Check input data for edge cases or numerical issues".to_string());
            }
            StatsError::InvalidArgument(_) => {
                suggestions.push(
                    "Validate input parameters before calling statistical functions".to_string(),
                );
            }
            StatsError::DomainError(_) => {
                suggestions.push(
                    "Ensure input values are within valid domain for the operation".to_string(),
                );
            }
            _ => {}
        }

        suggestions
    }
}

/// Recovery strategies for different error scenarios
///
/// Values of this type are produced by
/// `AdvancedErrorRecovery::attempt_recovery`; this file only constructs them,
/// applying a strategy is left to the caller.
#[derive(Debug, Clone)]
pub enum RecoveryStrategy {
    /// Returned for computation errors whose message mentions memory.
    ReduceMemoryFootprint {
        /// Suggested chunk size, derived from a quarter of the context's
        /// data size.
        chunksize: usize,
        /// Whether streaming is recommended (`attempt_recovery` sets `true`).
        streaming: bool,
    },
    /// Returned for performance-related errors when SIMD is disabled and the
    /// data size exceeds 1000.
    EnableOptimizations {
        /// Enable SIMD.
        simd: bool,
        /// Enable parallel processing (set when the data size exceeds 10_000).
        parallel: bool,
    },
    /// Returned for performance-related errors when enabling SIMD is not
    /// applicable.
    AlgorithmFallback {
        /// Label of the algorithm family to move away from.
        from: &'static str,
        /// Label of the algorithm family to fall back to.
        to: &'static str,
    },
    /// Returned for computation errors whose message mentions precision.
    IncreasePrecision {
        /// Recommend `f64` arithmetic.
        use_f64: bool,
        /// Recommend numerically stable algorithms.
        use_stable_algorithms: bool,
    },
}

/// Fluent builder that assembles an `AdvancedErrorContext`
///
/// Start with [`AdvancedContextBuilder::new`], chain the optional setters and
/// finish with [`AdvancedContextBuilder::build`].
pub struct AdvancedContextBuilder {
    datasize: usize,
    operation_start: Instant,
    memory_usage_mb: f64,
    simd_enabled: bool,
    parallel_enabled: bool,
}

impl AdvancedContextBuilder {
    /// Creates a builder for an operation over `datasize` elements.
    ///
    /// The start instant is captured here; memory usage defaults to `0.0` and
    /// both SIMD and parallel flags default to `false`.
    pub fn new(datasize: usize) -> Self {
        let operation_start = Instant::now();
        Self {
            datasize,
            operation_start,
            memory_usage_mb: 0.0,
            simd_enabled: false,
            parallel_enabled: false,
        }
    }

    /// Records the memory usage in megabytes.
    pub fn memory_usage(self, mb: f64) -> Self {
        Self {
            memory_usage_mb: mb,
            ..self
        }
    }

    /// Records whether SIMD is enabled for the operation.
    pub fn simd_enabled(self, enabled: bool) -> Self {
        Self {
            simd_enabled: enabled,
            ..self
        }
    }

    /// Records whether parallel processing is enabled for the operation.
    pub fn parallel_enabled(self, enabled: bool) -> Self {
        Self {
            parallel_enabled: enabled,
            ..self
        }
    }

    /// Consumes the builder and produces the context.
    ///
    /// `suggested_optimization` is always `None` in the result.
    pub fn build(self) -> AdvancedErrorContext {
        let Self {
            datasize,
            operation_start,
            memory_usage_mb,
            simd_enabled,
            parallel_enabled,
        } = self;

        AdvancedErrorContext {
            operation_start,
            datasize,
            memory_usage_mb,
            simd_enabled,
            parallel_enabled,
            suggested_optimization: None,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Values passed to the builder setters must appear unchanged in the
    /// built context.
    #[test]
    fn test_advanced_context_builder() {
        let builder = AdvancedContextBuilder::new(10000);
        let builder = builder.memory_usage(256.0);
        let builder = builder.simd_enabled(true);
        let builder = builder.parallel_enabled(false);
        let context = builder.build();

        assert_eq!(context.datasize, 10000);
        assert_eq!(context.memory_usage_mb, 256.0);
        assert!(context.simd_enabled);
        assert!(!context.parallel_enabled);
    }

    /// With SIMD and parallelism disabled on a 50000-element input, both
    /// "enable" suggestions must be present.
    #[test]
    fn test_recovery_suggestions() {
        let context = AdvancedContextBuilder::new(50000)
            .memory_usage(500.0)
            .simd_enabled(false)
            .parallel_enabled(false)
            .build();
        let error = StatsError::computation("test error");

        let suggestions = AdvancedErrorRecovery::generate_suggestions(&error, &context);

        let expected = [
            "Enable SIMD operations for better performance",
            "Enable parallel processing for large datasets",
        ];
        for wanted in expected.iter() {
            assert!(suggestions.iter().any(|s| s.as_str() == *wanted));
        }
    }
}