scirs2_stats/memory_optimized_advanced.rs

//! Advanced memory optimization for large-scale statistical computing
//!
//! This module provides memory-aware algorithms that automatically adapt
//! to available memory constraints and optimize data layout for cache efficiency.
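//!
//! A typical entry point is to configure [`MemoryConstraints`] and hand an
//! [`AdaptiveMemoryManager`] to one of the memory-aware routines. A minimal
//! sketch (the values are illustrative, not recommendations):
//!
//! ```ignore
//! let constraints = MemoryConstraints {
//!     max_memory_bytes: 256 * 1024 * 1024, // cap work at 256 MiB
//!     ..Default::default()
//! };
//! let manager = AdaptiveMemoryManager::new(constraints);
//! let stats = manager.get_statistics();
//! ```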

use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Data, Ix2};
use scirs2_core::numeric::{Float, NumCast, One, Zero};
use scirs2_core::simd_ops::SimdUnifiedOps;
use std::cmp::Ordering;
use std::collections::VecDeque;
use std::sync::{Arc, Mutex};

/// Memory constraints configuration
#[derive(Debug, Clone)]
pub struct MemoryConstraints {
    /// Maximum memory to use (in bytes)
    pub max_memory_bytes: usize,
    /// Preferred chunk size for processing (in elements)
    pub chunksize: usize,
    /// Use memory mapping for large files
    pub use_memory_mapping: bool,
    /// Enable garbage collection hints
    pub enable_gc_hints: bool,
}

impl Default for MemoryConstraints {
    fn default() -> Self {
        // Default to 1 GiB max memory and 64Ki-element chunks
        Self {
            max_memory_bytes: 1_024 * 1_024 * 1_024,
            chunksize: 64 * 1024,
            use_memory_mapping: true,
            enable_gc_hints: true,
        }
    }
}

/// Adaptive memory manager that monitors usage and adjusts strategies
pub struct AdaptiveMemoryManager {
    constraints: MemoryConstraints,
    current_usage: Arc<Mutex<usize>>,
    peak_usage: Arc<Mutex<usize>>,
    operation_history: Arc<Mutex<VecDeque<OperationMetrics>>>,
}

#[derive(Debug, Clone)]
pub struct OperationMetrics {
    operation_type: String,
    memory_used: usize,
    processing_time: std::time::Duration,
    chunksize_used: usize,
}

impl AdaptiveMemoryManager {
    pub fn new(constraints: MemoryConstraints) -> Self {
        Self {
            constraints,
            current_usage: Arc::new(Mutex::new(0)),
            peak_usage: Arc::new(Mutex::new(0)),
            operation_history: Arc::new(Mutex::new(VecDeque::with_capacity(100))),
        }
    }

    /// Get optimal chunk size based on current memory usage and data size
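    ///
    /// The result is clamped to the data size and the configured chunk size,
    /// then rounded down to a power of two. A small sketch of the expected
    /// behaviour (hypothetical numbers):
    ///
    /// ```ignore
    /// let manager = AdaptiveMemoryManager::new(MemoryConstraints::default());
    /// // 10_000 elements of 8 bytes each (e.g. f64 samples)
    /// let chunk = manager.get_optimal_chunksize(10_000, 8);
    /// assert!(chunk.is_power_of_two() && chunk <= 10_000);
    /// ```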
    pub fn get_optimal_chunksize(&self, datasize: usize, elementsize: usize) -> usize {
        let current_usage = *self.current_usage.lock().expect("mutex poisoned");
        let available_memory = self
            .constraints
            .max_memory_bytes
            .saturating_sub(current_usage);

        // Use at most 80% of available memory for chunk processing
        let max_chunk_memory = available_memory * 4 / 5;
        let max_chunk_elements = max_chunk_memory / elementsize;

        // Prefer power-of-2 sizes for cache efficiency
        let optimalsize = max_chunk_elements
            .min(datasize)
            .min(self.constraints.chunksize);
        // Round down to the nearest power of two. next_power_of_two() rounds
        // up, so halve it unless the size is already a power of two, and
        // never return a zero-sized chunk.
        if optimalsize.is_power_of_two() {
            optimalsize
        } else {
            (optimalsize.next_power_of_two() / 2).max(1)
        }
    }

    /// Record memory usage for an operation
    pub fn record_operation(&self, metrics: OperationMetrics) {
        let mut history = self.operation_history.lock().expect("mutex poisoned");

        // Keep only recent operations
        if history.len() >= 100 {
            history.pop_front();
        }

        history.push_back(metrics.clone());

        // Update peak usage
        let mut peak = self.peak_usage.lock().expect("mutex poisoned");
        *peak = (*peak).max(metrics.memory_used);
    }

    /// Get memory usage statistics
    pub fn get_statistics(&self) -> MemoryStatistics {
        let current_usage = *self.current_usage.lock().expect("mutex poisoned");
        let peak_usage = *self.peak_usage.lock().expect("mutex poisoned");
        let history = self.operation_history.lock().expect("mutex poisoned");

        let avg_memory_per_op = if !history.is_empty() {
            history.iter().map(|m| m.memory_used).sum::<usize>() / history.len()
        } else {
            0
        };

        MemoryStatistics {
            current_usage,
            peak_usage,
            avg_memory_per_operation: avg_memory_per_op,
            operations_completed: history.len(),
            memory_efficiency: if peak_usage > 0 {
                (avg_memory_per_op as f64 / peak_usage as f64) * 100.0
            } else {
                100.0
            },
        }
    }
}

#[derive(Debug, Clone)]
pub struct MemoryStatistics {
    pub current_usage: usize,
    pub peak_usage: usize,
    pub avg_memory_per_operation: usize,
    pub operations_completed: usize,
    pub memory_efficiency: f64,
}

/// Memory-aware correlation matrix computation
///
/// Computes correlation matrices using adaptive chunking based on available memory.
/// For very large matrices, uses block-wise computation to stay within memory constraints.
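///
/// # Example
///
/// A minimal usage sketch (illustrative data; `array!` comes from
/// `scirs2_core::ndarray`):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
///
/// let data = array![[1.0, 2.0], [2.0, 4.1], [3.0, 5.9], [4.0, 8.2]];
/// let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());
/// let corr = corrcoef_memory_aware(&data.view(), "pearson", &mut manager).unwrap();
/// assert_eq!(corr.dim(), (2, 2)); // one row/column per variable
/// ```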
#[allow(dead_code)]
pub fn corrcoef_memory_aware<F>(
    data: &ArrayView2<F>,
    method: &str,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array2<F>>
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + Copy
        + Send
        + Sync
        + std::iter::Sum<F>
        + std::fmt::Debug
        + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    checkarray_finite_2d(data, "data")?;

    let (n_obs, n_vars) = data.dim();
    let elementsize = std::mem::size_of::<F>();

    // Estimate memory requirements
    let matrix_memory = n_vars * n_vars * elementsize;
    let temp_memory = n_obs * elementsize * 2; // For column pairs
    let total_estimated = matrix_memory + temp_memory;

    let corr_matrix = if total_estimated <= manager.constraints.max_memory_bytes {
        // Fits in memory - use the standard approach
        compute_correlation_matrix_standard(data, method)?
    } else {
        // Use block-wise computation
        let blocksize = manager.get_optimal_chunksize(n_vars, elementsize * n_vars);
        compute_correlation_matrix_blocked(data, method, blocksize)?
    };

    // Record metrics
    let metrics = OperationMetrics {
        operation_type: format!("corrcoef_memory_aware_{}", method),
        memory_used: total_estimated,
        processing_time: start_time.elapsed(),
        chunksize_used: n_vars,
    };
    manager.record_operation(metrics);

    Ok(corr_matrix)
}

/// Cache-oblivious matrix multiplication for large correlation computations
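///
/// Matrices whose row count exceeds `threshold` are split in half and the
/// halves multiplied recursively; at or below the threshold the standard
/// triple loop runs. A sketch (illustrative values):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
///
/// let a = array![[1.0, 2.0], [3.0, 4.0]];
/// let b = array![[5.0, 6.0], [7.0, 8.0]];
/// // threshold 1 forces one level of recursion on the rows
/// let c = cache_oblivious_matrix_mult(&a.view(), &b.view(), 1).unwrap();
/// assert_eq!(c[[0, 0]], 19.0);
/// ```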
#[allow(dead_code)]
pub fn cache_oblivious_matrix_mult<F>(
    a: &ArrayView2<F>,
    b: &ArrayView2<F>,
    threshold: usize,
) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Display,
{
    let (m, n) = a.dim();
    let (n2, p) = b.dim();

    if n != n2 {
        return Err(StatsError::DimensionMismatch(
            "Matrix dimensions don't match for multiplication".to_string(),
        ));
    }

    let mut result = Array2::<F>::zeros((m, p));

    if m > threshold {
        // Recursive case: split the taller operand in half by rows.
        // This is a simplified version - a full implementation would also
        // subdivide along the n and p dimensions recursively.
        let mid_m = m / 2;

        let a_top = a.slice(scirs2_core::ndarray::s![..mid_m, ..]);
        let a_bottom = a.slice(scirs2_core::ndarray::s![mid_m.., ..]);

        let result_top = cache_oblivious_matrix_mult(&a_top, b, threshold)?;
        let result_bottom = cache_oblivious_matrix_mult(&a_bottom, b, threshold)?;

        result
            .slice_mut(scirs2_core::ndarray::s![..mid_m, ..])
            .assign(&result_top);
        result
            .slice_mut(scirs2_core::ndarray::s![mid_m.., ..])
            .assign(&result_bottom);
    } else {
        // Base case: standard multiplication. This branch also handles
        // matrices where only n or p exceeds the threshold, which a full
        // cache-oblivious implementation would subdivide as well.
        for i in 0..m {
            for j in 0..p {
                let mut sum = F::zero();
                for k in 0..n {
                    sum = sum + a[[i, k]] * b[[k, j]];
                }
                result[[i, j]] = sum;
            }
        }
    }

    Ok(result)
}

/// Streaming covariance computation for large datasets
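///
/// # Example
///
/// A usage sketch feeding the data as row chunks; `axis_chunks_iter` from
/// ndarray is one way to produce the `ArrayView2` chunks:
///
/// ```ignore
/// use scirs2_core::ndarray::{array, Axis};
///
/// let data = array![[1.0, 2.0], [2.0, 3.0], [3.0, 5.0], [4.0, 4.0]];
/// let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());
/// let chunks = data.axis_chunks_iter(Axis(0), 2); // two rows at a time
/// let cov = streaming_covariance_matrix(chunks, &mut manager).unwrap();
/// assert_eq!(cov.dim(), (2, 2));
/// ```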
#[allow(dead_code)]
pub fn streaming_covariance_matrix<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView2<'a, F>>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut n_vars = 0;
    let mut total_obs = 0;
    let mut sum_x = Array1::<F>::zeros(0);
    let mut sum_x2 = Array2::<F>::zeros((0, 0));
    let mut initialized = false;

    for chunk in data_chunks {
        let (chunk_obs, chunk_vars) = chunk.dim();

        if !initialized {
            n_vars = chunk_vars;
            sum_x = Array1::<F>::zeros(n_vars);
            sum_x2 = Array2::<F>::zeros((n_vars, n_vars));
            initialized = true;
        } else if chunk_vars != n_vars {
            return Err(StatsError::DimensionMismatch(
                "All chunks must have the same number of variables".to_string(),
            ));
        }

        total_obs += chunk_obs;

        // Update sums
        for i in 0..chunk_obs {
            let row = chunk.row(i);

            // Update sum_x
            for j in 0..n_vars {
                sum_x[j] = sum_x[j] + row[j];
            }

            // Update sum_x2 (cross products)
            for j in 0..n_vars {
                for k in j..n_vars {
                    let product = row[j] * row[k];
                    sum_x2[[j, k]] = sum_x2[[j, k]] + product;
                    if j != k {
                        sum_x2[[k, j]] = sum_x2[[k, j]] + product;
                    }
                }
            }
        }
    }

    if total_obs == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // Compute the population covariance matrix from the accumulated sums:
    // cov(i, j) = E[x_i x_j] - E[x_i] E[x_j]. This one-pass formula is
    // simple but can lose precision for ill-conditioned data.
    let mut cov_matrix = Array2::<F>::zeros((n_vars, n_vars));
    let n_f = F::from(total_obs).expect("Failed to convert to float");

    for i in 0..n_vars {
        for j in 0..n_vars {
            let mean_i = sum_x[i] / n_f;
            let mean_j = sum_x[j] / n_f;
            let cov = (sum_x2[[i, j]] / n_f) - (mean_i * mean_j);
            cov_matrix[[i, j]] = cov;
        }
    }

    // Record metrics
    let memory_used = (n_vars * n_vars + n_vars) * std::mem::size_of::<F>();
    let metrics = OperationMetrics {
        operation_type: "streaming_covariance_matrix".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: n_vars,
    };
    manager.record_operation(metrics);

    Ok(cov_matrix)
}

/// Memory-efficient principal component analysis
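///
/// Falls back to [`incremental_pca`] when the centered copy of the data
/// would exceed half of the configured memory budget. A usage sketch
/// (illustrative data):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
///
/// let data = array![[2.5, 2.4], [0.5, 0.7], [2.2, 2.9], [1.9, 2.2]];
/// let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());
/// let pca = pca_memory_efficient(&data.view(), Some(1), &mut manager).unwrap();
/// assert_eq!(pca.components.dim(), (2, 1)); // one component over two variables
/// ```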
#[allow(dead_code)]
pub fn pca_memory_efficient<F>(
    data: &ArrayView2<F>,
    n_components: Option<usize>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<PCAResult<F>>
where
    F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Debug + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let (n_obs, n_vars) = data.dim();
    let n_components = n_components.unwrap_or(n_vars.min(n_obs));

    // Center the data using streaming mean
    let mut means = Array1::<F>::zeros(n_vars);
    for i in 0..n_vars {
        let column = data.column(i);
        means[i] = column.iter().fold(F::zero(), |acc, &x| acc + x)
            / F::from(n_obs).expect("Failed to convert to float");
    }

    // Estimate memory for centered data
    let centereddata_memory = n_obs * n_vars * std::mem::size_of::<F>();

    if centereddata_memory <= manager.constraints.max_memory_bytes / 2 {
        // Can afford to store centered data
        let mut centereddata = Array2::<F>::zeros((n_obs, n_vars));
        for i in 0..n_obs {
            for j in 0..n_vars {
                centereddata[[i, j]] = data[[i, j]] - means[j];
            }
        }

        // Compute covariance matrix
        let cov_matrix = compute_covariance_from_centered(&centereddata.view())?;

        // Eigendecomposition (simplified - would use proper linear algebra library)
        let (eigenvectors, eigenvalues) =
            compute_eigendecomposition(&cov_matrix.view(), n_components)?;

        // Transform data
        let transformed = matrix_multiply(&centereddata.view(), &eigenvectors.view())?;

        let result = PCAResult {
            components: eigenvectors,
            explained_variance: eigenvalues,
            transformeddata: transformed,
            mean: means,
        };

        let metrics = OperationMetrics {
            operation_type: "pca_memory_efficient".to_string(),
            memory_used: centereddata_memory,
            processing_time: start_time.elapsed(),
            chunksize_used: n_vars,
        };
        manager.record_operation(metrics);

        Ok(result)
    } else {
        // Use incremental PCA for very large datasets
        incremental_pca(data, n_components, &means, manager)
    }
}

#[derive(Debug, Clone)]
pub struct PCAResult<F> {
    pub components: Array2<F>,
    pub explained_variance: Array1<F>,
    pub transformeddata: Array2<F>,
    pub mean: Array1<F>,
}

/// Streaming principal component analysis for very large datasets
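///
/// Maintains running means and a co-moment matrix over the chunks
/// (Welford's method), so only O(n_features²) memory is needed regardless
/// of the number of samples. A sketch of the call pattern:
///
/// ```ignore
/// use scirs2_core::ndarray::{array, Axis};
///
/// let data = array![[1.0, 2.0], [2.0, 1.0], [3.0, 4.0], [4.0, 3.0]];
/// let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());
/// let pca = streaming_pca_enhanced(data.axis_chunks_iter(Axis(0), 2), 1, &mut manager).unwrap();
/// assert_eq!(pca.components.ncols(), 1);
/// ```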
#[allow(dead_code)]
pub fn streaming_pca_enhanced<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView2<'a, F>>,
    n_components: usize,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<PCAResult<F>>
where
    F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    // First pass: compute mean and covariance incrementally
    let mut n_samples_ = 0;
    let mut n_features = 0;
    let mut running_mean = Array1::<F>::zeros(0);
    let mut running_cov = Array2::<F>::zeros((0, 0));
    let mut initialized = false;

    for chunk in data_chunks {
        let (chunk_samples, chunk_features) = chunk.dim();

        if !initialized {
            n_features = chunk_features;
            running_mean = Array1::<F>::zeros(n_features);
            running_cov = Array2::<F>::zeros((n_features, n_features));
            initialized = true;
        } else if chunk_features != n_features {
            return Err(StatsError::DimensionMismatch(
                "All chunks must have same number of features".to_string(),
            ));
        }

        // Update running statistics using Welford's method: running_cov
        // accumulates the co-moment matrix M2[j, k] = Σ (x_j - mean_j)(x_k - mean_k),
        // which is divided by n - 1 after the pass to obtain the covariance.
        for i in 0..chunk_samples {
            n_samples_ += 1;
            let row = chunk.row(i);

            // Deltas against the pre-update means (required by Welford's update)
            let deltas: Vec<F> = (0..n_features).map(|j| row[j] - running_mean[j]).collect();

            // Update mean
            let n_f = F::from(n_samples_).expect("Failed to convert to float");
            for j in 0..n_features {
                running_mean[j] = running_mean[j] + deltas[j] / n_f;
            }

            // Update co-moments: M2[j, k] += (x_j - old_mean_j) * (x_k - new_mean_k)
            for j in 0..n_features {
                for k in j..n_features {
                    let update = deltas[j] * (row[k] - running_mean[k]);
                    running_cov[[j, k]] = running_cov[[j, k]] + update;
                    if j != k {
                        running_cov[[k, j]] = running_cov[[j, k]];
                    }
                }
            }
        }
    }

    if n_samples_ == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // Convert the accumulated co-moments into the sample covariance matrix
    if n_samples_ > 1 {
        let denom = F::from(n_samples_ - 1).expect("Failed to convert to float");
        running_cov.mapv_inplace(|c| c / denom);
    }

    // Simplified eigendecomposition (would use proper SVD in production)
    let (components, explained_variance) =
        compute_eigendecomposition(&running_cov.view(), n_components)?;

    // Record memory usage
    let memory_used =
        n_features * n_features * std::mem::size_of::<F>() + n_features * std::mem::size_of::<F>();
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_pca_enhanced".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok(PCAResult {
        components,
        explained_variance,
        transformeddata: Array2::zeros((0, 0)), // Would project data in a second pass
        mean: running_mean,
    })
}

/// Enhanced streaming histogram computation with adaptive binning
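///
/// Bin widths follow the Freedman-Diaconis rule, h = 2·IQR/n^(1/3),
/// estimated from a reservoir sample so memory stays bounded. A usage
/// sketch over chunked data:
///
/// ```ignore
/// use scirs2_core::ndarray::{array, Axis};
///
/// let data = array![1.0, 2.0, 2.5, 3.0, 3.5, 4.0, 8.0, 9.0];
/// let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());
/// let chunks = data.axis_chunks_iter(Axis(0), 4);
/// let (edges, counts) = streaming_histogram_adaptive(chunks, &mut manager).unwrap();
/// assert_eq!(edges.len(), counts.len() + 1); // n_bins + 1 edges
/// ```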
#[allow(dead_code)]
pub fn streaming_histogram_adaptive<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView1<'a, F>>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<(Array1<F>, Array1<usize>)>
where
    F: Float + NumCast + Zero + One + Copy + PartialOrd + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut min_val = F::infinity();
    let mut max_val = F::neg_infinity();
    let mut total_count = 0;
    let mut values = Vec::new(); // Reservoir sample for adaptive binning

    // First pass: find range and collect a sample for bin determination
    for chunk in data_chunks {
        for &value in chunk.iter() {
            if value < min_val {
                min_val = value;
            }
            if value > max_val {
                max_val = value;
            }
            total_count += 1;

            // Reservoir sampling to maintain memory bounds
            if values.len() < manager.constraints.chunksize {
                values.push(value);
            } else {
                let j = {
                    use scirs2_core::random::Rng;
                    let mut rng = scirs2_core::random::thread_rng();
                    rng.random_range(0..total_count)
                };
                if j < values.len() {
                    values[j] = value;
                }
            }
        }
    }

    if total_count == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // Adaptive bin count using the Freedman-Diaconis rule: h = 2 * IQR / n^(1/3)
    values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
    let q75_idx = (values.len() as f64 * 0.75) as usize;
    let q25_idx = (values.len() as f64 * 0.25) as usize;
    let iqr = values[q75_idx] - values[q25_idx];
    let h = F::from(2.0).expect("Failed to convert constant to float") * iqr
        / F::from(total_count as f64)
            .expect("Failed to convert count to float")
            .powf(F::from(1.0 / 3.0).expect("Failed to convert to float"));
    let n_bins = if h > F::zero() {
        ((max_val - min_val) / h)
            .to_usize()
            .unwrap_or(50)
            .max(10)
            .min(1000)
    } else {
        50 // Default
    };

    let bin_width = (max_val - min_val) / F::from(n_bins).expect("Failed to convert to float");
    let mut bin_edges = Array1::<F>::zeros(n_bins + 1);
    let mut counts = Array1::<usize>::zeros(n_bins);

    // Set bin edges
    for i in 0..=n_bins {
        bin_edges[i] = min_val + F::from(i).expect("Failed to convert to float") * bin_width;
    }

    // Simplified second pass: only the reservoir sample is binned here, so
    // the counts are an estimate. A full implementation would re-iterate
    // over the chunks to count every value.
    for &value in values.iter() {
        if value >= min_val && value <= max_val {
            let bin_idx = ((value - min_val) / bin_width)
                .to_usize()
                .unwrap_or(0)
                .min(n_bins - 1);
            counts[bin_idx] += 1;
        }
    }

    let memory_used = n_bins * (std::mem::size_of::<F>() + std::mem::size_of::<usize>());
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_histogram_adaptive".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok((bin_edges, counts))
}

/// Memory-efficient streaming quantile computation using the P² algorithm
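///
/// Each requested quantile gets its own five-marker P² estimator, so memory
/// is O(len(quantiles)) no matter how long the stream is. A usage sketch:
///
/// ```ignore
/// use scirs2_core::ndarray::{Array1, Axis};
///
/// let data = Array1::from_iter((1..=1000).map(|i| i as f64));
/// let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());
/// let chunks = data.axis_chunks_iter(Axis(0), 100);
/// let q = streaming_quantiles_p2(chunks, &[0.5, 0.9], &mut manager).unwrap();
/// // q[0] ≈ 500.0 (median), q[1] ≈ 900.0 (90th percentile)
/// ```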
#[allow(dead_code)]
pub fn streaming_quantiles_p2<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView1<'a, F>>,
    quantiles: &[f64],
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array1<F>>
where
    F: Float + NumCast + Zero + One + Copy + PartialOrd + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut p2_estimators: Vec<P2Estimator<F>> =
        quantiles.iter().map(|&q| P2Estimator::new(q)).collect();

    let mut total_count = 0;

    for chunk in data_chunks {
        for &value in chunk.iter() {
            total_count += 1;
            for estimator in &mut p2_estimators {
                estimator.update(value);
            }
        }
    }

    if total_count == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    let results: Vec<F> = p2_estimators.iter().map(|est| est.quantile()).collect();

    let memory_used = quantiles.len() * std::mem::size_of::<P2Estimator<F>>();
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_quantiles_p2".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok(Array1::from_vec(results))
}

/// P² algorithm estimator for streaming quantiles
#[derive(Debug, Clone)]
struct P2Estimator<F> {
    quantile: f64,
    markers: [F; 5],
    positions: [f64; 5],
    desired_positions: [f64; 5],
    increment: [f64; 5],
    count: usize,
}

impl<F> P2Estimator<F>
where
    F: Float + NumCast + Copy + PartialOrd + std::fmt::Display,
{
    fn new(quantile: f64) -> Self {
        Self {
            quantile,
            markers: [F::zero(); 5],
            positions: [1.0, 2.0, 3.0, 4.0, 5.0],
            desired_positions: [
                1.0,
                1.0 + 2.0 * quantile,
                1.0 + 4.0 * quantile,
                3.0 + 2.0 * quantile,
                5.0,
            ],
            increment: [0.0, quantile / 2.0, quantile, (1.0 + quantile) / 2.0, 1.0],
            count: 0,
        }
    }

    fn update(&mut self, value: F) {
        self.count += 1;

        if self.count <= 5 {
            // Initialize with the first 5 values
            self.markers[self.count - 1] = value;
            if self.count == 5 {
                self.markers
                    .sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
            }
            return;
        }

        // Find cell k such that markers[k - 1] <= value < markers[k]
        let mut k = 0;
        for i in 0..4 {
            if value >= self.markers[i] {
                k = i + 1;
            }
        }

        if k == 0 {
            // New minimum
            self.markers[0] = value;
            k = 1;
        } else if value > self.markers[4] {
            // New maximum
            self.markers[4] = value;
            k = 4;
        }

        // Increment positions k through 4
        for i in k..5 {
            self.positions[i] += 1.0;
        }

        // Update desired positions
        for i in 0..5 {
            self.desired_positions[i] += self.increment[i];
        }

        // Adjust marker heights if they drift from their desired positions
        for i in 1..4 {
            let d = self.desired_positions[i] - self.positions[i];
            if (d >= 1.0 && self.positions[i + 1] - self.positions[i] > 1.0)
                || (d <= -1.0 && self.positions[i - 1] - self.positions[i] < -1.0)
            {
                let d_sign = if d >= 0.0 { 1.0 } else { -1.0 };
                let new_height = self.parabolic_prediction(i, d_sign);

                // Use the parabolic (P²) prediction when it keeps the markers
                // monotone, otherwise fall back to linear interpolation
                if self.markers[i - 1] < new_height && new_height < self.markers[i + 1] {
                    self.markers[i] = new_height;
                } else {
                    self.markers[i] = self.linear_prediction(i, d_sign);
                }
                self.positions[i] += d_sign;
            }
        }
    }

    fn parabolic_prediction(&self, i: usize, d: f64) -> F {
        let qi = self.markers[i];
        let qi_prev = self.markers[i - 1];
        let qi_next = self.markers[i + 1];
        let ni = self.positions[i];
        let ni_prev = self.positions[i - 1];
        let ni_next = self.positions[i + 1];

        let d_f = F::from(d).expect("Failed to convert to float");
        let ni_f = F::from(ni).expect("Failed to convert to float");
        let ni_prev_f = F::from(ni_prev).expect("Failed to convert to float");
        let ni_next_f = F::from(ni_next).expect("Failed to convert to float");

        let a = d_f / (ni_next_f - ni_prev_f);
        let b1 = (ni_f - ni_prev_f + d_f) * (qi_next - qi) / (ni_next_f - ni_f);
        let b2 = (ni_next_f - ni_f - d_f) * (qi - qi_prev) / (ni_f - ni_prev_f);

        qi + a * (b1 + b2)
    }

    fn linear_prediction(&self, i: usize, d: f64) -> F {
        if d > 0.0 {
            let qi_next = self.markers[i + 1];
            let qi = self.markers[i];
            let ni_next = self.positions[i + 1];
            let ni = self.positions[i];
            qi + (qi_next - qi) * F::from(d / (ni_next - ni)).expect("Failed to convert to float")
        } else {
            let qi = self.markers[i];
            let qi_prev = self.markers[i - 1];
            let ni = self.positions[i];
            let ni_prev = self.positions[i - 1];
            qi + (qi_prev - qi) * F::from(-d / (ni - ni_prev)).expect("Failed to convert to float")
        }
    }

    fn quantile(&self) -> F {
        if self.count < 5 {
            // For small streams, fall back to sorting the observed values
            let mut sorted = self.markers[..self.count.min(5)].to_vec();
            sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
            let idx = (self.quantile * (sorted.len() - 1) as f64) as usize;
            sorted[idx]
        } else {
            self.markers[2] // The middle marker tracks the desired quantile
        }
    }
}

/// Enhanced streaming regression for large datasets with regularization
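///
/// Accumulates the normal equations X'X and X'y across chunks and solves
/// the ridge system (X'X + λI)β = X'y at the end. A usage sketch:
///
/// ```ignore
/// use scirs2_core::ndarray::{array, Axis};
///
/// let x = array![[1.0, 1.0], [1.0, 2.0], [1.0, 3.0], [1.0, 4.0]];
/// let y = array![2.1, 3.9, 6.2, 7.8];
/// let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());
/// let chunks = x
///     .axis_chunks_iter(Axis(0), 2)
///     .zip(y.axis_chunks_iter(Axis(0), 2));
/// let beta = streaming_regression_enhanced(chunks, 1e-6, &mut manager).unwrap();
/// assert_eq!(beta.len(), 2); // intercept (constant column) and slope
/// ```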
#[allow(dead_code)]
pub fn streaming_regression_enhanced<'a, F>(
    data_chunks: impl Iterator<Item = (ArrayView2<'a, F>, ArrayView1<'a, F>)>,
    regularization: F,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array1<F>>
where
    F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut xtx = Array2::<F>::zeros((0, 0));
    let mut xty = Array1::<F>::zeros(0);
    let mut n_samples_ = 0;
    let mut n_features = 0;
    let mut initialized = false;

    // Accumulate X'X and X'y incrementally
    for (x_chunk, y_chunk) in data_chunks {
        let (chunk_samples, chunk_features) = x_chunk.dim();

        if !initialized {
            n_features = chunk_features;
            xtx = Array2::<F>::zeros((n_features, n_features));
            xty = Array1::<F>::zeros(n_features);
            initialized = true;
        } else if chunk_features != n_features {
            return Err(StatsError::DimensionMismatch(
                "All chunks must have same number of features".to_string(),
            ));
        }

        if y_chunk.len() != chunk_samples {
            return Err(StatsError::DimensionMismatch(
                "X and y must have same number of samples".to_string(),
            ));
        }

        n_samples_ += chunk_samples;

        // Update X'X
        for i in 0..n_features {
            for j in i..n_features {
                let mut sum = F::zero();
                for k in 0..chunk_samples {
                    sum = sum + x_chunk[[k, i]] * x_chunk[[k, j]];
                }
                xtx[[i, j]] = xtx[[i, j]] + sum;
                if i != j {
                    xtx[[j, i]] = xtx[[i, j]];
                }
            }
        }

        // Update X'y
        for i in 0..n_features {
            let mut sum = F::zero();
            for k in 0..chunk_samples {
                sum = sum + x_chunk[[k, i]] * y_chunk[k];
            }
            xty[i] = xty[i] + sum;
        }
    }

    if n_samples_ == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // Add ridge regularization to the diagonal
    for i in 0..n_features {
        xtx[[i, i]] = xtx[[i, i]] + regularization;
    }

    // Solve (X'X + λI)β = X'y using simplified method
    // In practice, would use proper matrix decomposition
    let coefficients = solve_linear_system(&xtx.view(), &xty.view())?;

    let memory_used =
        n_features * n_features * std::mem::size_of::<F>() + n_features * std::mem::size_of::<F>();
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_regression_enhanced".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok(coefficients)
}

/// Simple linear system solver (would use proper LU decomposition in production)
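///
/// A worked sketch: for A = [[2, 1], [1, 3]] and b = [3, 5], elimination
/// yields x = [0.8, 1.4], since 2·0.8 + 1.4 = 3 and 0.8 + 3·1.4 = 5.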
#[allow(dead_code)]
fn solve_linear_system<F>(a: &ArrayView2<F>, b: &ArrayView1<F>) -> StatsResult<Array1<F>>
where
    F: Float + NumCast + Zero + One + Copy + std::fmt::Display,
{
    let n = a.nrows();
    if a.ncols() != n || b.len() != n {
        return Err(StatsError::DimensionMismatch(
            "Matrix dimensions incompatible".to_string(),
        ));
    }

    // Gauss-Jordan elimination with partial pivoting on an augmented matrix
    let mut aug = Array2::<F>::zeros((n, n + 1));

    // Copy A and b into augmented matrix
    for i in 0..n {
        for j in 0..n {
            aug[[i, j]] = a[[i, j]];
        }
        aug[[i, n]] = b[i];
    }

    // Forward elimination
    for i in 0..n {
        // Find pivot
        let mut max_row = i;
        for k in (i + 1)..n {
            if aug[[k, i]].abs() > aug[[max_row, i]].abs() {
                max_row = k;
            }
        }

        // Swap rows
        if max_row != i {
            for j in 0..=n {
                let temp = aug[[i, j]];
                aug[[i, j]] = aug[[max_row, j]];
                aug[[max_row, j]] = temp;
            }
        }

        // Make diagonal 1
        let pivot = aug[[i, i]];
        if pivot.abs() < F::from(1e-12).expect("Failed to convert constant to float") {
            return Err(StatsError::ComputationError(
                "Matrix is singular".to_string(),
            ));
        }

        for j in i..=n {
            aug[[i, j]] = aug[[i, j]] / pivot;
        }

        // Eliminate column
        for k in 0..n {
            if k != i {
                let factor = aug[[k, i]];
                for j in i..=n {
                    aug[[k, j]] = aug[[k, j]] - factor * aug[[i, j]];
                }
            }
        }
    }

    // Extract solution
    let mut solution = Array1::<F>::zeros(n);
    for i in 0..n {
        solution[i] = aug[[i, n]];
    }

    Ok(solution)
}

// Helper functions (simplified implementations)

#[allow(dead_code)]
fn compute_correlation_matrix_standard<F>(
    data: &ArrayView2<F>,
    method: &str,
) -> StatsResult<Array2<F>>
where
    F: Float
        + NumCast
        + Zero
        + One
        + Copy
        + std::iter::Sum<F>
        + std::fmt::Debug
        + std::fmt::Display
        + Send
        + Sync
        + scirs2_core::simd_ops::SimdUnifiedOps,
{
    // Use existing corrcoef implementation
    crate::corrcoef(data, method)
}

#[allow(dead_code)]
fn compute_correlation_matrix_blocked<F>(
    data: &ArrayView2<F>,
    method: &str,
    blocksize: usize,
) -> StatsResult<Array2<F>>
where
    F: Float
        + NumCast
        + Zero
        + One
        + Copy
        + std::iter::Sum<F>
        + std::fmt::Debug
        + std::fmt::Display
        + scirs2_core::simd_ops::SimdUnifiedOps,
{
    let (_, n_vars) = data.dim();
    let mut corr_matrix = Array2::<F>::zeros((n_vars, n_vars));

    // Set diagonal
    for i in 0..n_vars {
        corr_matrix[[i, i]] = F::one();
    }

    // Process the upper triangle in blocks, mirroring into the lower triangle
    for i_block in (0..n_vars).step_by(blocksize) {
        let i_end = (i_block + blocksize).min(n_vars);

        for j_block in (i_block..n_vars).step_by(blocksize) {
            let j_end = (j_block + blocksize).min(n_vars);

            // Compute correlations for this block
            for i in i_block..i_end {
                for j in j_block.max(i + 1)..j_end {
                    let col_i = data.column(i);
                    let col_j = data.column(j);

                    let corr = match method {
                        "pearson" => crate::pearson_r(&col_i, &col_j)?,
                        "spearman" => crate::spearman_r(&col_i, &col_j)?,
                        "kendall" => crate::kendall_tau(&col_i, &col_j, "b")?,
                        _ => {
                            return Err(StatsError::InvalidArgument(format!(
                                "Unknown method: {}",
                                method
                            )))
                        }
                    };

                    corr_matrix[[i, j]] = corr;
                    corr_matrix[[j, i]] = corr;
                }
            }
        }
    }

    Ok(corr_matrix)
}

#[allow(dead_code)]
fn compute_covariance_from_centered<F>(data: &ArrayView2<F>) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + Copy + std::fmt::Display,
{
    let (n_obs, n_vars) = data.dim();
    let mut cov_matrix = Array2::<F>::zeros((n_vars, n_vars));
    let n_f = F::from(n_obs - 1).expect("Failed to convert to float"); // Sample covariance

    for i in 0..n_vars {
        for j in i..n_vars {
            let mut cov = F::zero();
            for k in 0..n_obs {
                cov = cov + data[[k, i]] * data[[k, j]];
            }
            cov = cov / n_f;
            cov_matrix[[i, j]] = cov;
            cov_matrix[[j, i]] = cov;
        }
    }

    Ok(cov_matrix)
}

#[allow(dead_code)]
fn compute_eigendecomposition<F>(
    matrix: &ArrayView2<F>,
    n_components: usize,
) -> StatsResult<(Array2<F>, Array1<F>)>
where
    F: Float + NumCast + Zero + One + Copy + std::fmt::Display,
{
    let n = matrix.dim().0;
    let n_components = n_components.min(n);

    // Power iteration with deflation for the top eigenvalues/eigenvectors.
    // This is a simplified implementation - in practice would use LAPACK
    let mut eigenvalues = Array1::<F>::zeros(n_components);
    let mut eigenvectors = Array2::<F>::zeros((n, n_components));

    for k in 0..n_components {
        // Deterministic start vector (a production implementation would
        // randomize this to avoid starting orthogonal to the eigenvector)
        let mut v = Array1::<F>::ones(n);

        // Power iteration
        for _ in 0..100 {
            // Max iterations
            let mut new_v = Array1::<F>::zeros(n);

            // Matrix-vector multiplication
            for i in 0..n {
                let mut sum = F::zero();
                for j in 0..n {
                    sum = sum + matrix[[i, j]] * v[j];
                }
                new_v[i] = sum;
            }

            // Orthogonalize against previous eigenvectors (Gram-Schmidt)
            for prev_k in 0..k {
                let mut dot_product = F::zero();
                for i in 0..n {
                    dot_product = dot_product + new_v[i] * eigenvectors[[i, prev_k]];
                }

                for i in 0..n {
                    new_v[i] = new_v[i] - dot_product * eigenvectors[[i, prev_k]];
                }
            }

            // Normalize
            let mut norm = F::zero();
            for i in 0..n {
                norm = norm + new_v[i] * new_v[i];
            }
            norm = norm.sqrt();

            if norm > F::epsilon() {
                for i in 0..n {
                    new_v[i] = new_v[i] / norm;
                }
            }

            // Check convergence
            let mut converged = true;
            for i in 0..n {
                if (new_v[i] - v[i]).abs()
                    > F::from(1e-6).expect("Failed to convert constant to float")
                {
                    converged = false;
                    break;
                }
            }

            v = new_v;

            if converged {
                break;
            }
        }

        // Compute the eigenvalue via the Rayleigh quotient v' A v
        let mut eigenvalue = F::zero();
        for i in 0..n {
            let mut sum = F::zero();
            for j in 0..n {
                sum = sum + matrix[[i, j]] * v[j];
            }
            eigenvalue = eigenvalue + v[i] * sum;
        }

        eigenvalues[k] = eigenvalue;
        for i in 0..n {
            eigenvectors[[i, k]] = v[i];
        }
    }

    Ok((eigenvectors, eigenvalues))
}

#[allow(dead_code)]
fn matrix_multiply<F>(a: &ArrayView2<F>, b: &ArrayView2<F>) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + Copy + std::fmt::Display,
{
    let (m, n) = a.dim();
    let (n2, p) = b.dim();

    if n != n2 {
        return Err(StatsError::DimensionMismatch(
            "Matrix dimensions don't match".to_string(),
        ));
    }

    let mut result = Array2::<F>::zeros((m, p));

    for i in 0..m {
        for j in 0..p {
            let mut sum = F::zero();
            for k in 0..n {
                sum = sum + a[[i, k]] * b[[k, j]];
            }
            result[[i, j]] = sum;
        }
    }

    Ok(result)
}

#[allow(dead_code)]
fn incremental_pca<F>(
    data: &ArrayView2<F>,
    n_components: usize,
    means: &Array1<F>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<PCAResult<F>>
where
    F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Debug + std::fmt::Display,
{
    let (n_obs, n_vars) = data.dim();
    let n_components = n_components.min(n_vars);

    // Batch size for incremental processing
    let batchsize = manager.get_optimal_chunksize(n_obs, std::mem::size_of::<F>() * n_vars);

    // Initialize components with unit basis vectors
    let mut components = Array2::<F>::zeros((n_vars, n_components));
    for i in 0..n_components {
        components[[i % n_vars, i]] = F::one();
    }

    let mut explained_variance = Array1::<F>::zeros(n_components);
    let mut _n_samples_seen = 0;

    // Process data in batches
    for batch_start in (0..n_obs).step_by(batchsize) {
        let batch_end = (batch_start + batchsize).min(n_obs);
        let batch = data.slice(scirs2_core::ndarray::s![batch_start..batch_end, ..]);

        // Center the batch
        let mut centered_batch = Array2::<F>::zeros(batch.dim());
        for i in 0..batch.nrows() {
            for j in 0..batch.ncols() {
                centered_batch[[i, j]] = batch[[i, j]] - means[j];
            }
        }

        // Update components using a simplified incremental update
        for k in 0..n_components {
            let component = components.column(k).to_owned();

            // Project batch onto current component
            let mut projections = Array1::<F>::zeros(batch.nrows());
            for i in 0..batch.nrows() {
                let mut projection = F::zero();
                for j in 0..n_vars {
                    projection = projection + centered_batch[[i, j]] * component[j];
                }
                projections[i] = projection;
            }

            // Update component direction
            let mut new_component = Array1::<F>::zeros(n_vars);
            for j in 0..n_vars {
                let mut sum = F::zero();
                for i in 0..batch.nrows() {
                    sum = sum + centered_batch[[i, j]] * projections[i];
                }
                new_component[j] = sum;
            }

            // Normalize
            let mut norm = F::zero();
            for j in 0..n_vars {
                norm = norm + new_component[j] * new_component[j];
            }
            norm = norm.sqrt();

            if norm > F::epsilon() {
                for j in 0..n_vars {
                    components[[j, k]] = new_component[j] / norm;
                }

                // Update explained variance
                let variance = projections
                    .iter()
                    .map(|&x| x * x)
                    .fold(F::zero(), |acc, x| acc + x);
                explained_variance[k] =
                    variance / F::from(batch.nrows()).expect("Failed to convert to float");
            }
        }

        _n_samples_seen += batch.nrows();
    }

    // Transform the data
    let mut transformeddata = Array2::<F>::zeros((n_obs, n_components));
    for i in 0..n_obs {
        for k in 0..n_components {
            let mut projection = F::zero();
            for j in 0..n_vars {
                let centered_val = data[[i, j]] - means[j];
                projection = projection + centered_val * components[[j, k]];
            }
            transformeddata[[i, k]] = projection;
        }
    }

    Ok(PCAResult {
        components,
        explained_variance,
        transformeddata,
        mean: means.clone(),
    })
}

#[allow(dead_code)]
fn checkarray_finite_2d<F, D>(arr: &ArrayBase<D, Ix2>, name: &str) -> StatsResult<()>
where
    F: Float,
    D: Data<Elem = F>,
{
    for &val in arr.iter() {
        if !val.is_finite() {
            return Err(StatsError::InvalidArgument(format!(
                "{} contains non-finite values",
                name
            )));
        }
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_adaptive_memory_manager() {
        let constraints = MemoryConstraints::default();
        let manager = AdaptiveMemoryManager::new(constraints);

        let chunksize = manager.get_optimal_chunksize(1000, 8);
        assert!(chunksize > 0);
        assert!(chunksize <= 1000);

        let metrics = OperationMetrics {
            operation_type: "test".to_string(),
            memory_used: 1024,
            processing_time: std::time::Duration::from_millis(10),
            chunksize_used: chunksize,
        };

        manager.record_operation(metrics);
        let stats = manager.get_statistics();
        assert_eq!(stats.operations_completed, 1);
    }

    #[test]
    fn test_cache_oblivious_matrix_mult() {
        let a = array![[1.0, 2.0], [3.0, 4.0]];
        let b = array![[5.0, 6.0], [7.0, 8.0]];

        let result =
            cache_oblivious_matrix_mult(&a.view(), &b.view(), 2).expect("multiplication failed");

        // Expected: [[19, 22], [43, 50]]
        assert!((result[[0, 0]] - 19.0).abs() < 1e-10);
        assert!((result[[0, 1]] - 22.0).abs() < 1e-10);
        assert!((result[[1, 0]] - 43.0).abs() < 1e-10);
        assert!((result[[1, 1]] - 50.0).abs() < 1e-10);
    }
}