// scirs2_stats/memory_optimized_advanced.rs

//! Advanced memory optimization for large-scale statistical computing
//!
//! This module provides memory-aware algorithms that automatically adapt
//! to available memory constraints and optimize data layout for cache efficiency.

use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Data, Ix2};
use scirs2_core::numeric::{Float, NumCast, One, Zero};
use scirs2_core::simd_ops::SimdUnifiedOps;
use std::cmp::Ordering;
use std::collections::VecDeque;
use std::sync::{Arc, Mutex};

/// Memory constraints configuration
#[derive(Debug, Clone)]
pub struct MemoryConstraints {
    /// Maximum memory to use (in bytes)
    pub max_memory_bytes: usize,
    /// Preferred chunk size for processing
    pub chunksize: usize,
    /// Use memory mapping for large files
    pub use_memory_mapping: bool,
    /// Enable garbage collection hints
    pub enable_gc_hints: bool,
}

impl Default for MemoryConstraints {
    fn default() -> Self {
        // Default to 1GB max memory, 64KB chunks
        Self {
            max_memory_bytes: 1_024 * 1_024 * 1_024,
            chunksize: 64 * 1024,
            use_memory_mapping: true,
            enable_gc_hints: true,
        }
    }
}

/// Adaptive memory manager that monitors usage and adjusts strategies
pub struct AdaptiveMemoryManager {
    constraints: MemoryConstraints,
    current_usage: Arc<Mutex<usize>>,
    peak_usage: Arc<Mutex<usize>>,
    operation_history: Arc<Mutex<VecDeque<OperationMetrics>>>,
}

#[derive(Debug, Clone)]
pub struct OperationMetrics {
    operation_type: String,
    memory_used: usize,
    processing_time: std::time::Duration,
    chunksize_used: usize,
}

impl AdaptiveMemoryManager {
    pub fn new(constraints: MemoryConstraints) -> Self {
        Self {
            constraints,
            current_usage: Arc::new(Mutex::new(0)),
            peak_usage: Arc::new(Mutex::new(0)),
            operation_history: Arc::new(Mutex::new(VecDeque::with_capacity(100))),
        }
    }

    /// Get optimal chunk size based on current memory usage and data size
    pub fn get_optimal_chunksize(&self, datasize: usize, elementsize: usize) -> usize {
        let current_usage = *self.current_usage.lock().unwrap();
        let available_memory = self
            .constraints
            .max_memory_bytes
            .saturating_sub(current_usage);

        // Use at most 80% of available memory for chunk processing
        let max_chunk_memory = available_memory * 4 / 5;
        let max_chunk_elements = max_chunk_memory / elementsize;

        // Prefer power-of-2 sizes for cache efficiency
        let optimalsize = max_chunk_elements
            .min(datasize)
            .min(self.constraints.chunksize);

        // Round down to the nearest power of two, never returning 0
        if optimalsize <= 1 {
            1
        } else if optimalsize.is_power_of_two() {
            optimalsize
        } else {
            optimalsize.next_power_of_two() / 2
        }
    }

    /// Record memory usage for an operation
    pub fn record_operation(&self, metrics: OperationMetrics) {
        let mut history = self.operation_history.lock().unwrap();

        // Keep only recent operations
        if history.len() >= 100 {
            history.pop_front();
        }

        history.push_back(metrics.clone());

        // Update peak usage
        let mut peak = self.peak_usage.lock().unwrap();
        *peak = (*peak).max(metrics.memory_used);
    }

    /// Get memory usage statistics
    pub fn get_statistics(&self) -> MemoryStatistics {
        let current_usage = *self.current_usage.lock().unwrap();
        let peak_usage = *self.peak_usage.lock().unwrap();
        let history = self.operation_history.lock().unwrap();

        let avg_memory_per_op = if !history.is_empty() {
            history.iter().map(|m| m.memory_used).sum::<usize>() / history.len()
        } else {
            0
        };

        MemoryStatistics {
            current_usage,
            peak_usage,
            avg_memory_per_operation: avg_memory_per_op,
            operations_completed: history.len(),
            memory_efficiency: if peak_usage > 0 {
                (avg_memory_per_op as f64 / peak_usage as f64) * 100.0
            } else {
                100.0
            },
        }
    }
}

#[derive(Debug, Clone)]
pub struct MemoryStatistics {
    pub current_usage: usize,
    pub peak_usage: usize,
    pub avg_memory_per_operation: usize,
    pub operations_completed: usize,
    pub memory_efficiency: f64,
}
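
// A minimal usage sketch for the manager (illustrative values only): the
// chunk-size heuristic takes 80% of the remaining budget, converts bytes to
// elements, clamps to the data size and the configured chunk size, and rounds
// down to a power of two.
#[cfg(test)]
mod memory_manager_example {
    use super::*;

    #[test]
    fn chunksize_respects_budget_and_data_size() {
        let manager = AdaptiveMemoryManager::new(MemoryConstraints {
            max_memory_bytes: 1024 * 1024, // 1 MiB budget, hypothetical
            chunksize: 64 * 1024,
            ..MemoryConstraints::default()
        });

        // 10_000 f64 elements of 8 bytes each easily fit in the budget, so the
        // result is capped by the data size and rounded down to a power of two.
        let chunk = manager.get_optimal_chunksize(10_000, 8);
        assert!(chunk.is_power_of_two());
        assert!(chunk <= 10_000);
    }
}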

/// Memory-aware correlation matrix computation
///
/// Computes correlation matrices using adaptive chunking based on available memory.
/// For very large matrices, uses block-wise computation to stay within memory constraints.
#[allow(dead_code)]
pub fn corrcoef_memory_aware<F>(
    data: &ArrayView2<F>,
    method: &str,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array2<F>>
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + Copy
        + Send
        + Sync
        + std::iter::Sum<F>
        + std::fmt::Debug
        + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    checkarray_finite_2d(data, "data")?;

    let (n_obs, n_vars) = data.dim();
    let elementsize = std::mem::size_of::<F>();

    // Estimate memory requirements
    let matrix_memory = n_vars * n_vars * elementsize;
    let temp_memory = n_obs * elementsize * 2; // For column pairs
    let total_estimated = matrix_memory + temp_memory;

    // Both branches return a fully populated matrix (diagonal included), so
    // no pre-initialization is needed.
    let corr_matrix = if total_estimated <= manager.constraints.max_memory_bytes {
        // Fits in memory - use the standard approach
        compute_correlation_matrix_standard(data, method)?
    } else {
        // Use block-wise computation
        let blocksize = manager.get_optimal_chunksize(n_vars, elementsize * n_vars);
        compute_correlation_matrix_blocked(data, method, blocksize)?
    };

    // Record metrics
    let metrics = OperationMetrics {
        operation_type: format!("corrcoef_memory_aware_{}", method),
        memory_used: total_estimated,
        processing_time: start_time.elapsed(),
        chunksize_used: n_vars,
    };
    manager.record_operation(metrics);

    Ok(corr_matrix)
}
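
// A minimal sketch of calling the memory-aware correlation routine; the data
// and constraints here are illustrative, and we assume `f64` satisfies the
// `SimdUnifiedOps` bound provided by `scirs2_core`.
#[cfg(test)]
mod corrcoef_memory_aware_example {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn small_matrix_uses_in_memory_path() {
        let data = array![[1.0_f64, 2.0], [2.0, 4.1], [3.0, 5.9], [4.0, 8.2]];
        let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());

        let corr = corrcoef_memory_aware(&data.view(), "pearson", &mut manager).unwrap();

        // The diagonal of a correlation matrix is 1 by definition.
        assert!((corr[[0, 0]] - 1.0).abs() < 1e-10);
        assert!((corr[[1, 1]] - 1.0).abs() < 1e-10);
    }
}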

/// Cache-oblivious matrix multiplication for large correlation computations
#[allow(dead_code)]
pub fn cache_oblivious_matrix_mult<F>(
    a: &ArrayView2<F>,
    b: &ArrayView2<F>,
    threshold: usize,
) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Display,
{
    let (m, n) = a.dim();
    let (n2, p) = b.dim();

    if n != n2 {
        return Err(StatsError::DimensionMismatch(
            "Matrix dimensions don't match for multiplication".to_string(),
        ));
    }

    let mut result = Array2::<F>::zeros((m, p));

    if m <= threshold && n <= threshold && p <= threshold {
        // Base case: use standard multiplication
        for i in 0..m {
            for j in 0..p {
                let mut sum = F::zero();
                for k in 0..n {
                    sum = sum + a[[i, k]] * b[[k, j]];
                }
                result[[i, j]] = sum;
            }
        }
    } else if m > threshold {
        // Recursive case: split the taller matrix along its rows. This is a
        // simplified version - a full cache-oblivious implementation would
        // also subdivide along n and p.
        let mid_m = m / 2;

        let a_top = a.slice(scirs2_core::ndarray::s![..mid_m, ..]);
        let a_bottom = a.slice(scirs2_core::ndarray::s![mid_m.., ..]);

        let result_top = cache_oblivious_matrix_mult(&a_top, b, threshold)?;
        let result_bottom = cache_oblivious_matrix_mult(&a_bottom, b, threshold)?;

        result
            .slice_mut(scirs2_core::ndarray::s![..mid_m, ..])
            .assign(&result_top);
        result
            .slice_mut(scirs2_core::ndarray::s![mid_m.., ..])
            .assign(&result_bottom);
    } else {
        // Only n or p exceeds the threshold: fall back to the standard triple
        // loop so the result is still computed.
        for i in 0..m {
            for j in 0..p {
                let mut sum = F::zero();
                for k in 0..n {
                    sum = sum + a[[i, k]] * b[[k, j]];
                }
                result[[i, j]] = sum;
            }
        }
    }

    Ok(result)
}
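
// Illustrative only: the recursion threshold is typically chosen so the tiles
// touched in the base case fit in cache. With 8-byte elements, three 64x64
// f64 tiles occupy 3 * 64 * 64 * 8 = 96 KiB (roughly L2-sized); smaller
// thresholds target L1. Treat these numbers as heuristics, not tuned values.
#[cfg(test)]
mod cache_oblivious_example {
    use super::*;

    #[test]
    fn recursion_matches_direct_multiplication() {
        // 8x3 times 3x2: m exceeds the threshold, so the row-splitting path runs.
        let a = Array2::from_shape_fn((8, 3), |(i, j)| (i + j) as f64);
        let b = Array2::from_shape_fn((3, 2), |(i, j)| (i * 2 + j) as f64);

        let fast = cache_oblivious_matrix_mult(&a.view(), &b.view(), 4).unwrap();
        let direct = matrix_multiply(&a.view(), &b.view()).unwrap();

        for i in 0..8 {
            for j in 0..2 {
                assert!((fast[[i, j]] - direct[[i, j]]).abs() < 1e-10);
            }
        }
    }
}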

/// Streaming covariance computation for large datasets
#[allow(dead_code)]
pub fn streaming_covariance_matrix<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView2<'a, F>>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut n_vars = 0;
    let mut total_obs = 0;
    let mut sum_x = Array1::<F>::zeros(0);
    let mut sum_x2 = Array2::<F>::zeros((0, 0));
    let mut initialized = false;

    for chunk in data_chunks {
        let (chunk_obs, chunk_vars) = chunk.dim();

        if !initialized {
            n_vars = chunk_vars;
            sum_x = Array1::<F>::zeros(n_vars);
            sum_x2 = Array2::<F>::zeros((n_vars, n_vars));
            initialized = true;
        } else if chunk_vars != n_vars {
            return Err(StatsError::DimensionMismatch(
                "All chunks must have the same number of variables".to_string(),
            ));
        }

        total_obs += chunk_obs;

        // Update sums
        for i in 0..chunk_obs {
            let row = chunk.row(i);

            // Update sum_x
            for j in 0..n_vars {
                sum_x[j] = sum_x[j] + row[j];
            }

            // Update sum_x2 (cross products)
            for j in 0..n_vars {
                for k in j..n_vars {
                    let product = row[j] * row[k];
                    sum_x2[[j, k]] = sum_x2[[j, k]] + product;
                    if j != k {
                        sum_x2[[k, j]] = sum_x2[[k, j]] + product;
                    }
                }
            }
        }
    }

    if total_obs == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // Compute the (population) covariance matrix: E[XY] - E[X]E[Y]
    let mut cov_matrix = Array2::<F>::zeros((n_vars, n_vars));
    let n_f = F::from(total_obs).unwrap();

    for i in 0..n_vars {
        for j in 0..n_vars {
            let mean_i = sum_x[i] / n_f;
            let mean_j = sum_x[j] / n_f;
            let cov = (sum_x2[[i, j]] / n_f) - (mean_i * mean_j);
            cov_matrix[[i, j]] = cov;
        }
    }

    // Record metrics
    let memory_used = (n_vars * n_vars + n_vars) * std::mem::size_of::<F>();
    let metrics = OperationMetrics {
        operation_type: "streaming_covariance_matrix".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: n_vars,
    };
    manager.record_operation(metrics);

    Ok(cov_matrix)
}
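
// A minimal sketch of feeding the streaming covariance with row chunks via
// `axis_chunks_iter`; this assumes `scirs2_core::ndarray` re-exports `Axis`,
// as it does for the other ndarray items used in this module.
#[cfg(test)]
mod streaming_covariance_example {
    use super::*;
    use scirs2_core::ndarray::{array, Axis};

    #[test]
    fn chunked_result_matches_population_formula() {
        let data = array![[1.0_f64, 2.0], [2.0, 4.0], [3.0, 6.0], [4.0, 8.0]];
        let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());

        let chunks = data.axis_chunks_iter(Axis(0), 2);
        let cov = streaming_covariance_matrix(chunks, &mut manager).unwrap();

        // var(x) = E[x^2] - E[x]^2 = 7.5 - 6.25 = 1.25 for x = 1..4
        assert!((cov[[0, 0]] - 1.25).abs() < 1e-10);
        // y = 2x, so cov(x, y) = 2 * var(x) = 2.5
        assert!((cov[[0, 1]] - 2.5).abs() < 1e-10);
    }
}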

/// Memory-efficient principal component analysis
#[allow(dead_code)]
pub fn pca_memory_efficient<F>(
    data: &ArrayView2<F>,
    n_components: Option<usize>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<PCAResult<F>>
where
    F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Debug + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let (n_obs, n_vars) = data.dim();
    let n_components = n_components.unwrap_or(n_vars.min(n_obs));

    // Center the data using streaming mean
    let mut means = Array1::<F>::zeros(n_vars);
    for i in 0..n_vars {
        let column = data.column(i);
        means[i] = column.iter().fold(F::zero(), |acc, &x| acc + x) / F::from(n_obs).unwrap();
    }

    // Estimate memory for centered data
    let centereddata_memory = n_obs * n_vars * std::mem::size_of::<F>();

    if centereddata_memory <= manager.constraints.max_memory_bytes / 2 {
        // Can afford to store centered data
        let mut centereddata = Array2::<F>::zeros((n_obs, n_vars));
        for i in 0..n_obs {
            for j in 0..n_vars {
                centereddata[[i, j]] = data[[i, j]] - means[j];
            }
        }

        // Compute covariance matrix
        let cov_matrix = compute_covariance_from_centered(&centereddata.view())?;

        // Eigendecomposition (simplified - production code would use a proper
        // linear algebra library)
        let (eigenvectors, eigenvalues) =
            compute_eigendecomposition(&cov_matrix.view(), n_components)?;

        // Transform data
        let transformed = matrix_multiply(&centereddata.view(), &eigenvectors.view())?;

        let result = PCAResult {
            components: eigenvectors,
            explained_variance: eigenvalues,
            transformeddata: transformed,
            mean: means,
        };

        let metrics = OperationMetrics {
            operation_type: "pca_memory_efficient".to_string(),
            memory_used: centereddata_memory,
            processing_time: start_time.elapsed(),
            chunksize_used: n_vars,
        };
        manager.record_operation(metrics);

        Ok(result)
    } else {
        // Use incremental PCA for very large datasets
        incremental_pca(data, n_components, &means, manager)
    }
}

#[derive(Debug, Clone)]
pub struct PCAResult<F> {
    pub components: Array2<F>,
    pub explained_variance: Array1<F>,
    pub transformeddata: Array2<F>,
    pub mean: Array1<F>,
}
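
// A small usage sketch for the in-memory PCA path; the toy data are
// illustrative. With two perfectly correlated columns the covariance matrix
// has rank 1, so the first component should capture essentially all variance.
#[cfg(test)]
mod pca_example {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn first_component_dominates_on_collinear_data() {
        let data = array![[1.0_f64, 2.0], [2.0, 4.0], [3.0, 6.0], [4.0, 8.0]];
        let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());

        let result = pca_memory_efficient(&data.view(), Some(2), &mut manager).unwrap();

        // The second eigenvalue of a rank-1 covariance matrix is ~0.
        assert!(result.explained_variance[0] > result.explained_variance[1].abs());
        assert_eq!(result.transformeddata.dim(), (4, 2));
    }
}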

/// Streaming principal component analysis for very large datasets
#[allow(dead_code)]
pub fn streaming_pca_enhanced<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView2<'a, F>>,
    n_components: usize,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<PCAResult<F>>
where
    F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    // First pass: compute mean and covariance incrementally
    let mut n_samples_ = 0;
    let mut n_features = 0;
    let mut running_mean = Array1::<F>::zeros(0);
    let mut running_cov = Array2::<F>::zeros((0, 0));
    let mut initialized = false;

    for chunk in data_chunks {
        let (chunk_samples, chunk_features) = chunk.dim();

        if !initialized {
            n_features = chunk_features;
            running_mean = Array1::<F>::zeros(n_features);
            running_cov = Array2::<F>::zeros((n_features, n_features));
            initialized = true;
        } else if chunk_features != n_features {
            return Err(StatsError::DimensionMismatch(
                "All chunks must have the same number of features".to_string(),
            ));
        }

        // Update running statistics with a Welford-style online update
        for i in 0..chunk_samples {
            n_samples_ += 1;
            let row = chunk.row(i);

            // Update mean
            let n_f = F::from(n_samples_).unwrap();
            for j in 0..n_features {
                let delta = row[j] - running_mean[j];
                running_mean[j] = running_mean[j] + delta / n_f;
            }

            // Update covariance (simplified incremental update, not the exact
            // Welford co-moment recurrence)
            if n_samples_ > 1 {
                for j in 0..n_features {
                    for k in j..n_features {
                        let prod = (row[j] - running_mean[j]) * (row[k] - running_mean[k]);
                        running_cov[[j, k]] =
                            running_cov[[j, k]] * F::from(n_samples_ - 1).unwrap() / n_f
                                + prod / n_f;
                        if j != k {
                            running_cov[[k, j]] = running_cov[[j, k]];
                        }
                    }
                }
            }
        }
    }

    if n_samples_ == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // Simplified eigendecomposition (production code would use a proper SVD)
    let (components, explained_variance) =
        compute_eigendecomposition(&running_cov.view(), n_components)?;

    // Record memory usage
    let memory_used =
        n_features * n_features * std::mem::size_of::<F>() + n_features * std::mem::size_of::<F>();
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_pca_enhanced".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok(PCAResult {
        components,
        explained_variance,
        transformeddata: Array2::zeros((0, 0)), // Projection happens in a second pass
        mean: running_mean,
    })
}
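
// Since `streaming_pca_enhanced` returns an empty `transformeddata`, the
// caller projects the data in a second pass. A minimal sketch (this helper is
// hypothetical, not part of the original module):
#[allow(dead_code)]
fn project_chunk<F>(chunk: &ArrayView2<F>, pca: &PCAResult<F>) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + Copy + std::fmt::Display,
{
    let (rows, cols) = chunk.dim();
    let mut centered = Array2::<F>::zeros((rows, cols));
    for i in 0..rows {
        for j in 0..cols {
            centered[[i, j]] = chunk[[i, j]] - pca.mean[j];
        }
    }
    // Project the centered rows onto the learned components.
    matrix_multiply(&centered.view(), &pca.components.view())
}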

/// Enhanced streaming histogram computation with adaptive binning
#[allow(dead_code)]
pub fn streaming_histogram_adaptive<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView1<'a, F>>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<(Array1<F>, Array1<usize>)>
where
    F: Float + NumCast + Zero + One + Copy + PartialOrd + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut min_val = F::infinity();
    let mut max_val = F::neg_infinity();
    let mut total_count = 0;
    let mut values = Vec::new(); // Reservoir sample for adaptive binning

    // First pass: find the range and keep a bounded sample for bin selection
    for chunk in data_chunks {
        for &value in chunk.iter() {
            if value < min_val {
                min_val = value;
            }
            if value > max_val {
                max_val = value;
            }
            total_count += 1;

            // Reservoir sampling to maintain memory bounds
            if values.len() < manager.constraints.chunksize {
                values.push(value);
            } else {
                let j = {
                    use scirs2_core::random::Rng;
                    let mut rng = scirs2_core::random::thread_rng();
                    rng.gen_range(0..total_count)
                };
                if j < values.len() {
                    values[j] = value;
                }
            }
        }
    }

    if total_count == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // Adaptive bin count using the Freedman-Diaconis rule: h = 2 * IQR / n^(1/3)
    values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
    let q75_idx = (values.len() as f64 * 0.75) as usize;
    let q25_idx = (values.len() as f64 * 0.25) as usize;
    let iqr = values[q75_idx] - values[q25_idx];
    let h = F::from(2.0).unwrap() * iqr
        / F::from(total_count as f64)
            .unwrap()
            .powf(F::from(1.0 / 3.0).unwrap());
    let n_bins = if h > F::zero() {
        ((max_val - min_val) / h)
            .to_usize()
            .unwrap_or(50)
            .max(10)
            .min(1000)
    } else {
        50 // Default when the IQR is degenerate
    };

    let bin_width = (max_val - min_val) / F::from(n_bins).unwrap();
    let mut bin_edges = Array1::<F>::zeros(n_bins + 1);
    let mut counts = Array1::<usize>::zeros(n_bins);

    // Set bin edges
    for i in 0..=n_bins {
        bin_edges[i] = min_val + F::from(i).unwrap() * bin_width;
    }

    // Simplified second pass: only the reservoir sample is counted here. A
    // full implementation would iterate through the chunks again, since the
    // iterator is consumed by the first pass.
    for &value in values.iter() {
        if value >= min_val && value <= max_val {
            let bin_idx = ((value - min_val) / bin_width)
                .to_usize()
                .unwrap_or(0)
                .min(n_bins - 1);
            counts[bin_idx] += 1;
        }
    }

    let memory_used = n_bins * (std::mem::size_of::<F>() + std::mem::size_of::<usize>());
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_histogram_adaptive".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok((bin_edges, counts))
}
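
// A minimal sketch of the histogram call on a chunked 1-D array. With the
// default constraints the reservoir holds the whole stream, so the edges
// should span the observed range exactly.
#[cfg(test)]
mod histogram_example {
    use super::*;
    use scirs2_core::ndarray::Axis;

    #[test]
    fn edges_cover_the_observed_range() {
        let data = Array1::from_iter((0..1000).map(|i| i as f64));
        let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());

        let (edges, counts) =
            streaming_histogram_adaptive(data.axis_chunks_iter(Axis(0), 128), &mut manager)
                .unwrap();

        assert_eq!(edges.len(), counts.len() + 1);
        assert!((edges[0] - 0.0).abs() < 1e-10);
        assert!((edges[edges.len() - 1] - 999.0).abs() < 1e-6);
    }
}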

/// Memory-efficient streaming quantile computation using the P² algorithm
#[allow(dead_code)]
pub fn streaming_quantiles_p2<'a, F>(
    data_chunks: impl Iterator<Item = ArrayView1<'a, F>>,
    quantiles: &[f64],
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array1<F>>
where
    F: Float + NumCast + Zero + One + Copy + PartialOrd + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut p2_estimators: Vec<P2Estimator<F>> =
        quantiles.iter().map(|&q| P2Estimator::new(q)).collect();

    let mut total_count = 0;

    for chunk in data_chunks {
        for &value in chunk.iter() {
            total_count += 1;
            for estimator in &mut p2_estimators {
                estimator.update(value);
            }
        }
    }

    if total_count == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    let results: Vec<F> = p2_estimators.iter().map(|est| est.quantile()).collect();

    let memory_used = quantiles.len() * std::mem::size_of::<P2Estimator<F>>();
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_quantiles_p2".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok(Array1::from_vec(results))
}
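
// A minimal sketch: estimate the quartiles of a pseudo-uniform stream. P² is
// an approximation, so the assertion uses a loose tolerance. The LCG below is
// an illustrative stand-in for real streamed data.
#[cfg(test)]
mod quantiles_example {
    use super::*;
    use scirs2_core::ndarray::Axis;

    #[test]
    fn p2_median_of_uniform_stream_is_near_half() {
        // Simple LCG to give the stream a pseudo-random order in [0, 1).
        let mut state: u64 = 12345;
        let data = Array1::from_iter((0..10_000).map(|_| {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            ((state >> 33) as f64) / ((1u64 << 31) as f64)
        }));
        let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());

        let estimates = streaming_quantiles_p2(
            data.axis_chunks_iter(Axis(0), 512),
            &[0.25, 0.5, 0.75],
            &mut manager,
        )
        .unwrap();

        assert!((estimates[1] - 0.5).abs() < 0.05);
    }
}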

/// P² algorithm estimator for streaming quantiles
#[derive(Debug, Clone)]
struct P2Estimator<F> {
    quantile: f64,
    markers: [F; 5],
    positions: [f64; 5],
    desired_positions: [f64; 5],
    increment: [f64; 5],
    count: usize,
}

impl<F> P2Estimator<F>
where
    F: Float + NumCast + Copy + PartialOrd + std::fmt::Display,
{
    fn new(quantile: f64) -> Self {
        Self {
            quantile,
            markers: [F::zero(); 5],
            positions: [1.0, 2.0, 3.0, 4.0, 5.0],
            desired_positions: [
                1.0,
                1.0 + 2.0 * quantile,
                1.0 + 4.0 * quantile,
                3.0 + 2.0 * quantile,
                5.0,
            ],
            increment: [0.0, quantile / 2.0, quantile, (1.0 + quantile) / 2.0, 1.0],
            count: 0,
        }
    }

    fn update(&mut self, value: F) {
        self.count += 1;

        if self.count <= 5 {
            // Initialize with the first 5 values
            self.markers[self.count - 1] = value;
            if self.count == 5 {
                self.markers
                    .sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
            }
            return;
        }

        // Find cell k such that markers[k] <= value < markers[k+1]
        let mut k = 0;
        for i in 0..5 {
            if value >= self.markers[i] {
                k = i + 1;
            }
        }

        if k == 0 {
            // New minimum: adjust the lowest marker
            self.markers[0] = value;
            k = 1;
        } else if k == 5 {
            // New maximum: adjust the highest marker
            self.markers[4] = value;
            k = 4;
        }

        // Increment positions k through 4
        for i in k..5 {
            self.positions[i] += 1.0;
        }

        // Update desired positions
        for i in 0..5 {
            self.desired_positions[i] += self.increment[i];
        }

        // Adjust marker heights if necessary
        for i in 1..4 {
            let d = self.desired_positions[i] - self.positions[i];
            if (d >= 1.0 && self.positions[i + 1] - self.positions[i] > 1.0)
                || (d <= -1.0 && self.positions[i - 1] - self.positions[i] < -1.0)
            {
                let d_sign = if d >= 0.0 { 1.0 } else { -1.0 };
                let new_height = self.parabolic_prediction(i, d_sign);

                if self.markers[i - 1] < new_height && new_height < self.markers[i + 1] {
                    self.markers[i] = new_height;
                } else {
                    self.markers[i] = self.linear_prediction(i, d_sign);
                }
                self.positions[i] += d_sign;
            }
        }
    }

    fn parabolic_prediction(&self, i: usize, d: f64) -> F {
        let qi = self.markers[i];
        let qi_prev = self.markers[i - 1];
        let qi_next = self.markers[i + 1];
        let ni = self.positions[i];
        let ni_prev = self.positions[i - 1];
        let ni_next = self.positions[i + 1];

        let d_f = F::from(d).unwrap();
        let ni_f = F::from(ni).unwrap();
        let ni_prev_f = F::from(ni_prev).unwrap();
        let ni_next_f = F::from(ni_next).unwrap();

        let a = d_f / (ni_next_f - ni_prev_f);
        let b1 = (ni_f - ni_prev_f + d_f) * (qi_next - qi) / (ni_next_f - ni_f);
        let b2 = (ni_next_f - ni_f - d_f) * (qi - qi_prev) / (ni_f - ni_prev_f);

        qi + a * (b1 + b2)
    }

    fn linear_prediction(&self, i: usize, d: f64) -> F {
        if d > 0.0 {
            let qi_next = self.markers[i + 1];
            let qi = self.markers[i];
            let ni_next = self.positions[i + 1];
            let ni = self.positions[i];
            qi + (qi_next - qi) * F::from(d / (ni_next - ni)).unwrap()
        } else {
            let qi = self.markers[i];
            let qi_prev = self.markers[i - 1];
            let ni = self.positions[i];
            let ni_prev = self.positions[i - 1];
            qi + (qi_prev - qi) * F::from(-d / (ni - ni_prev)).unwrap()
        }
    }

    fn quantile(&self) -> F {
        if self.count < 5 {
            // For fewer than 5 observations, fall back to simple sorting
            let mut sorted = self.markers[..self.count].to_vec();
            sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
            let idx = (self.quantile * (sorted.len() - 1) as f64) as usize;
            sorted[idx]
        } else {
            self.markers[2] // The middle marker tracks the desired quantile
        }
    }
}
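
// A quick sanity check on the estimator itself: the first five observations
// are sorted directly into the markers, so after seeing 1..=5 the middle
// marker of a median estimator is the exact sample median, 3.
#[cfg(test)]
mod p2_estimator_example {
    use super::*;

    #[test]
    fn median_marker_after_initialization() {
        let mut est = P2Estimator::<f64>::new(0.5);
        for v in [5.0, 1.0, 4.0, 2.0, 3.0] {
            est.update(v);
        }
        assert!((est.quantile() - 3.0).abs() < 1e-12);
    }
}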

/// Enhanced streaming regression for large datasets with regularization
#[allow(dead_code)]
pub fn streaming_regression_enhanced<'a, F>(
    data_chunks: impl Iterator<Item = (ArrayView2<'a, F>, ArrayView1<'a, F>)>,
    regularization: F,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<Array1<F>>
where
    F: Float + NumCast + Zero + One + Copy + 'a + std::fmt::Display,
{
    let start_time = std::time::Instant::now();

    let mut xtx = Array2::<F>::zeros((0, 0));
    let mut xty = Array1::<F>::zeros(0);
    let mut n_samples_ = 0;
    let mut n_features = 0;
    let mut initialized = false;

    // Accumulate X'X and X'y incrementally
    for (x_chunk, y_chunk) in data_chunks {
        let (chunk_samples, chunk_features) = x_chunk.dim();

        if !initialized {
            n_features = chunk_features;
            xtx = Array2::<F>::zeros((n_features, n_features));
            xty = Array1::<F>::zeros(n_features);
            initialized = true;
        } else if chunk_features != n_features {
            return Err(StatsError::DimensionMismatch(
                "All chunks must have the same number of features".to_string(),
            ));
        }

        if y_chunk.len() != chunk_samples {
            return Err(StatsError::DimensionMismatch(
                "X and y must have the same number of samples".to_string(),
            ));
        }

        n_samples_ += chunk_samples;

        // Update X'X
        for i in 0..n_features {
            for j in i..n_features {
                let mut sum = F::zero();
                for k in 0..chunk_samples {
                    sum = sum + x_chunk[[k, i]] * x_chunk[[k, j]];
                }
                xtx[[i, j]] = xtx[[i, j]] + sum;
                if i != j {
                    xtx[[j, i]] = xtx[[i, j]];
                }
            }
        }

        // Update X'y
        for i in 0..n_features {
            let mut sum = F::zero();
            for k in 0..chunk_samples {
                sum = sum + x_chunk[[k, i]] * y_chunk[k];
            }
            xty[i] = xty[i] + sum;
        }
    }

    if n_samples_ == 0 {
        return Err(StatsError::InvalidArgument("No data provided".to_string()));
    }

    // Add ridge regularization to the diagonal
    for i in 0..n_features {
        xtx[[i, i]] = xtx[[i, i]] + regularization;
    }

    // Solve (X'X + λI)β = X'y using a simplified method; production code
    // would use a proper matrix decomposition
    let coefficients = solve_linear_system(&xtx.view(), &xty.view())?;

    let memory_used =
        n_features * n_features * std::mem::size_of::<F>() + n_features * std::mem::size_of::<F>();
    manager.record_operation(OperationMetrics {
        operation_type: "streaming_regression_enhanced".to_string(),
        memory_used,
        processing_time: start_time.elapsed(),
        chunksize_used: manager.constraints.chunksize,
    });

    Ok(coefficients)
}
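
// A minimal ridge-regression sketch over a single chunk: y = 2x with a tiny
// regularization term, so the fitted coefficient should be close to 2.
#[cfg(test)]
mod streaming_regression_example {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn recovers_slope_of_linear_data() {
        let x = array![[1.0_f64], [2.0], [3.0], [4.0]];
        let y = array![2.0_f64, 4.0, 6.0, 8.0];
        let mut manager = AdaptiveMemoryManager::new(MemoryConstraints::default());

        let beta = streaming_regression_enhanced(
            std::iter::once((x.view(), y.view())),
            1e-9,
            &mut manager,
        )
        .unwrap();

        assert!((beta[0] - 2.0).abs() < 1e-6);
    }
}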

/// Simple linear system solver (production code would use a proper LU decomposition)
#[allow(dead_code)]
fn solve_linear_system<F>(a: &ArrayView2<F>, b: &ArrayView1<F>) -> StatsResult<Array1<F>>
where
    F: Float + NumCast + Zero + One + Copy + std::fmt::Display,
{
    let n = a.nrows();
    if a.ncols() != n || b.len() != n {
        return Err(StatsError::DimensionMismatch(
            "Matrix dimensions incompatible".to_string(),
        ));
    }

    // Gauss-Jordan elimination with partial pivoting
    let mut aug = Array2::<F>::zeros((n, n + 1));

    // Copy A and b into the augmented matrix
    for i in 0..n {
        for j in 0..n {
            aug[[i, j]] = a[[i, j]];
        }
        aug[[i, n]] = b[i];
    }

    // Forward elimination
    for i in 0..n {
        // Find pivot
        let mut max_row = i;
        for k in (i + 1)..n {
            if aug[[k, i]].abs() > aug[[max_row, i]].abs() {
                max_row = k;
            }
        }

        // Swap rows
        if max_row != i {
            for j in 0..=n {
                let temp = aug[[i, j]];
                aug[[i, j]] = aug[[max_row, j]];
                aug[[max_row, j]] = temp;
            }
        }

        // Normalize the pivot row
        let pivot = aug[[i, i]];
        if pivot.abs() < F::from(1e-12).unwrap() {
            return Err(StatsError::ComputationError(
                "Matrix is singular".to_string(),
            ));
        }

        for j in i..=n {
            aug[[i, j]] = aug[[i, j]] / pivot;
        }

        // Eliminate the column in all other rows
        for k in 0..n {
            if k != i {
                let factor = aug[[k, i]];
                for j in i..=n {
                    aug[[k, j]] = aug[[k, j]] - factor * aug[[i, j]];
                }
            }
        }
    }

    // Extract solution
    let mut solution = Array1::<F>::zeros(n);
    for i in 0..n {
        solution[i] = aug[[i, n]];
    }

    Ok(solution)
}

// Helper functions (simplified implementations)

#[allow(dead_code)]
fn compute_correlation_matrix_standard<F>(
    data: &ArrayView2<F>,
    method: &str,
) -> StatsResult<Array2<F>>
where
    F: Float
        + NumCast
        + Zero
        + One
        + Copy
        + std::iter::Sum<F>
        + std::fmt::Debug
        + std::fmt::Display
        + Send
        + Sync
        + scirs2_core::simd_ops::SimdUnifiedOps,
{
    // Use the existing corrcoef implementation
    crate::corrcoef(data, method)
}

#[allow(dead_code)]
fn compute_correlation_matrix_blocked<F>(
    data: &ArrayView2<F>,
    method: &str,
    blocksize: usize,
) -> StatsResult<Array2<F>>
where
    F: Float
        + NumCast
        + Zero
        + One
        + Copy
        + std::iter::Sum<F>
        + std::fmt::Debug
        + std::fmt::Display
        + scirs2_core::simd_ops::SimdUnifiedOps,
{
    let (_, n_vars) = data.dim();
    let mut corr_matrix = Array2::<F>::zeros((n_vars, n_vars));

    // Set diagonal
    for i in 0..n_vars {
        corr_matrix[[i, i]] = F::one();
    }

    // Process in blocks
    for i_block in (0..n_vars).step_by(blocksize) {
        let i_end = (i_block + blocksize).min(n_vars);

        for j_block in (i_block..n_vars).step_by(blocksize) {
            let j_end = (j_block + blocksize).min(n_vars);

            // Compute correlations for this block
            for i in i_block..i_end {
                for j in j_block.max(i + 1)..j_end {
                    let col_i = data.column(i);
                    let col_j = data.column(j);

                    let corr = match method {
                        "pearson" => crate::pearson_r(&col_i, &col_j)?,
                        "spearman" => crate::spearman_r(&col_i, &col_j)?,
                        "kendall" => crate::kendall_tau(&col_i, &col_j, "b")?,
                        _ => {
                            return Err(StatsError::InvalidArgument(format!(
                                "Unknown method: {}",
                                method
                            )))
                        }
                    };

                    corr_matrix[[i, j]] = corr;
                    corr_matrix[[j, i]] = corr;
                }
            }
        }
    }

    Ok(corr_matrix)
}

#[allow(dead_code)]
fn compute_covariance_from_centered<F>(data: &ArrayView2<F>) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + Copy + std::fmt::Display,
{
    let (n_obs, n_vars) = data.dim();
    if n_obs < 2 {
        return Err(StatsError::InvalidArgument(
            "At least 2 observations are required for sample covariance".to_string(),
        ));
    }

    let mut cov_matrix = Array2::<F>::zeros((n_vars, n_vars));
    let n_f = F::from(n_obs - 1).unwrap(); // Sample covariance (n - 1 denominator)

    for i in 0..n_vars {
        for j in i..n_vars {
            let mut cov = F::zero();
            for k in 0..n_obs {
                cov = cov + data[[k, i]] * data[[k, j]];
            }
            cov = cov / n_f;
            cov_matrix[[i, j]] = cov;
            cov_matrix[[j, i]] = cov;
        }
    }

    Ok(cov_matrix)
}

#[allow(dead_code)]
fn compute_eigendecomposition<F>(
    matrix: &ArrayView2<F>,
    n_components: usize,
) -> StatsResult<(Array2<F>, Array1<F>)>
where
    F: Float + NumCast + Zero + One + Copy + std::fmt::Display,
{
    let n = matrix.dim().0;
    let n_components = n_components.min(n);

    // Power iteration with deflation for the top eigenvalues/eigenvectors.
    // This is a simplified implementation - production code would use LAPACK.
    let mut eigenvalues = Array1::<F>::zeros(n_components);
    let mut eigenvectors = Array2::<F>::zeros((n, n_components));

    for k in 0..n_components {
        // Start from a constant vector (a random start would avoid the rare
        // case of being orthogonal to the dominant eigenvector)
        let mut v = Array1::<F>::ones(n);

        // Power iteration, at most 100 iterations
        for _ in 0..100 {
            let mut new_v = Array1::<F>::zeros(n);

            // Matrix-vector multiplication
            for i in 0..n {
                let mut sum = F::zero();
                for j in 0..n {
                    sum = sum + matrix[[i, j]] * v[j];
                }
                new_v[i] = sum;
            }

            // Orthogonalize against previously found eigenvectors
            for prev_k in 0..k {
                let mut dot_product = F::zero();
                for i in 0..n {
                    dot_product = dot_product + new_v[i] * eigenvectors[[i, prev_k]];
                }

                for i in 0..n {
                    new_v[i] = new_v[i] - dot_product * eigenvectors[[i, prev_k]];
                }
            }

            // Normalize
            let mut norm = F::zero();
            for i in 0..n {
                norm = norm + new_v[i] * new_v[i];
            }
            norm = norm.sqrt();

            if norm > F::epsilon() {
                for i in 0..n {
                    new_v[i] = new_v[i] / norm;
                }
            }

            // Check convergence
            let mut converged = true;
            for i in 0..n {
                if (new_v[i] - v[i]).abs() > F::from(1e-6).unwrap() {
                    converged = false;
                    break;
                }
            }

            v = new_v;

            if converged {
                break;
            }
        }

        // Compute the eigenvalue as the Rayleigh quotient v' M v
        let mut eigenvalue = F::zero();
        for i in 0..n {
            let mut sum = F::zero();
            for j in 0..n {
                sum = sum + matrix[[i, j]] * v[j];
            }
            eigenvalue = eigenvalue + v[i] * sum;
        }

        eigenvalues[k] = eigenvalue;
        for i in 0..n {
            eigenvectors[[i, k]] = v[i];
        }
    }

    Ok((eigenvectors, eigenvalues))
}

#[allow(dead_code)]
fn matrix_multiply<F>(a: &ArrayView2<F>, b: &ArrayView2<F>) -> StatsResult<Array2<F>>
where
    F: Float + NumCast + Zero + Copy + std::fmt::Display,
{
    let (m, n) = a.dim();
    let (n2, p) = b.dim();

    if n != n2 {
        return Err(StatsError::DimensionMismatch(
            "Matrix dimensions don't match".to_string(),
        ));
    }

    let mut result = Array2::<F>::zeros((m, p));

    for i in 0..m {
        for j in 0..p {
            let mut sum = F::zero();
            for k in 0..n {
                sum = sum + a[[i, k]] * b[[k, j]];
            }
            result[[i, j]] = sum;
        }
    }

    Ok(result)
}

#[allow(dead_code)]
fn incremental_pca<F>(
    data: &ArrayView2<F>,
    n_components: usize,
    means: &Array1<F>,
    manager: &mut AdaptiveMemoryManager,
) -> StatsResult<PCAResult<F>>
where
    F: Float + NumCast + Zero + One + Copy + Send + Sync + std::fmt::Debug + std::fmt::Display,
{
    let (n_obs, n_vars) = data.dim();
    let n_components = n_components.min(n_vars);

    // Batch size for incremental processing
    let batchsize = manager.get_optimal_chunksize(n_obs, std::mem::size_of::<F>() * n_vars);

    // Initialize components with unit basis vectors
    let mut components = Array2::<F>::zeros((n_vars, n_components));
    for i in 0..n_components {
        components[[i % n_vars, i]] = F::one();
    }

    let mut explained_variance = Array1::<F>::zeros(n_components);
    let mut _n_samples_seen = 0;

    // Process data in batches
    for batch_start in (0..n_obs).step_by(batchsize) {
        let batch_end = (batch_start + batchsize).min(n_obs);
        let batch = data.slice(scirs2_core::ndarray::s![batch_start..batch_end, ..]);

        // Center the batch
        let mut centered_batch = Array2::<F>::zeros(batch.dim());
        for i in 0..batch.nrows() {
            for j in 0..batch.ncols() {
                centered_batch[[i, j]] = batch[[i, j]] - means[j];
            }
        }

        // Update components using a simplified incremental update
        for k in 0..n_components {
            let component = components.column(k).to_owned();

            // Project the batch onto the current component
            let mut projections = Array1::<F>::zeros(batch.nrows());
            for i in 0..batch.nrows() {
                let mut projection = F::zero();
                for j in 0..n_vars {
                    projection = projection + centered_batch[[i, j]] * component[j];
                }
                projections[i] = projection;
            }

            // Update the component direction
            let mut new_component = Array1::<F>::zeros(n_vars);
            for j in 0..n_vars {
                let mut sum = F::zero();
                for i in 0..batch.nrows() {
                    sum = sum + centered_batch[[i, j]] * projections[i];
                }
                new_component[j] = sum;
            }

            // Normalize
            let mut norm = F::zero();
            for j in 0..n_vars {
                norm = norm + new_component[j] * new_component[j];
            }
            norm = norm.sqrt();

            if norm > F::epsilon() {
                for j in 0..n_vars {
                    components[[j, k]] = new_component[j] / norm;
                }

                // Update explained variance
                let variance = projections
                    .iter()
                    .map(|&x| x * x)
                    .fold(F::zero(), |acc, x| acc + x);
                explained_variance[k] = variance / F::from(batch.nrows()).unwrap();
            }
        }

        _n_samples_seen += batch.nrows();
    }

    // Transform the data
    let mut transformeddata = Array2::<F>::zeros((n_obs, n_components));
    for i in 0..n_obs {
        for k in 0..n_components {
            let mut projection = F::zero();
            for j in 0..n_vars {
                let centered_val = data[[i, j]] - means[j];
                projection = projection + centered_val * components[[j, k]];
            }
            transformeddata[[i, k]] = projection;
        }
    }

    Ok(PCAResult {
        components,
        explained_variance,
        transformeddata,
        mean: means.clone(),
    })
}

#[allow(dead_code)]
fn checkarray_finite_2d<F, D>(arr: &ArrayBase<D, Ix2>, name: &str) -> StatsResult<()>
where
    F: Float,
    D: Data<Elem = F>,
{
    for &val in arr.iter() {
        if !val.is_finite() {
            return Err(StatsError::InvalidArgument(format!(
                "{} contains non-finite values",
                name
            )));
        }
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_adaptive_memory_manager() {
        let constraints = MemoryConstraints::default();
        let manager = AdaptiveMemoryManager::new(constraints);

        let chunksize = manager.get_optimal_chunksize(1000, 8);
        assert!(chunksize > 0);
        assert!(chunksize <= 1000);

        let metrics = OperationMetrics {
            operation_type: "test".to_string(),
            memory_used: 1024,
            processing_time: std::time::Duration::from_millis(10),
            chunksize_used: chunksize,
        };

        manager.record_operation(metrics);
        let stats = manager.get_statistics();
        assert_eq!(stats.operations_completed, 1);
    }

    #[test]
    fn test_cache_oblivious_matrix_mult() {
        let a = array![[1.0, 2.0], [3.0, 4.0]];
        let b = array![[5.0, 6.0], [7.0, 8.0]];

        let result = cache_oblivious_matrix_mult(&a.view(), &b.view(), 2).unwrap();

        // Expected: [[19, 22], [43, 50]]
        assert!((result[[0, 0]] - 19.0).abs() < 1e-10);
        assert!((result[[0, 1]] - 22.0).abs() < 1e-10);
        assert!((result[[1, 0]] - 43.0).abs() < 1e-10);
        assert!((result[[1, 1]] - 50.0).abs() < 1e-10);
    }
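
    // Additional sanity check for the naive solver: a well-conditioned 2x2
    // system with a known solution.
    #[test]
    fn test_solve_linear_system_2x2() {
        // 2x + y = 5, x + 3y = 10  =>  x = 1, y = 3
        let a = array![[2.0, 1.0], [1.0, 3.0]];
        let b = array![5.0, 10.0];

        let x = solve_linear_system(&a.view(), &b.view()).unwrap();

        assert!((x[0] - 1.0).abs() < 1e-10);
        assert!((x[1] - 3.0).abs() < 1e-10);
    }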
}