scirs2_stats/memory_profiling.rs

//! Memory profiling and optimization utilities for statistical operations
//!
//! This module provides tools for profiling memory usage and implementing
//! memory-efficient algorithms with adaptive strategies based on available memory.

use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Data, Ix1, Ix2};
use scirs2_core::numeric::{Float, NumCast};
use std::collections::VecDeque;
use std::sync::Arc;

/// Memory usage profile for a statistical operation
#[derive(Debug, Clone)]
pub struct MemoryProfile {
    /// Peak memory usage in bytes
    pub peak_memory: usize,
    /// Average memory usage in bytes
    pub avg_memory: usize,
    /// Number of allocations
    pub allocations: usize,
    /// Number of deallocations
    pub deallocations: usize,
    /// Memory efficiency score (0-1)
    pub efficiency_score: f64,
}

/// Memory-aware algorithm selector
pub struct MemoryAdaptiveAlgorithm {
    /// Available memory in bytes
    available_memory: usize,
    /// Preferred chunk size for streaming operations
    preferred_chunksize: usize,
    /// Whether to use in-place algorithms when possible
    #[allow(dead_code)]
    prefer_inplace: bool,
}

impl Default for MemoryAdaptiveAlgorithm {
    fn default() -> Self {
        Self::new()
    }
}

impl MemoryAdaptiveAlgorithm {
    /// Create a new memory-adaptive algorithm selector
    pub fn new() -> Self {
        // Estimate available memory via the platform-specific probes below
        let available_memory = Self::estimate_available_memory();
        let preferred_chunksize = Self::calculate_optimal_chunksize(available_memory);

        Self {
            available_memory,
            preferred_chunksize,
            prefer_inplace: available_memory < 1_000_000_000, // Prefer in-place if < 1GB
        }
    }

    /// Estimate available system memory using platform-specific calls
    fn estimate_available_memory() -> usize {
        #[cfg(target_os = "linux")]
        {
            Self::get_available_memory_linux()
        }
        #[cfg(target_os = "windows")]
        {
            Self::get_available_memory_windows()
        }
        #[cfg(target_os = "macos")]
        {
            Self::get_available_memory_macos()
        }
        #[cfg(not(any(target_os = "linux", target_os = "windows", target_os = "macos")))]
        {
            // Fallback for other systems
            Self::get_available_memory_fallback()
        }
    }

    #[cfg(target_os = "linux")]
    fn get_available_memory_linux() -> usize {
        use std::fs;

        // Read /proc/meminfo for accurate memory information
        if let Ok(meminfo) = fs::read_to_string("/proc/meminfo") {
            let mut mem_available = None;
            let mut mem_free = None;
            let mut mem_total = None;

            for line in meminfo.lines() {
                if line.starts_with("MemAvailable:") {
                    if let Some(value) = line.split_whitespace().nth(1) {
                        if let Ok(kb) = value.parse::<usize>() {
                            mem_available = Some(kb * 1024); // Convert KB to bytes
                        }
                    }
                } else if line.starts_with("MemFree:") {
                    if let Some(value) = line.split_whitespace().nth(1) {
                        if let Ok(kb) = value.parse::<usize>() {
                            mem_free = Some(kb * 1024);
                        }
                    }
                } else if line.starts_with("MemTotal:") {
                    if let Some(value) = line.split_whitespace().nth(1) {
                        if let Ok(kb) = value.parse::<usize>() {
                            mem_total = Some(kb * 1024);
                        }
                    }
                }
            }

            // Prefer MemAvailable (includes reclaimable memory), fall back to MemFree
            if let Some(available) = mem_available {
                return available;
            } else if let Some(free) = mem_free {
                return free;
            } else if let Some(total) = mem_total {
                // Conservative estimate: 50% of total if we can't get precise info
                return total / 2;
            }
        }

        // Fallback if /proc/meminfo is not readable
        Self::get_available_memory_fallback()
    }

    #[cfg(target_os = "windows")]
    fn get_available_memory_windows() -> usize {
        // On Windows we would use the GlobalMemoryStatusEx API.
        // For cross-platform compatibility without external dependencies,
        // we use a conservative estimate based on typical Windows systems.

        // This could be improved by using the winapi crate:
        // use winapi::um::sysinfoapi::{GlobalMemoryStatusEx, MEMORYSTATUSEX};
        // But to avoid dependencies, we use a reasonable estimate.

        // Assume at least 4GB total, use 25% as available
        let conservative_total = 4_000_000_000; // 4GB
        conservative_total / 4
    }

    #[cfg(target_os = "macos")]
    fn get_available_memory_macos() -> usize {
        use std::process::Command;

        // Use the vm_stat command to get memory information
        if let Ok(output) = Command::new("vm_stat").output() {
            if let Ok(stdout) = String::from_utf8(output.stdout) {
                let mut pagesize = 4096; // Default page size
                let mut free_pages = 0;
                let mut inactive_pages = 0;

                for line in stdout.lines() {
                    if line.starts_with("Mach Virtual Memory Statistics:") {
                        // Try to extract page size if available
                        if line.contains("page size of") {
                            if let Some(size_str) = line.split("page size of ").nth(1) {
                                if let Some(size_str) = size_str.split(" bytes").next() {
                                    if let Ok(size) = size_str.parse::<usize>() {
                                        pagesize = size;
                                    }
                                }
                            }
                        }
                    } else if line.starts_with("Pages free:") {
                        if let Some(count_str) = line.split(':').nth(1) {
                            if let Some(count_str) = count_str.trim().split('.').next() {
                                if let Ok(count) = count_str.parse::<usize>() {
                                    free_pages = count;
                                }
                            }
                        }
                    } else if line.starts_with("Pages inactive:") {
                        if let Some(count_str) = line.split(':').nth(1) {
                            if let Some(count_str) = count_str.trim().split('.').next() {
                                if let Ok(count) = count_str.parse::<usize>() {
                                    inactive_pages = count;
                                }
                            }
                        }
                    }
                }

                // Available memory is approximately free + inactive pages
                return (free_pages + inactive_pages) * pagesize;
            }
        }

        // Fallback if vm_stat fails
        Self::get_available_memory_fallback()
    }

    fn get_available_memory_fallback() -> usize {
        // Conservative fallback for unknown systems:
        // assume at least 2GB total memory and use 25% of it as available.
        let conservative_total = 2_000_000_000; // 2GB
        conservative_total / 4 // 500MB
    }

    /// Calculate optimal chunk size based on available memory
    fn calculate_optimal_chunksize(available_memory: usize) -> usize {
        // Aim for chunks that fit comfortably in L3 cache (typically 8-32MB)
        let l3_cache_estimate = 8_000_000; // 8MB
        let max_chunk = available_memory / 10; // Use at most 10% of available memory

        l3_cache_estimate.min(max_chunk).max(4096)
    }

    /// Check if an operation can be performed in available memory
    pub fn can_allocate(&self, bytes: usize) -> bool {
        bytes <= self.available_memory / 2 // Conservative: use at most half
    }

    /// Get recommended algorithm based on data size
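    ///
    /// A minimal usage sketch (marked `ignore` because the public re-export
    /// path is an assumption and the chosen variant depends on the host's
    /// available memory):
    ///
    /// ```ignore
    /// let adapter = MemoryAdaptiveAlgorithm::new();
    /// match adapter.recommend_algorithm::<f64>(10_000) {
    ///     AlgorithmChoice::Direct => { /* compute in a single pass */ }
    ///     AlgorithmChoice::Optimized => { /* temporaries fit in memory */ }
    ///     AlgorithmChoice::Streaming(chunk_len) => {
    ///         /* process `chunk_len` elements at a time */
    ///     }
    /// }
    /// ```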
    pub fn recommend_algorithm<F: Float>(&self, datasize: usize) -> AlgorithmChoice {
        let elementsize = std::mem::size_of::<F>();
        let total_bytes = datasize * elementsize;

        if total_bytes < 1_000_000 {
            // < 1MB
            AlgorithmChoice::Direct
        } else if self.can_allocate(total_bytes) {
            AlgorithmChoice::Optimized
        } else {
            AlgorithmChoice::Streaming(self.preferred_chunksize / elementsize)
        }
    }
}

#[derive(Debug, Clone)]
pub enum AlgorithmChoice {
    /// Use direct algorithm (small data)
    Direct,
    /// Use optimized algorithm (medium data)
    Optimized,
    /// Use streaming algorithm with given chunk size
    Streaming(usize),
}

/// Zero-copy view-based statistics
///
/// These functions operate on views to avoid unnecessary copying
pub mod zero_copy {
    use super::*;

    /// Compute statistics on overlapping windows without copying
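    ///
    /// A minimal sketch of a rolling mean (marked `ignore` because the public
    /// re-export path of this module is assumed):
    ///
    /// ```ignore
    /// use scirs2_core::ndarray::array;
    ///
    /// let data = array![1.0, 2.0, 3.0, 4.0];
    /// // Windows of length 2 yield the means [1.5, 2.5, 3.5]
    /// let rolling = rolling_stats_zerocopy(&data, 2, |w| Ok(w.mean().unwrap())).unwrap();
    /// assert_eq!(rolling.len(), 3);
    /// ```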
    pub fn rolling_stats_zerocopy<F, D, S>(
        data: &ArrayBase<D, Ix1>,
        windowsize: usize,
        stat_fn: S,
    ) -> StatsResult<Array1<F>>
    where
        F: Float + NumCast,
        D: Data<Elem = F>,
        S: Fn(ArrayView1<F>) -> StatsResult<F>,
    {
        let n = data.len();
        if windowsize == 0 || windowsize > n {
            return Err(StatsError::invalid_argument("Invalid window size"));
        }

        let output_len = n - windowsize + 1;
        let mut results = Array1::zeros(output_len);

        // Use views to avoid copying
        for i in 0..output_len {
            let window = data.slice(s![i..i + windowsize]);
            results[i] = stat_fn(window)?;
        }

        Ok(results)
    }

    /// Compute pairwise operations using views
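    ///
    /// A minimal sketch computing pairwise dot products of rows (marked
    /// `ignore` because the public re-export path is assumed); note that the
    /// diagonal is filled with 1 by convention:
    ///
    /// ```ignore
    /// use scirs2_core::ndarray::array;
    ///
    /// let data = array![[1.0, 0.0], [0.0, 2.0]];
    /// let pairwise = pairwise_operation_zerocopy(&data, |a, b| Ok(a.dot(&b))).unwrap();
    /// assert_eq!(pairwise[(0, 1)], 0.0);
    /// assert_eq!(pairwise[(1, 0)], 0.0);
    /// ```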
    pub fn pairwise_operation_zerocopy<F, D, Op>(
        data: &ArrayBase<D, Ix2>,
        operation: Op,
    ) -> StatsResult<Array2<F>>
    where
        F: Float + NumCast,
        D: Data<Elem = F>,
        Op: Fn(ArrayView1<F>, ArrayView1<F>) -> StatsResult<F>,
    {
        let n = data.nrows();
        let mut result = Array2::zeros((n, n));

        for i in 0..n {
            result[(i, i)] = F::one(); // Diagonal is fixed at 1 (correlation-style convention)
            for j in (i + 1)..n {
                let row_i = data.row(i);
                let row_j = data.row(j);
                let value = operation(row_i, row_j)?;
                result[(i, j)] = value;
                result[(j, i)] = value; // Symmetric
            }
        }

        Ok(result)
    }
}

/// Memory-mapped statistical operations for very large datasets
pub mod memory_mapped {
    use super::*;

    /// Chunked mean calculation for memory-mapped data
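    ///
    /// A minimal sketch that feeds chunk views of an in-memory array (marked
    /// `ignore` because the re-export paths are assumptions; with real
    /// memory-mapped data the chunks would come from the mapping instead):
    ///
    /// ```ignore
    /// use scirs2_core::ndarray::{array, Axis};
    ///
    /// let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
    /// let chunks = data.axis_chunks_iter(Axis(0), 2);
    /// let mean = mmap_mean(chunks, data.len()).unwrap();
    /// assert!((mean - 3.5).abs() < 1e-12);
    /// ```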
    pub fn mmap_mean<'a, F: Float + NumCast + std::fmt::Display + std::iter::Sum<F> + 'a>(
        data_chunks: impl Iterator<Item = ArrayView1<'a, F>>,
        total_count: usize,
    ) -> StatsResult<F> {
        if total_count == 0 {
            return Err(StatsError::invalid_argument("Empty dataset"));
        }

        let mut total_sum = F::zero();
        let mut count_processed = 0;

        for chunk in data_chunks {
            let chunk_sum = chunk.sum();
            total_sum = total_sum + chunk_sum;
            count_processed += chunk.len();
        }

        if count_processed != total_count {
            return Err(StatsError::invalid_argument("Chunk count mismatch"));
        }

        Ok(total_sum / F::from(total_count).unwrap())
    }

    /// Chunked variance calculation using Welford's algorithm
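    ///
    /// Returns `(mean, variance)`. A minimal sketch over two explicit chunks
    /// (marked `ignore` because the re-export path is assumed):
    ///
    /// ```ignore
    /// use scirs2_core::ndarray::array;
    ///
    /// let (a, b) = (array![1.0, 2.0, 3.0], array![4.0, 5.0]);
    /// let (mean, var) = mmap_variance(vec![a.view(), b.view()].into_iter(), 5, 1).unwrap();
    /// assert!((mean - 3.0).abs() < 1e-12);
    /// assert!((var - 2.5).abs() < 1e-12);
    /// ```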
    pub fn mmap_variance<'a, F: Float + NumCast + std::fmt::Display + 'a>(
        data_chunks: impl Iterator<Item = ArrayView1<'a, F>>,
        total_count: usize,
        ddof: usize,
    ) -> StatsResult<(F, F)> {
        if total_count <= ddof {
            return Err(StatsError::invalid_argument("Insufficient data for ddof"));
        }

        let mut mean = F::zero();
        let mut m2 = F::zero();
        let mut count = 0;

        for chunk in data_chunks {
            for &value in chunk.iter() {
                count += 1;
                let delta = value - mean;
                mean = mean + delta / F::from(count).unwrap();
                let delta2 = value - mean;
                m2 = m2 + delta * delta2;
            }
        }

        let variance = m2 / F::from(count - ddof).unwrap();
        Ok((mean, variance))
    }
}

/// Ring buffer for streaming statistics with fixed memory usage
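///
/// A minimal usage sketch (marked `ignore` because the public re-export path
/// is assumed): the window holds at most `capacity` values, so pushing a
/// fourth value below evicts the first.
///
/// ```ignore
/// let mut stats = RingBufferStats::<f64>::new(3);
/// for x in [1.0, 2.0, 3.0, 4.0] {
///     stats.push(x);
/// }
/// // Window now holds [2.0, 3.0, 4.0]
/// assert!((stats.mean() - 3.0).abs() < 1e-12);
/// assert!((stats.variance(1).unwrap() - 1.0).abs() < 1e-12);
/// ```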
pub struct RingBufferStats<F: Float> {
    buffer: VecDeque<F>,
    capacity: usize,
    sum: F,
    sum_squares: F,
}

impl<F: Float + NumCast + std::fmt::Display> RingBufferStats<F> {
    /// Create a new ring buffer with fixed capacity
    pub fn new(capacity: usize) -> Self {
        Self {
            buffer: VecDeque::with_capacity(capacity),
            capacity,
            sum: F::zero(),
            sum_squares: F::zero(),
        }
    }

    /// Add a new value, potentially evicting the oldest
    pub fn push(&mut self, value: F) {
        if self.buffer.len() >= self.capacity {
            if let Some(old_value) = self.buffer.pop_front() {
                self.sum = self.sum - old_value;
                self.sum_squares = self.sum_squares - old_value * old_value;
            }
        }

        self.buffer.push_back(value);
        self.sum = self.sum + value;
        self.sum_squares = self.sum_squares + value * value;
    }

    /// Get current mean
    pub fn mean(&self) -> F {
        if self.buffer.is_empty() {
            F::zero()
        } else {
            self.sum / F::from(self.buffer.len()).unwrap()
        }
    }

    /// Get current variance
    pub fn variance(&self, ddof: usize) -> Option<F> {
        let n = self.buffer.len();
        if n <= ddof {
            return None;
        }

        let mean = self.mean();
        let var = self.sum_squares / F::from(n).unwrap() - mean * mean;
        Some(var * F::from(n).unwrap() / F::from(n - ddof).unwrap())
    }

    /// Get current standard deviation
    pub fn std(&self, ddof: usize) -> Option<F> {
        self.variance(ddof).map(|v| v.sqrt())
    }
}

/// Lazy evaluation for statistical operations
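///
/// Operations are queued by the builder methods and evaluated only when
/// `compute` is called, so shared intermediates (the mean, one sorted copy)
/// are computed once. A minimal sketch (marked `ignore` because the public
/// re-export path is assumed):
///
/// ```ignore
/// let results = LazyStatComputation::new(vec![1.0_f64, 2.0, 3.0, 4.0, 5.0])
///     .mean()
///     .variance(1)
///     .quantile(0.5)
///     .compute()
///     .unwrap();
/// // Results come back in the order the operations were queued.
/// assert_eq!(results, vec![3.0, 2.5, 3.0]);
/// ```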
pub struct LazyStatComputation<F: Float> {
    data_ref: Arc<Vec<F>>,
    operations: Vec<StatOperation>,
}

#[derive(Clone)]
enum StatOperation {
    Mean,
    Variance(usize), // ddof
    Quantile(f64),
    #[allow(dead_code)]
    StandardScaling,
}

impl<F: Float + NumCast + std::iter::Sum + std::fmt::Display> LazyStatComputation<F> {
    /// Create a new lazy computation
    pub fn new(data: Vec<F>) -> Self {
        Self {
            data_ref: Arc::new(data),
            operations: Vec::new(),
        }
    }

    /// Add mean computation
    pub fn mean(mut self) -> Self {
        self.operations.push(StatOperation::Mean);
        self
    }

    /// Add variance computation
    pub fn variance(mut self, ddof: usize) -> Self {
        self.operations.push(StatOperation::Variance(ddof));
        self
    }

    /// Add quantile computation
    pub fn quantile(mut self, q: f64) -> Self {
        self.operations.push(StatOperation::Quantile(q));
        self
    }

    /// Execute all operations efficiently
    pub fn compute(&self) -> StatsResult<Vec<F>> {
        let mut results = Vec::new();
        let data = &*self.data_ref;

        // Check which operations we need
        let need_mean = self
            .operations
            .iter()
            .any(|op| matches!(op, StatOperation::Mean | StatOperation::Variance(_)));
        let need_sorted = self
            .operations
            .iter()
            .any(|op| matches!(op, StatOperation::Quantile(_)));

        // Compute shared values
        let mean = if need_mean {
            Some(data.iter().fold(F::zero(), |acc, &x| acc + x) / F::from(data.len()).unwrap())
        } else {
            None
        };

        let sorteddata = if need_sorted {
            let mut sorted = data.clone();
            sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
            Some(sorted)
        } else {
            None
        };

        // Execute operations
        for op in &self.operations {
            match op {
                StatOperation::Mean => {
                    results.push(mean.unwrap());
                }
                StatOperation::Variance(ddof) => {
                    let m = mean.unwrap();
                    let var = data
                        .iter()
                        .map(|&x| {
                            let diff = x - m;
                            diff * diff
                        })
                        .sum::<F>()
                        / F::from(data.len() - ddof).unwrap();
                    results.push(var);
                }
                StatOperation::Quantile(q) => {
                    let sorted = sorteddata.as_ref().unwrap();
                    let pos = *q * (sorted.len() - 1) as f64;
                    let idx = pos.floor() as usize;
                    let frac = pos - pos.floor();

                    let result = if frac == 0.0 {
                        sorted[idx]
                    } else {
                        let lower = sorted[idx];
                        let upper = sorted[idx + 1];
                        lower + F::from(frac).unwrap() * (upper - lower)
                    };
                    results.push(result);
                }
                StatOperation::StandardScaling => {
                    // This would require returning transformed data;
                    // for now, just return a placeholder.
                    results.push(F::one());
                }
            }
        }

        Ok(results)
    }
}

/// Memory usage tracker for profiling
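///
/// Allocations are reported explicitly by the caller rather than intercepted
/// from the allocator. A minimal sketch (marked `ignore` because the public
/// re-export path is assumed):
///
/// ```ignore
/// let mut tracker = MemoryTracker::new();
/// tracker.record_allocation(1024);
/// tracker.record_allocation(2048);
/// tracker.record_deallocation(1024);
///
/// let profile = tracker.get_profile();
/// assert_eq!(profile.peak_memory, 3072);
/// assert_eq!(profile.allocations, 2);
/// assert_eq!(profile.deallocations, 1);
/// ```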
pub struct MemoryTracker {
    current_usage: usize,
    peak_usage: usize,
    allocations: usize,
    deallocations: usize,
}

impl Default for MemoryTracker {
    fn default() -> Self {
        Self::new()
    }
}

impl MemoryTracker {
    /// Create a new memory tracker
    pub fn new() -> Self {
        Self {
            current_usage: 0,
            peak_usage: 0,
            allocations: 0,
            deallocations: 0,
        }
    }

    /// Record an allocation
    pub fn record_allocation(&mut self, bytes: usize) {
        self.current_usage += bytes;
        self.peak_usage = self.peak_usage.max(self.current_usage);
        self.allocations += 1;
    }

    /// Record a deallocation
    pub fn record_deallocation(&mut self, bytes: usize) {
        self.current_usage = self.current_usage.saturating_sub(bytes);
        self.deallocations += 1;
    }

    /// Get memory profile
    pub fn get_profile(&self) -> MemoryProfile {
        let efficiency_score = if self.peak_usage > 0 {
            1.0 - (self.current_usage as f64 / self.peak_usage as f64)
        } else {
            1.0
        };

        MemoryProfile {
            peak_memory: self.peak_usage,
            // Rough proxy for average usage: midpoint of peak and current usage
            avg_memory: (self.peak_usage + self.current_usage) / 2,
            allocations: self.allocations,
            deallocations: self.deallocations,
            efficiency_score,
        }
    }
}

/// Cache-friendly matrix operations
pub mod cache_friendly {
    use super::*;

    /// Tiled matrix operation (e.g. multiplication) processed block by block for better cache usage
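    ///
    /// A minimal sketch where the per-tile closure performs an ordinary matrix
    /// product (marked `ignore` because the re-export path is assumed and
    /// `dot` needs a concrete float type such as `f64`):
    ///
    /// ```ignore
    /// use scirs2_core::ndarray::array;
    ///
    /// let a = array![[1.0, 2.0], [3.0, 4.0]];
    /// let b = array![[5.0, 6.0], [7.0, 8.0]];
    /// // Even with 1x1 tiles the accumulated result equals A * B.
    /// let c = tiled_matrix_operation(&a, &b, 1, |ta, tb| Ok(ta.dot(&tb))).unwrap();
    /// assert_eq!(c, array![[19.0, 22.0], [43.0, 50.0]]);
    /// ```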
    pub fn tiled_matrix_operation<F, D1, D2, Op>(
        a: &ArrayBase<D1, Ix2>,
        b: &ArrayBase<D2, Ix2>,
        tilesize: usize,
        operation: Op,
    ) -> StatsResult<Array2<F>>
    where
        F: Float + NumCast,
        D1: Data<Elem = F>,
        D2: Data<Elem = F>,
        Op: Fn(ArrayView2<F>, ArrayView2<F>) -> StatsResult<Array2<F>>,
    {
        let (m, k1) = a.dim();
        let (k2, n) = b.dim();

        if k1 != k2 {
            return Err(StatsError::dimension_mismatch(
                "Matrix dimensions incompatible",
            ));
        }

        let mut result = Array2::zeros((m, n));

        // Process in tiles for better cache locality
        for i in (0..m).step_by(tilesize) {
            for j in (0..n).step_by(tilesize) {
                for k in (0..k1).step_by(tilesize) {
                    let i_end = (i + tilesize).min(m);
                    let j_end = (j + tilesize).min(n);
                    let k_end = (k + tilesize).min(k1);

                    let a_tile = a.slice(s![i..i_end, k..k_end]);
                    let b_tile = b.slice(s![k..k_end, j..j_end]);

                    let tile_result = operation(a_tile, b_tile)?;

                    // Accumulate the tile's contribution into the result block
                    let mut result_tile = result.slice_mut(s![i..i_end, j..j_end]);
                    result_tile.zip_mut_with(&tile_result, |r, &t| *r = *r + t);
                }
            }
        }

        Ok(result)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_memory_adaptive_algorithm() {
        let adapter = MemoryAdaptiveAlgorithm::new();

        // Test algorithm selection
        match adapter.recommend_algorithm::<f64>(100) {
            AlgorithmChoice::Direct => (), // Small data
            _ => panic!("Expected Direct algorithm for small data"),
        }

        // Use a data size whose byte count (8 bytes per f64) is well above the
        // can_allocate threshold, so streaming is always selected.
        let hugedatasize = adapter.available_memory / 4;
        match adapter.recommend_algorithm::<f64>(hugedatasize) {
            AlgorithmChoice::Streaming(_) => (), // Large data
            other => panic!(
                "Expected Streaming algorithm for large data, got {:?}",
                other
            ),
        }
    }

    #[test]
    fn test_ring_buffer_stats() {
        let mut buffer = RingBufferStats::<f64>::new(5);

        // Add values
        for i in 1..=5 {
            buffer.push(i as f64);
        }

        assert_relative_eq!(buffer.mean(), 3.0, epsilon = 1e-10);

        // Add one more value (should evict the oldest)
        buffer.push(6.0);
        assert_relative_eq!(buffer.mean(), 4.0, epsilon = 1e-10); // (2+3+4+5+6)/5
    }

    #[test]
    fn test_lazy_computation() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let lazy = LazyStatComputation::new(data)
            .mean()
            .variance(1)
            .quantile(0.5);

        let results = lazy.compute().unwrap();
        assert_eq!(results.len(), 3);
        assert_relative_eq!(results[0], 3.0, epsilon = 1e-10); // mean
        assert_relative_eq!(results[1], 2.5, epsilon = 1e-10); // variance
        assert_relative_eq!(results[2], 3.0, epsilon = 1e-10); // median
    }

    #[test]
    fn test_zero_copy_rolling() {
        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let results =
            zero_copy::rolling_stats_zerocopy(&data.view(), 3, |window| Ok(window.mean().unwrap()))
                .unwrap();

        assert_eq!(results.len(), 3);
        assert_relative_eq!(results[0], 2.0, epsilon = 1e-10);
        assert_relative_eq!(results[1], 3.0, epsilon = 1e-10);
        assert_relative_eq!(results[2], 4.0, epsilon = 1e-10);
    }
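
    // Small hand-computed checks for the memory tracker and the chunked
    // (memory-mapped style) mean/variance helpers not covered above.
    #[test]
    fn test_memory_tracker_profile() {
        let mut tracker = MemoryTracker::new();
        tracker.record_allocation(1024);
        tracker.record_allocation(2048);
        tracker.record_deallocation(1024);

        let profile = tracker.get_profile();
        assert_eq!(profile.peak_memory, 3072);
        assert_eq!(profile.allocations, 2);
        assert_eq!(profile.deallocations, 1);
    }

    #[test]
    fn test_mmap_mean_and_variance() {
        // Two chunks standing in for memory-mapped segments of 1..=5.
        let first = array![1.0, 2.0, 3.0];
        let second = array![4.0, 5.0];

        let mean =
            memory_mapped::mmap_mean(vec![first.view(), second.view()].into_iter(), 5).unwrap();
        assert_relative_eq!(mean, 3.0, epsilon = 1e-10);

        let (welford_mean, welford_var) =
            memory_mapped::mmap_variance(vec![first.view(), second.view()].into_iter(), 5, 1)
                .unwrap();
        assert_relative_eq!(welford_mean, 3.0, epsilon = 1e-10);
        assert_relative_eq!(welford_var, 2.5, epsilon = 1e-10);
    }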
}