use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{ArrayBase, Data, Ix1};
use scirs2_core::numeric::{Float, NumCast};
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
/// Thread-safe memory profiler that tracks allocation statistics per category
/// together with the process-wide current and peak recorded memory usage.
pub struct MemoryProfiler {
    // Per-category aggregate statistics, keyed by the caller-supplied category name.
    allocations: Arc<Mutex<HashMap<String, AllocationStats>>>,
    // Highest value `current_memory` has reached since creation or the last reset.
    peak_memory: Arc<Mutex<usize>>,
    // Running total: recorded allocation bytes minus recorded deallocation bytes.
    current_memory: Arc<Mutex<usize>>,
    // When false, record_allocation/record_deallocation are no-ops.
    enabled: bool,
}
/// Aggregated allocation statistics for a single category.
#[derive(Debug, Clone, Default)]
pub struct AllocationStats {
    // Number of allocations recorded in this category.
    pub total_allocations: usize,
    // Sum of all recorded allocation sizes, in bytes.
    pub total_bytes: usize,
    // Largest single recorded allocation, in bytes.
    pub peak_bytes: usize,
    // total_bytes / total_allocations, recomputed on every record.
    pub averagesize: f64,
    // Duration of every recorded allocation — one entry per record_allocation
    // call (grows without bound; see MemoryProfiler::record_allocation).
    pub allocation_times: Vec<Duration>,
}
impl Default for MemoryProfiler {
fn default() -> Self {
Self::new()
}
}
impl MemoryProfiler {
    /// Creates a profiler with profiling enabled and all counters at zero.
    pub fn new() -> Self {
        Self {
            allocations: Arc::new(Mutex::new(HashMap::new())),
            peak_memory: Arc::new(Mutex::new(0)),
            current_memory: Arc::new(Mutex::new(0)),
            enabled: true,
        }
    }

    /// Records an allocation of `size` bytes under `category` that took
    /// `duration`, updating category aggregates and the global current/peak
    /// counters. No-op when profiling is disabled.
    ///
    /// # Panics
    /// Panics if an internal mutex is poisoned (another thread panicked while
    /// holding it).
    pub fn record_allocation(&self, category: &str, size: usize, duration: Duration) {
        if !self.enabled {
            return;
        }
        let mut allocations = self
            .allocations
            .lock()
            .expect("allocations mutex poisoned: a thread panicked while profiling");
        let stats = allocations.entry(category.to_string()).or_default();
        stats.total_allocations += 1;
        stats.total_bytes += size;
        stats.peak_bytes = stats.peak_bytes.max(size);
        // Keep the running average consistent with the updated totals.
        stats.averagesize = stats.total_bytes as f64 / stats.total_allocations as f64;
        // NOTE(review): this vector grows unboundedly (one entry per call);
        // consider capping or summarizing if the profiler is long-lived.
        stats.allocation_times.push(duration);
        let mut current = self
            .current_memory
            .lock()
            .expect("current_memory mutex poisoned: a thread panicked while profiling");
        *current += size;
        // Update the peak while still holding `current` so the two stay consistent.
        let mut peak = self
            .peak_memory
            .lock()
            .expect("peak_memory mutex poisoned: a thread panicked while profiling");
        *peak = (*peak).max(*current);
    }

    /// Records that `size` bytes were freed. Saturates at zero so unmatched
    /// deallocations cannot underflow the counter. No-op when disabled.
    pub fn record_deallocation(&self, size: usize) {
        if !self.enabled {
            return;
        }
        let mut current = self
            .current_memory
            .lock()
            .expect("current_memory mutex poisoned: a thread panicked while profiling");
        *current = current.saturating_sub(size);
    }

    /// Returns a snapshot of all recorded statistics plus generated
    /// optimization recommendations.
    pub fn get_report(&self) -> MemoryReport {
        let allocations = self
            .allocations
            .lock()
            .expect("allocations mutex poisoned: a thread panicked while profiling")
            .clone();
        let peak_memory = *self
            .peak_memory
            .lock()
            .expect("peak_memory mutex poisoned: a thread panicked while profiling");
        let current_memory = *self
            .current_memory
            .lock()
            .expect("current_memory mutex poisoned: a thread panicked while profiling");
        let recommendations = self.generate_recommendations(&allocations);
        MemoryReport {
            allocations,
            peak_memory,
            current_memory,
            recommendations,
        }
    }

    /// Derives human-readable optimization hints from the recorded statistics:
    /// pooling for many small allocations, streaming for very large ones, and
    /// pre-allocation for slow ones.
    fn generate_recommendations(
        &self,
        allocations: &HashMap<String, AllocationStats>,
    ) -> Vec<String> {
        let mut recommendations = Vec::new();
        for (category, stats) in allocations {
            // Many small allocations (>1000 allocations averaging <1 KiB).
            if stats.total_allocations > 1000 && stats.averagesize < 1024.0 {
                recommendations.push(format!(
                    "Consider memory pooling for '{}' category (many small allocations: {} allocations, avg size: {:.1} bytes)",
                    category, stats.total_allocations, stats.averagesize
                ));
            }
            // A single allocation larger than 10 MiB.
            if stats.peak_bytes > 10 * 1024 * 1024 {
                recommendations.push(format!(
                    "Consider streaming processing for '{}' category (large allocation: {:.1} MB)",
                    category,
                    stats.peak_bytes as f64 / 1024.0 / 1024.0
                ));
            }
            // Any single allocation that took longer than 10 ms.
            if let Some(&max_time) = stats.allocation_times.iter().max() {
                if max_time > Duration::from_millis(10) {
                    recommendations.push(format!(
                        "Consider pre-allocation for '{}' category (slow allocation: {:?})",
                        category, max_time
                    ));
                }
            }
        }
        recommendations
    }

    /// Enables or disables recording; disabling does not clear existing data.
    pub fn set_enabled(&mut self, enabled: bool) {
        self.enabled = enabled;
    }

    /// Clears all statistics and zeroes the current/peak counters.
    pub fn reset(&self) {
        self.allocations
            .lock()
            .expect("allocations mutex poisoned: a thread panicked while profiling")
            .clear();
        *self
            .peak_memory
            .lock()
            .expect("peak_memory mutex poisoned: a thread panicked while profiling") = 0;
        *self
            .current_memory
            .lock()
            .expect("current_memory mutex poisoned: a thread panicked while profiling") = 0;
    }
}
/// Point-in-time snapshot produced by [`MemoryProfiler::get_report`].
#[derive(Debug)]
pub struct MemoryReport {
    // Per-category statistics, cloned from the profiler at report time.
    pub allocations: HashMap<String, AllocationStats>,
    // Peak recorded memory usage, in bytes.
    pub peak_memory: usize,
    // Current recorded memory usage at report time, in bytes.
    pub current_memory: usize,
    // Human-readable optimization suggestions derived from the statistics.
    pub recommendations: Vec<String>,
}
impl MemoryReport {
    /// Prints a human-readable summary of this report to stdout: overall
    /// usage, per-category statistics, and any optimization recommendations.
    pub fn print_report(&self) {
        // Byte-to-mebibyte conversion used for all MB figures below.
        let to_mb = |bytes: usize| bytes as f64 / 1024.0 / 1024.0;

        println!("=== Memory Usage Report ===");
        println!("Peak Memory Usage: {:.2} MB", to_mb(self.peak_memory));
        println!("Current Memory Usage: {:.2} MB", to_mb(self.current_memory));
        println!();
        println!("Allocation Statistics by Category:");
        for (name, stat) in &self.allocations {
            println!(" {}:", name);
            println!(" Total Allocations: {}", stat.total_allocations);
            println!(" Total Bytes: {:.2} MB", to_mb(stat.total_bytes));
            println!(" Peak Allocation: {:.2} KB", stat.peak_bytes as f64 / 1024.0);
            println!(" Average Size: {:.1} bytes", stat.averagesize);
            if !stat.allocation_times.is_empty() {
                // Mean allocation time in microseconds.
                let total: Duration = stat.allocation_times.iter().sum();
                let avg_us = total.as_micros() as f64 / stat.allocation_times.len() as f64;
                println!(" Average Allocation Time: {:.1} µs", avg_us);
            }
            println!();
        }
        if !self.recommendations.is_empty() {
            println!("Optimization Recommendations:");
            for (idx, rec) in self.recommendations.iter().enumerate() {
                println!(" {}. {}", idx + 1, rec);
            }
        }
    }
}
/// Bounded in-memory cache for computed statistics, keyed by string, with
/// entry-count and byte budgets and optional profiler integration.
pub struct StatisticsCache<F> {
    // Cached values with access metadata used for eviction ordering.
    cache: HashMap<String, CachedResult<F>>,
    // Maximum number of entries before eviction kicks in.
    max_entries: usize,
    // Maximum estimated memory footprint (bytes) before eviction kicks in.
    max_memory: usize,
    // Current estimated footprint of all cached entries, in bytes.
    current_memory: usize,
    // Optional profiler notified of cache allocations and evictions.
    profiler: Option<Arc<MemoryProfiler>>,
}
/// A single cached value plus the metadata used for eviction decisions.
#[derive(Clone)]
struct CachedResult<F> {
    // The cached statistic.
    value: F,
    // Insertion time; used as a tiebreaker when access counts are equal.
    timestamp: Instant,
    // Estimated footprint of this entry (value size + key length), in bytes.
    memorysize: usize,
    // Number of cache hits for this entry; lower counts are evicted first.
    access_count: usize,
}
impl<F: Float + Clone + std::fmt::Display> StatisticsCache<F> {
    /// Creates a cache bounded to `_max_entries` entries and `maxmemory`
    /// estimated bytes.
    pub fn new(_max_entries: usize, maxmemory: usize) -> Self {
        Self {
            cache: HashMap::new(),
            max_entries: _max_entries,
            max_memory: maxmemory,
            current_memory: 0,
            profiler: None,
        }
    }

    /// Attaches a profiler that is notified of cache allocations and evictions.
    pub fn with_profiler(mut self, profiler: Arc<MemoryProfiler>) -> Self {
        self.profiler = Some(profiler);
        self
    }

    /// Inserts `value` under `key`, evicting least-used entries first as
    /// needed to respect the memory and entry-count budgets.
    pub fn put(&mut self, key: String, value: F) {
        // Estimated footprint: the value itself plus the key's bytes.
        let memorysize = std::mem::size_of::<F>() + key.len();
        self.maybe_evict(memorysize);
        let cached_result = CachedResult {
            value,
            timestamp: Instant::now(),
            memorysize,
            access_count: 0,
        };
        if let Some(old_result) = self.cache.insert(key.clone(), cached_result) {
            self.current_memory -= old_result.memorysize;
            // Fix: report the replaced entry's memory as freed; previously only
            // the new allocation was recorded, so the profiler's current-memory
            // counter drifted upward every time a key was overwritten.
            if let Some(profiler) = &self.profiler {
                profiler.record_deallocation(old_result.memorysize);
            }
        }
        self.current_memory += memorysize;
        if let Some(profiler) = &self.profiler {
            profiler.record_allocation("statistics_cache", memorysize, Duration::from_nanos(0));
        }
    }

    /// Looks up `key`, bumping its access count on a hit.
    pub fn get(&mut self, key: &str) -> Option<F> {
        if let Some(entry) = self.cache.get_mut(key) {
            entry.access_count += 1;
            Some(entry.value)
        } else {
            None
        }
    }

    /// Evicts entries until `neededsize` more bytes fit within the memory
    /// budget and there is room for one more entry within the entry budget.
    fn maybe_evict(&mut self, neededsize: usize) {
        while self.current_memory + neededsize > self.max_memory && !self.cache.is_empty() {
            self.evict_lru();
        }
        while self.cache.len() >= self.max_entries && !self.cache.is_empty() {
            self.evict_lru();
        }
    }

    /// Removes the least-used entry (fewest accesses, oldest timestamp as
    /// tiebreaker) and notifies the profiler, if any.
    fn evict_lru(&mut self) {
        if let Some((key_to_remove, entry_to_remove)) = self
            .cache
            .iter()
            .min_by_key(|(_, entry)| (entry.access_count, entry.timestamp))
            .map(|(k, v)| (k.clone(), v.clone()))
        {
            self.cache.remove(&key_to_remove);
            self.current_memory -= entry_to_remove.memorysize;
            if let Some(profiler) = &self.profiler {
                profiler.record_deallocation(entry_to_remove.memorysize);
            }
        }
    }

    /// Returns a snapshot of entry count, estimated memory, and hit rate.
    pub fn get_stats(&self) -> CacheStats {
        CacheStats {
            entries: self.cache.len(),
            memory_usage: self.current_memory,
            hit_rate: self.calculate_hit_rate(),
        }
    }

    /// Approximates the hit rate as accesses / (accesses + entry count).
    ///
    /// NOTE(review): misses on absent keys are not tracked anywhere, so this
    /// is only a rough proxy for a true hit rate, and evicted entries' access
    /// counts are lost. A real hit rate would require a miss counter.
    fn calculate_hit_rate(&self) -> f64 {
        let total_accesses: usize = self.cache.values().map(|entry| entry.access_count).sum();
        if total_accesses == 0 {
            0.0
        } else {
            total_accesses as f64 / (total_accesses + self.cache.len()) as f64
        }
    }

    /// Removes every entry, reporting each freed footprint to the profiler.
    pub fn clear(&mut self) {
        if let Some(profiler) = &self.profiler {
            for entry in self.cache.values() {
                profiler.record_deallocation(entry.memorysize);
            }
        }
        self.cache.clear();
        self.current_memory = 0;
    }
}
/// Snapshot of a [`StatisticsCache`]'s state, from `get_stats`.
#[derive(Debug)]
pub struct CacheStats {
    // Number of entries currently cached.
    pub entries: usize,
    // Estimated memory footprint of the cache, in bytes.
    pub memory_usage: usize,
    // Approximate hit rate in [0, 1]; see StatisticsCache::calculate_hit_rate.
    pub hit_rate: f64,
}
/// Chooses algorithms and chunk sizes based on the profiler's current memory
/// usage relative to two fixed pressure thresholds.
pub struct AdaptiveMemoryManager {
    // Below this usage (bytes) memory pressure is considered low.
    memory_threshold_low: usize,
    // Above this usage (bytes) memory pressure is considered high.
    memory_threshold_high: usize,
    // Source of the current-memory reading.
    profiler: Arc<MemoryProfiler>,
}
impl AdaptiveMemoryManager {
    /// Creates a manager with default thresholds: 100 MiB (low) and 1 GiB (high).
    pub fn new(profiler: Arc<MemoryProfiler>) -> Self {
        Self {
            memory_threshold_low: 100 * 1024 * 1024,
            memory_threshold_high: 1024 * 1024 * 1024,
            profiler,
        }
    }

    /// Picks a processing strategy from the current memory pressure and the
    /// input size: streaming/in-place under high pressure, chunked under
    /// moderate pressure, parallel/standard otherwise.
    pub fn choose_algorithm(&self, datasize: usize) -> AlgorithmChoice {
        let current_memory = *self
            .profiler
            .current_memory
            .lock()
            .expect("current_memory mutex poisoned: a thread panicked while profiling");
        if current_memory > self.memory_threshold_high {
            if datasize > 1_000_000 {
                AlgorithmChoice::Streaming
            } else {
                AlgorithmChoice::InPlace
            }
        } else if current_memory > self.memory_threshold_low {
            if datasize > 100_000 {
                AlgorithmChoice::Chunked
            } else {
                AlgorithmChoice::Standard
            }
        } else if datasize > 10_000 {
            AlgorithmChoice::Parallel
        } else {
            AlgorithmChoice::Standard
        }
    }

    /// Suggests a chunk size in elements, bounded by a tenth of the remaining
    /// memory headroom and by a quarter of the input length.
    ///
    /// Fix: the original `clamp(1000, datasize / 4)` panicked whenever
    /// `datasize < 4000` (`clamp` requires min <= max) and divided by zero
    /// when `elementsize == 0`; both cases are now handled.
    pub fn suggest_chunksize(&self, datasize: usize, elementsize: usize) -> usize {
        let current_memory = *self
            .profiler
            .current_memory
            .lock()
            .expect("current_memory mutex poisoned: a thread panicked while profiling");
        let available_memory = self.memory_threshold_high.saturating_sub(current_memory);
        // Allow one chunk to use at most a tenth of the remaining headroom.
        let max_chunk_memory = available_memory / 10;
        let max_chunk_elements = max_chunk_memory / elementsize.max(1);
        // Upper bound: a quarter of the data (at least 1 element); the lower
        // bound of 1000 is shrunk to the upper bound for small inputs so that
        // clamp's min <= max precondition always holds.
        let upper = (datasize / 4).max(1);
        let lower = 1000.min(upper);
        max_chunk_elements.clamp(lower, upper)
    }
}
/// Processing strategy selected by [`AdaptiveMemoryManager::choose_algorithm`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum AlgorithmChoice {
    /// Plain single-pass computation.
    Standard,
    /// In-place variant (callers in this module currently treat it like `Standard`).
    InPlace,
    /// Process the input in adaptively-sized chunks.
    Chunked,
    /// Stream over the input element by element.
    Streaming,
    /// Parallel variant (callers in this module currently treat it like `Standard`).
    Parallel,
}
/// Statistics facade that profiles computations, caches results, and picks
/// algorithms adaptively via a shared [`MemoryProfiler`].
pub struct ProfiledStatistics<F> {
    // Records allocation/deallocation activity for each computation.
    profiler: Arc<MemoryProfiler>,
    // Result cache consulted before recomputing a statistic.
    cache: StatisticsCache<F>,
    // Chooses between standard/chunked/streaming algorithms.
    adaptive_manager: AdaptiveMemoryManager,
}
impl<F> ProfiledStatistics<F>
where
    F: Float + NumCast + Clone + Send + Sync + std::fmt::Display,
{
    /// Creates a profiled statistics facade with a 1000-entry / 50 MiB result
    /// cache and an adaptive algorithm selector, all sharing `profiler`.
    pub fn new(profiler: Arc<MemoryProfiler>) -> Self {
        let cache = StatisticsCache::new(1000, 50 * 1024 * 1024).with_profiler(profiler.clone());
        let adaptive_manager = AdaptiveMemoryManager::new(profiler.clone());
        Self {
            profiler,
            cache,
            adaptive_manager,
        }
    }

    /// Computes the mean of `data`, consulting the result cache first and
    /// recording the computation's size and duration with the profiler.
    ///
    /// # Errors
    /// Returns `StatsError::invalid_argument` for an empty input.
    pub fn mean_profiled<D>(&mut self, data: &ArrayBase<D, Ix1>) -> StatsResult<F>
    where
        D: Data<Elem = F>,
    {
        let start_time = Instant::now();
        // Fix: the key was previously just `mean_{len}`, so two *different*
        // arrays of equal length collided and returned each other's cached
        // mean. The key now includes a fingerprint of the contents.
        let cache_key = Self::cache_key("mean", data);
        if let Some(cached_result) = self.cache.get(&cache_key) {
            return Ok(cached_result);
        }
        let algorithm = self.adaptive_manager.choose_algorithm(data.len());
        let result = match algorithm {
            AlgorithmChoice::Streaming => self.compute_mean_streaming(data),
            AlgorithmChoice::Chunked => self.compute_mean_chunked(data),
            _ => self.compute_mean_standard(data),
        }?;
        let duration = start_time.elapsed();
        self.profiler.record_allocation(
            "mean_computation",
            data.len() * std::mem::size_of::<F>(),
            duration,
        );
        self.cache.put(cache_key, result);
        Ok(result)
    }

    /// Builds a cache key from the operation name, the input length, and a
    /// hash of every element's display form (`F` carries no `Hash` bound, so
    /// the `Display` representation is hashed instead).
    fn cache_key<D>(op: &str, data: &ArrayBase<D, Ix1>) -> String
    where
        D: Data<Elem = F>,
    {
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        data.len().hash(&mut hasher);
        for value in data.iter() {
            value.to_string().hash(&mut hasher);
        }
        format!("{}_{}_{:x}", op, data.len(), hasher.finish())
    }

    /// Single-pass mean that keeps only a running sum and count in memory.
    fn compute_mean_streaming<D>(&self, data: &ArrayBase<D, Ix1>) -> StatsResult<F>
    where
        D: Data<Elem = F>,
    {
        let mut sum = F::zero();
        let mut count = 0;
        for &value in data.iter() {
            sum = sum + value;
            count += 1;
        }
        if count == 0 {
            return Err(StatsError::invalid_argument(
                "Cannot compute mean of empty array",
            ));
        }
        Ok(sum / F::from(count).expect("Failed to convert to float"))
    }

    /// Mean computed chunk by chunk, with the chunk size chosen from the
    /// current memory headroom.
    fn compute_mean_chunked<D>(&self, data: &ArrayBase<D, Ix1>) -> StatsResult<F>
    where
        D: Data<Elem = F>,
    {
        let chunksize = self
            .adaptive_manager
            .suggest_chunksize(data.len(), std::mem::size_of::<F>());
        let mut total_sum = F::zero();
        let mut total_count = 0;
        for chunk_start in (0..data.len()).step_by(chunksize) {
            let chunk_end = (chunk_start + chunksize).min(data.len());
            let chunk = data.slice(scirs2_core::ndarray::s![chunk_start..chunk_end]);
            let chunk_sum = chunk.iter().fold(F::zero(), |acc, &x| acc + x);
            total_sum = total_sum + chunk_sum;
            total_count += chunk.len();
        }
        if total_count == 0 {
            return Err(StatsError::invalid_argument(
                "Cannot compute mean of empty array",
            ));
        }
        Ok(total_sum / F::from(total_count).expect("Failed to convert to float"))
    }

    /// Straightforward fold-based mean over the whole input.
    fn compute_mean_standard<D>(&self, data: &ArrayBase<D, Ix1>) -> StatsResult<F>
    where
        D: Data<Elem = F>,
    {
        let sum = data.iter().fold(F::zero(), |acc, &x| acc + x);
        let count = data.len();
        if count == 0 {
            return Err(StatsError::invalid_argument(
                "Cannot compute mean of empty array",
            ));
        }
        Ok(sum / F::from(count).expect("Failed to convert to float"))
    }

    /// Returns the profiler's current memory report.
    pub fn get_memory_report(&self) -> MemoryReport {
        self.profiler.get_report()
    }

    /// Returns the result cache's current statistics.
    pub fn get_cache_stats(&self) -> CacheStats {
        self.cache.get_stats()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use scirs2_core::ndarray::array;

    // Recording two allocations must update count, byte total, and per-category peak.
    #[test]
    fn test_memory_profiler() {
        let profiler = MemoryProfiler::new();
        profiler.record_allocation("test", 1024, Duration::from_millis(5));
        profiler.record_allocation("test", 2048, Duration::from_millis(10));
        let report = profiler.get_report();
        assert_eq!(report.allocations["test"].total_allocations, 2);
        assert_eq!(report.allocations["test"].total_bytes, 3072);
        assert_eq!(report.allocations["test"].peak_bytes, 2048);
    }

    // A 2-entry cache must serve hits, miss absent keys, and evict on overflow.
    #[test]
    fn test_statistics_cache() {
        let mut cache = StatisticsCache::new(2, 1024);
        cache.put("key1".to_string(), 42.0);
        cache.put("key2".to_string(), 24.0);
        assert_eq!(cache.get("key1"), Some(42.0));
        assert_eq!(cache.get("key2"), Some(24.0));
        assert_eq!(cache.get("key3"), None);
        cache.put("key3".to_string(), 12.0);
        // Inserting a third key into a 2-entry cache must evict one entry.
        assert_eq!(cache.cache.len(), 2);
    }

    // mean_profiled must return the correct mean both on first computation
    // and on the subsequent cache hit, and must record profiler activity.
    #[test]
    fn test_profiled_statistics() {
        let profiler = Arc::new(MemoryProfiler::new());
        let mut stats = ProfiledStatistics::new(profiler.clone());
        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let mean = stats.mean_profiled(&data.view()).expect("Operation failed");
        assert_relative_eq!(mean, 3.0, epsilon = 1e-10);
        let mean2 = stats.mean_profiled(&data.view()).expect("Operation failed");
        assert_relative_eq!(mean2, 3.0, epsilon = 1e-10);
        let report = stats.get_memory_report();
        assert!(!report.allocations.is_empty());
    }

    // Algorithm choice must depend on data size, and the suggested chunk size
    // must respect the datasize/4 upper bound.
    #[test]
    fn test_adaptive_memory_manager() {
        let profiler = Arc::new(MemoryProfiler::new());
        let manager = AdaptiveMemoryManager::new(profiler);
        let choice_small = manager.choose_algorithm(1000);
        let choice_large = manager.choose_algorithm(1_000_000);
        assert_ne!(choice_small, choice_large);
        let chunksize = manager.suggest_chunksize(100_000, 8);
        assert!(chunksize > 0);
        assert!(chunksize <= 25_000);
    }
}