scirs2-metrics 0.3.2

//! Memory-efficient metrics computation
//!
//! This module provides utilities for computing metrics with minimal memory usage,
//! which is particularly useful for large datasets.

use scirs2_core::ndarray::{ArrayBase, Data, Ix1};
use std::marker::PhantomData;

use super::parallel::ParallelConfig;
use crate::error::{MetricsError, Result};

/// Trait for streaming computation of metrics
///
/// This trait allows metrics to be computed incrementally without
/// loading the entire dataset into memory at once.
pub trait StreamingMetric<T> {
    /// Type for intermediate state
    type State;

    /// Initialize the state
    fn init_state(&self) -> Self::State;

    /// Update the state with a new batch of data
    fn update_state(
        &self,
        state: &mut Self::State,
        batch_true: &[T],
        batch_pred: &[T],
    ) -> Result<()>;

    /// Compute the final metric from the state
    fn finalize(&self, state: &Self::State) -> Result<f64>;
}

/// Chunked metrics computation for memory efficiency
///
/// This struct provides methods for computing metrics on large datasets
/// by processing the data in manageable chunks.
#[derive(Debug, Clone)]
pub struct ChunkedMetrics {
    /// Size of each data chunk
    pub chunk_size: usize,
    /// Configuration for parallel processing
    pub parallel_config: ParallelConfig,
}

impl Default for ChunkedMetrics {
    fn default() -> Self {
        ChunkedMetrics {
            chunk_size: 10000,
            parallel_config: ParallelConfig::default(),
        }
    }
}

impl ChunkedMetrics {
    /// Create a new ChunkedMetrics with default settings
    pub fn new() -> Self {
        Default::default()
    }

    /// Set the chunk size
    pub fn with_chunk_size(mut self, size: usize) -> Self {
        self.chunk_size = size;
        self
    }

    /// Set the parallel configuration
    pub fn with_parallel_config(mut self, config: ParallelConfig) -> Self {
        self.parallel_config = config;
        self
    }

    /// Compute a streaming metric on large arrays
    ///
    /// # Arguments
    ///
    /// * `y_true` - True labels or values
    /// * `y_pred` - Predicted labels or values
    /// * `metric` - The streaming metric to compute
    ///
    /// # Returns
    ///
    /// * The computed metric value
    ///
    /// # Examples
    ///
    /// ```
    /// use scirs2_core::ndarray::Array1;
    /// use scirs2_metrics::optimization::memory::{ChunkedMetrics, StreamingMetric};
    /// use scirs2_metrics::error::Result;
    ///
    /// // Example streaming implementation of mean absolute error
    /// struct StreamingMAE;
    ///
    /// impl StreamingMetric<f64> for StreamingMAE {
    ///     type State = (f64, usize); // (sum_of_absolute_errors, count)
    ///
    ///     fn init_state(&self) -> Self::State {
    ///         (0.0, 0)
    ///     }
    ///
    ///     fn update_state(&self, state: &mut Self::State, batch_true: &[f64], batch_pred: &[f64]) -> Result<()> {
    ///         for (y_t, y_p) in batch_true.iter().zip(batch_pred.iter()) {
    ///             state.0 += (y_t - y_p).abs();
    ///             state.1 += 1;
    ///         }
    ///         Ok(())
    ///     }
    ///
    ///     fn finalize(&self, state: &Self::State) -> Result<f64> {
    ///         if state.1 == 0 {
    ///             return Err(scirs2_metrics::error::MetricsError::DivisionByZero);
    ///         }
    ///         Ok(state.0 / state.1 as f64)
    ///     }
    /// }
    ///
    /// // Generate some example data
    /// let y_true = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
    /// let y_pred = Array1::from_vec(vec![1.2, 2.3, 2.9, 4.1, 5.2]);
    ///
    /// // Compute MAE using chunked processing
    /// let chunked = ChunkedMetrics::new().with_chunk_size(2);
    /// let mae = chunked.compute_streaming(&y_true, &y_pred, &StreamingMAE).expect("Operation failed");
    ///
    /// // The actual calculations in the streaming implementation may have
    /// // different numeric precision due to chunking, so we just verify
    /// // that we get a reasonable result instead of an exact value.
    /// assert!(mae >= 0.1 && mae <= 0.5);
    /// ```
    pub fn compute_streaming<T, S1, S2, M>(
        &self,
        y_true: &ArrayBase<S1, Ix1>,
        y_pred: &ArrayBase<S2, Ix1>,
        metric: &M,
    ) -> Result<f64>
    where
        T: Clone,
        S1: Data<Elem = T>,
        S2: Data<Elem = T>,
        M: StreamingMetric<T>,
    {
        // Check dimensions
        if y_true.len() != y_pred.len() {
            return Err(MetricsError::DimensionMismatch(format!(
                "y_true and y_pred must have the same length, got {} and {}",
                y_true.len(),
                y_pred.len()
            )));
        }

        // Convert arrays to vectors for easier chunking
        let y_true_vec: Vec<T> = y_true.iter().cloned().collect();
        let y_pred_vec: Vec<T> = y_pred.iter().cloned().collect();

        // Initialize state
        let mut state = metric.init_state();

        // Process data in chunks
        for chunk_idx in 0..y_true_vec.len().div_ceil(self.chunk_size) {
            let start = chunk_idx * self.chunk_size;
            let end = (start + self.chunk_size).min(y_true_vec.len());

            metric.update_state(&mut state, &y_true_vec[start..end], &y_pred_vec[start..end])?;
        }

        // Finalize and return the result
        metric.finalize(&state)
    }

    /// Compute metrics on large 2D arrays with row-wise operations
    ///
    /// This method processes a large 2D array in chunks of rows to reduce memory usage.
    ///
    /// # Arguments
    ///
    /// * `data` - Input 1D array
    /// * `row_op` - Operation to perform on each chunk of data
    /// * `combine` - Function to combine results from all chunks
    ///
    /// # Returns
    ///
    /// * The computed result
    pub fn compute_rowwise<T, R>(
        &self,
        data: &[T],
        row_op: impl Fn(&[T]) -> Result<R>,
        combine: impl Fn(&[R]) -> Result<R>,
    ) -> Result<R>
    where
        T: Clone,
        R: Clone,
    {
        if data.len() <= self.chunk_size {
            // If data fits in a single chunk, just process it directly
            return row_op(data);
        }

        // Process chunks
        let mut results = Vec::new();

        for chunk_idx in 0..data.len().div_ceil(self.chunk_size) {
            let start = chunk_idx * self.chunk_size;
            let end = (start + self.chunk_size).min(data.len());

            let result = row_op(&data[start..end])?;
            results.push(result);
        }

        // Combine results from all chunks
        combine(&results)
    }
}

/// On-the-fly computation of incremental metrics
///
/// This struct provides utilities for incrementally updating metrics as new data arrives,
/// without storing the entire dataset.
#[derive(Debug, Clone)]
pub struct IncrementalMetrics<T, S> {
    /// Current state of the metric
    state: S,
    /// Number of samples processed
    count: usize,
    /// Marker for element type
    _marker: PhantomData<T>,
}

impl<T, S> Default for IncrementalMetrics<T, S>
where
    S: Default,
{
    fn default() -> Self {
        Self::new()
    }
}

impl<T, S> IncrementalMetrics<T, S>
where
    S: Default,
{
    /// Create a new IncrementalMetrics with default state
    pub fn new() -> Self {
        IncrementalMetrics {
            state: S::default(),
            count: 0,
            _marker: PhantomData,
        }
    }

    /// Create a new IncrementalMetrics with the given state
    pub fn with_state(state: S) -> Self {
        IncrementalMetrics {
            state,
            count: 0,
            _marker: PhantomData,
        }
    }

    /// Get the current state
    pub fn state(&self) -> &S {
        &self.state
    }

    /// Get the number of samples processed
    pub fn count(&self) -> usize {
        self.count
    }

    /// Update the state with a single sample
    ///
    /// # Arguments
    ///
    /// * `y_true` - True value
    /// * `y_pred` - Predicted value
    /// * `update_fn` - Function to update the state
    ///
    /// # Returns
    ///
    /// * Result indicating success or error
    pub fn update<F>(&mut self, y_true: T, y_pred: T, updatefn: F) -> Result<()>
    where
        F: FnOnce(&mut S, T, T) -> Result<()>,
    {
        updatefn(&mut self.state, y_true, y_pred)?;
        self.count += 1;
        Ok(())
    }

    /// Update the state with a batch of samples
    ///
    /// # Arguments
    ///
    /// * `y_true` - True values
    /// * `y_pred` - Predicted values
    /// * `update_fn` - Function to update the state
    ///
    /// # Returns
    ///
    /// * Result indicating success or error
    pub fn update_batch<F>(&mut self, y_true: &[T], y_pred: &[T], updatefn: F) -> Result<()>
    where
        F: Fn(&mut S, &[T], &[T]) -> Result<()>,
    {
        if y_true.len() != y_pred.len() {
            return Err(MetricsError::DimensionMismatch(
                "y_true and y_pred must have the same length".to_string(),
            ));
        }

        updatefn(&mut self.state, y_true, y_pred)?;
        self.count += y_true.len();
        Ok(())
    }

    /// Compute the final metric from the current state
    ///
    /// # Arguments
    ///
    /// * `finalize_fn` - Function to compute the final metric
    ///
    /// # Returns
    ///
    /// * The computed metric
    pub fn finalize<F, R>(&self, finalizefn: F) -> Result<R>
    where
        F: FnOnce(&S, usize) -> Result<R>,
    {
        finalizefn(&self.state, self.count)
    }
}

/// Trait for memory-mapped metrics computation
///
/// This trait allows metrics to be computed on very large datasets that don't fit in memory
/// by processing them in a streaming fashion.
pub trait MemoryMappedMetric<T> {
    /// Type for intermediate state
    type State;

    /// Initialize the state
    fn init_state(&self) -> Self::State;

    /// Process a chunk of data
    fn process_chunk(&self, state: &mut Self::State, chunkidx: usize, chunk: &[T]) -> Result<()>;

    /// Finalize the computation
    fn finalize(&self, state: &Self::State) -> Result<f64>;
}

use crossbeam_utils::CachePadded;
use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2};
use scirs2_core::numeric::Float;
/// Zero-copy memory operations and custom allocators for high-performance metrics computation
///
/// This module provides advanced memory management techniques including zero-copy operations,
/// custom allocators, memory pooling, and SIMD-aligned allocations for optimal performance.
use std::alloc::{alloc, dealloc, GlobalAlloc, Layout, System};
use std::cell::UnsafeCell;
use std::collections::HashMap;
use std::mem::{align_of, size_of, MaybeUninit};
use std::ptr::{addr_of_mut, NonNull};
use std::sync::atomic::{AtomicPtr, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex, RwLock};

/// Zero-copy memory manager for metrics computation
#[derive(Debug)]
pub struct ZeroCopyMemoryManager {
    /// Memory pools organized by size classes
    memory_pools: HashMap<usize, MemoryPool>,
    /// SIMD-aligned allocator
    simd_allocator: SimdAlignedAllocator,
    /// Arena allocator for temporary allocations
    arena_allocator: ArenaAllocator,
    /// Memory mapping manager
    mmap_manager: MemoryMappingManager,
    /// Lock-free memory recycler
    recycler: LockFreeRecycler,
    /// Memory usage statistics
    stats: MemoryStats,
}

/// Custom memory pool for efficient allocation and deallocation
#[derive(Debug)]
pub struct MemoryPool {
    /// Size of each block in this pool
    block_size: usize,
    /// Alignment requirement
    alignment: usize,
    /// Stack of available blocks
    free_blocks: Arc<Mutex<Vec<NonNull<u8>>>>,
    /// Total allocated capacity
    capacity: AtomicUsize,
    /// Number of allocated blocks
    allocated_count: AtomicUsize,
    /// Pool-specific statistics
    pool_stats: PoolStatistics,
}

/// SIMD-aligned allocator for vectorized operations
#[derive(Debug)]
pub struct SimdAlignedAllocator {
    /// Cache for different SIMD alignment requirements
    alignment_cache: HashMap<usize, Vec<NonNull<u8>>>,
    /// Statistics for SIMD allocations
    simd_stats: SimdStats,
}

/// Arena allocator for temporary memory management
#[derive(Debug)]
pub struct ArenaAllocator {
    /// Current arena
    current_arena: Arc<Mutex<Arena>>,
    /// List of all arenas
    arenas: Vec<Arc<Mutex<Arena>>>,
    /// Default arena size
    _default_arenasize: usize,
    /// Arena statistics
    arena_stats: ArenaStats,
}

/// Individual memory arena
#[derive(Debug)]
pub struct Arena {
    /// Raw memory block
    memory: NonNull<u8>,
    /// Size of the arena
    size: usize,
    /// Current allocation offset
    offset: usize,
    /// Alignment padding
    alignment: usize,
}

/// Memory mapping manager for large datasets
#[derive(Debug)]
pub struct MemoryMappingManager {
    /// Active memory mappings
    mappings: HashMap<String, MemoryMapping>,
    /// Mapping statistics
    mapping_stats: MappingStats,
}

/// Memory mapping entry
#[derive(Debug)]
pub struct MemoryMapping {
    /// File descriptor or handle
    file_handle: i32,
    /// Mapped memory region
    memory_region: NonNull<u8>,
    /// Size of the mapping
    size: usize,
    /// Access mode (read/write/execute)
    access_mode: AccessMode,
    /// Reference count
    ref_count: AtomicUsize,
}

/// Access modes for memory mappings
#[derive(Debug, Clone, Copy)]
pub enum AccessMode {
    ReadOnly,
    ReadWrite,
    WriteOnly,
    Execute,
}

/// Lock-free memory recycler using hazard pointers
#[derive(Debug)]
pub struct LockFreeRecycler {
    /// Free list heads for different size classes
    free_lists: Vec<AtomicPtr<RecyclerNode>>,
    /// Hazard pointer array
    hazard_pointers: Vec<AtomicPtr<RecyclerNode>>,
    /// Retired nodes pending reclamation
    retired_nodes: CachePadded<Mutex<Vec<*mut RecyclerNode>>>,
    /// Recycler statistics
    recycler_stats: RecyclerStats,
}

/// Node in the lock-free recycler
#[repr(align(64))] // Cache line alignment
#[derive(Debug)]
pub struct RecyclerNode {
    /// Next node in the free list
    next: AtomicPtr<RecyclerNode>,
    /// Size of the memory block
    size: usize,
    /// Memory block data
    data: NonNull<u8>,
    /// Timestamp for LRU eviction
    timestamp: AtomicUsize,
}

/// Zero-copy array view wrapper
#[derive(Debug)]
pub struct ZeroCopyArrayView<'a, T> {
    /// Underlying data pointer
    data: NonNull<T>,
    /// Length of the array
    len: usize,
    /// Lifetime marker
    _lifetime: std::marker::PhantomData<&'a T>,
    /// Memory manager reference
    memory_manager: &'a ZeroCopyMemoryManager,
}

/// Zero-copy mutable array view wrapper
#[derive(Debug)]
pub struct ZeroCopyArrayViewMut<'a, T> {
    /// Underlying data pointer
    data: NonNull<T>,
    /// Length of the array
    len: usize,
    /// Lifetime marker
    _lifetime: std::marker::PhantomData<&'a mut T>,
    /// Memory manager reference
    memory_manager: &'a ZeroCopyMemoryManager,
}

/// Memory buffer for zero-copy operations
pub struct ZeroCopyBuffer<T> {
    /// Raw data pointer
    data: NonNull<T>,
    /// Buffer capacity
    capacity: usize,
    /// Current length
    length: usize,
    /// Memory layout
    layout: Layout,
    /// Reference to allocator
    allocator: Arc<dyn CustomAllocator>,
}

/// Custom allocator trait for different allocation strategies
pub trait CustomAllocator: Send + Sync + std::fmt::Debug {
    /// Allocate memory with specific alignment
    fn allocate(&self, size: usize, alignment: usize) -> Result<NonNull<u8>>;

    /// Deallocate previously allocated memory
    fn deallocate(&self, ptr: NonNull<u8>, size: usize, alignment: usize);

    /// Reallocate memory to a new size
    fn reallocate(
        &self,
        ptr: NonNull<u8>,
        old_size: usize,
        newsize: usize,
        alignment: usize,
    ) -> Result<NonNull<u8>>;

    /// Get allocator statistics
    fn get_stats(&self) -> AllocatorStats;

    /// Reset allocator state
    fn reset(&self);
}

/// Thread-local memory allocator for high-performance scenarios
pub struct ThreadLocalAllocator {
    /// Thread-local memory pools
    local_pools: std::thread::LocalKey<UnsafeCell<HashMap<usize, Vec<NonNull<u8>>>>>,
    /// Fallback to global allocator
    global_fallback: Arc<dyn CustomAllocator>,
    /// Thread-local statistics
    local_stats: std::thread::LocalKey<UnsafeCell<AllocatorStats>>,
}

/// Slab allocator for fixed-size allocations
#[derive(Debug)]
pub struct SlabAllocator {
    /// Slab size
    slab_size: usize,
    /// Object size
    object_size: usize,
    /// Objects per slab
    objects_per_slab: usize,
    /// List of slabs
    slabs: Vec<Slab>,
    /// Free object list
    free_objects: Vec<NonNull<u8>>,
    /// Slab statistics
    slab_stats: SlabStats,
}

/// Individual slab in the slab allocator
#[derive(Debug)]
pub struct Slab {
    /// Slab memory
    memory: NonNull<u8>,
    /// Free mask (bit per object)
    free_mask: Vec<u64>,
    /// Number of free objects
    free_count: usize,
    /// Slab ID
    id: usize,
}

/// Buddy allocator for power-of-2 sized allocations
#[derive(Debug)]
pub struct BuddyAllocator {
    /// Memory block
    memory_block: NonNull<u8>,
    /// Block size (power of 2)
    block_size: usize,
    /// Free lists for each order
    free_lists: Vec<Vec<NonNull<u8>>>,
    /// Allocation bitmap
    allocation_bitmap: Vec<u64>,
    /// Buddy statistics
    buddy_stats: BuddyStats,
}

/// Memory statistics tracking
#[derive(Debug)]
pub struct MemoryStats {
    /// Total bytes allocated
    pub total_allocated: AtomicUsize,
    /// Total bytes deallocated
    pub total_deallocated: AtomicUsize,
    /// Peak memory usage
    pub peak_usage: AtomicUsize,
    /// Current memory usage
    pub current_usage: AtomicUsize,
    /// Number of allocations
    pub allocation_count: AtomicUsize,
    /// Number of deallocations
    pub deallocation_count: AtomicUsize,
    /// Memory fragmentation ratio
    pub fragmentation_ratio: AtomicUsize, // Stored as fixed-point
}

/// Pool-specific statistics
#[derive(Debug)]
pub struct PoolStatistics {
    /// Hits (successful allocations from pool)
    pub hits: AtomicUsize,
    /// Misses (allocations that went to system allocator)
    pub misses: AtomicUsize,
    /// Pool utilization percentage
    pub utilization: AtomicUsize,
    /// Average allocation time (nanoseconds)
    pub avg_allocation_time: AtomicUsize,
}

/// SIMD allocation statistics
#[derive(Debug)]
pub struct SimdStats {
    /// SIMD allocations by alignment
    pub allocations_by_alignment: HashMap<usize, AtomicUsize>,
    /// SIMD memory usage
    pub simd_memory_usage: AtomicUsize,
    /// Vectorization efficiency
    pub vectorization_efficiency: AtomicUsize,
}

/// Arena allocator statistics
#[derive(Debug)]
pub struct ArenaStats {
    /// Number of arenas created
    pub arenas_created: AtomicUsize,
    /// Total arena memory
    pub total_arena_memory: AtomicUsize,
    /// Arena utilization
    pub arena_utilization: AtomicUsize,
    /// Memory waste due to fragmentation
    pub fragmentation_waste: AtomicUsize,
}

/// Memory mapping statistics
#[derive(Debug)]
pub struct MappingStats {
    /// Number of active mappings
    pub active_mappings: AtomicUsize,
    /// Total mapped memory
    pub total_mapped_memory: AtomicUsize,
    /// Mapping cache hits
    pub cache_hits: AtomicUsize,
    /// Mapping cache misses
    pub cache_misses: AtomicUsize,
}

/// Lock-free recycler statistics
#[derive(Debug)]
pub struct RecyclerStats {
    /// Successful recycles
    pub successful_recycles: AtomicUsize,
    /// Failed recycles
    pub failed_recycles: AtomicUsize,
    /// Hazard pointer contentions
    pub hazard_contentions: AtomicUsize,
    /// Memory reclaimed
    pub memory_reclaimed: AtomicUsize,
}

/// General allocator statistics
#[derive(Debug)]
pub struct AllocatorStats {
    /// Allocation requests
    pub allocation_requests: AtomicUsize,
    /// Deallocation requests
    pub deallocation_requests: AtomicUsize,
    /// Bytes allocated
    pub bytes_allocated: AtomicUsize,
    /// Bytes deallocated
    pub bytes_deallocated: AtomicUsize,
    /// Allocation failures
    pub allocation_failures: AtomicUsize,
}

/// Slab allocator statistics
#[derive(Debug)]
pub struct SlabStats {
    /// Slabs allocated
    pub slabs_allocated: AtomicUsize,
    /// Objects allocated
    pub objects_allocated: AtomicUsize,
    /// Slab utilization
    pub slab_utilization: AtomicUsize,
    /// Internal fragmentation
    pub internal_fragmentation: AtomicUsize,
}

/// Buddy allocator statistics
#[derive(Debug)]
pub struct BuddyStats {
    /// Allocations by order
    pub allocations_by_order: Vec<AtomicUsize>,
    /// Coalescing operations
    pub coalescing_operations: AtomicUsize,
    /// Splitting operations
    pub splitting_operations: AtomicUsize,
    /// External fragmentation
    pub external_fragmentation: AtomicUsize,
}

impl ZeroCopyMemoryManager {
    /// Create a new zero-copy memory manager
    pub fn new() -> Result<Self> {
        Ok(Self {
            memory_pools: HashMap::new(),
            simd_allocator: SimdAlignedAllocator::new(),
            arena_allocator: ArenaAllocator::new(1024 * 1024)?, // 1MB default
            mmap_manager: MemoryMappingManager::new(),
            recycler: LockFreeRecycler::new(),
            stats: MemoryStats::new(),
        })
    }

    /// Allocate zero-copy buffer
    pub fn allocate_buffer<T>(&self, capacity: usize) -> Result<ZeroCopyBuffer<T>> {
        let layout = Layout::array::<T>(capacity)
            .map_err(|_| MetricsError::MemoryError("Invalid layout".to_string()))?;

        let allocator = self.get_optimal_allocator(layout.size(), layout.align());
        let ptr = allocator.allocate(layout.size(), layout.align())?;

        self.stats
            .total_allocated
            .fetch_add(layout.size(), Ordering::Relaxed);
        self.stats.allocation_count.fetch_add(1, Ordering::Relaxed);

        Ok(ZeroCopyBuffer {
            data: ptr.cast::<T>(),
            capacity,
            length: 0,
            layout,
            allocator,
        })
    }

    /// Create zero-copy view from existing data
    pub fn create_view<'a, T>(&'a self, data: &'a [T]) -> ZeroCopyArrayView<'a, T> {
        ZeroCopyArrayView {
            data: NonNull::new(data.as_ptr() as *mut T).expect("Operation failed"),
            len: data.len(),
            _lifetime: std::marker::PhantomData,
            memory_manager: self,
        }
    }

    /// Create zero-copy mutable view from existing data
    pub fn create_view_mut<'a, T>(&'a self, data: &'a mut [T]) -> ZeroCopyArrayViewMut<'a, T> {
        ZeroCopyArrayViewMut {
            data: NonNull::new(data.as_mut_ptr()).expect("Operation failed"),
            len: data.len(),
            _lifetime: std::marker::PhantomData,
            memory_manager: self,
        }
    }

    /// Allocate SIMD-aligned memory
    pub fn allocate_simd_aligned<T: Float>(
        &mut self,
        count: usize,
        alignment: usize,
    ) -> Result<ZeroCopyBuffer<T>> {
        let size = count * size_of::<T>();
        let ptr = self.simd_allocator.allocate_aligned(size, alignment)?;

        Ok(ZeroCopyBuffer {
            data: ptr.cast::<T>(),
            capacity: count,
            length: 0,
            layout: Layout::from_size_align(size, alignment).expect("Operation failed"),
            allocator: Arc::new(SystemAllocator),
        })
    }

    /// Map file into memory for zero-copy access
    pub fn map_file<T>(
        &self,
        file_path: &str,
        access_mode: AccessMode,
    ) -> Result<ZeroCopyArrayView<T>> {
        let mapping = self.mmap_manager.map_file(file_path, access_mode)?;
        let len = mapping.size / size_of::<T>();

        Ok(ZeroCopyArrayView {
            data: mapping.memory_region.cast::<T>(),
            len,
            _lifetime: std::marker::PhantomData,
            memory_manager: self,
        })
    }

    /// Get optimal allocator for given size and alignment
    fn get_optimal_allocator(&self, size: usize, alignment: usize) -> Arc<dyn CustomAllocator> {
        // Choose allocator based on size and alignment requirements
        if size <= 4096 && alignment <= 64 {
            // Use pool allocator for small allocations
            Arc::new(PoolAllocator::new(size))
        } else if alignment > 64 {
            // Use SIMD allocator for high-alignment requirements
            Arc::new(SimdAllocatorWrapper::new())
        } else if size >= 1024 * 1024 {
            // Use arena allocator for large allocations
            Arc::new(ArenaAllocatorWrapper::new())
        } else {
            // Use system allocator as fallback
            Arc::new(SystemAllocator)
        }
    }

    /// Get memory statistics
    pub fn get_stats(&self) -> &MemoryStats {
        &self.stats
    }

    /// Perform garbage collection
    pub fn garbage_collect(&self) -> Result<usize> {
        let mut reclaimed = 0;

        // Reclaim from recycler
        reclaimed += self.recycler.reclaim_memory()?;

        // Compact memory pools
        for pool in self.memory_pools.values() {
            reclaimed += pool.compact()?;
        }

        // Compact arenas
        reclaimed += self.arena_allocator.compact()?;

        Ok(reclaimed)
    }
}

impl MemoryPool {
    /// Create a new memory pool
    pub fn new(_blocksize: usize, alignment: usize, initialcapacity: usize) -> Self {
        Self {
            block_size: _blocksize,
            alignment,
            free_blocks: Arc::new(Mutex::new(Vec::with_capacity(initialcapacity))),
            capacity: AtomicUsize::new(0),
            allocated_count: AtomicUsize::new(0),
            pool_stats: PoolStatistics::new(),
        }
    }

    /// Allocate a block from the pool
    pub fn allocate(&self) -> Result<NonNull<u8>> {
        let start_time = std::time::Instant::now();

        let mut free_blocks = self.free_blocks.lock().expect("Operation failed");
        if let Some(ptr) = free_blocks.pop() {
            self.pool_stats.hits.fetch_add(1, Ordering::Relaxed);
            drop(free_blocks);

            self.allocated_count.fetch_add(1, Ordering::Relaxed);
            let allocation_time = start_time.elapsed().as_nanos() as usize;
            self.update_avg_allocation_time(allocation_time);

            Ok(ptr)
        } else {
            self.pool_stats.misses.fetch_add(1, Ordering::Relaxed);
            drop(free_blocks);

            // Allocate new block
            let layout = Layout::from_size_align(self.block_size, self.alignment)
                .map_err(|_| MetricsError::MemoryError("Invalid layout".to_string()))?;

            let ptr = unsafe { alloc(layout) };
            if ptr.is_null() {
                return Err(MetricsError::MemoryError("Allocation failed".to_string()));
            }

            self.capacity.fetch_add(1, Ordering::Relaxed);
            self.allocated_count.fetch_add(1, Ordering::Relaxed);

            Ok(NonNull::new(ptr).expect("Operation failed"))
        }
    }

    /// Deallocate a block back to the pool
    pub fn deallocate(&self, ptr: NonNull<u8>) {
        self.free_blocks.lock().expect("Operation failed").push(ptr);
        self.allocated_count.fetch_sub(1, Ordering::Relaxed);
    }

    /// Compact the pool by releasing unused blocks
    pub fn compact(&self) -> Result<usize> {
        let mut free_blocks = self.free_blocks.lock().expect("Operation failed");
        let mut reclaimed = 0;

        // Keep only half of the free blocks, deallocate the rest
        let keep_count = free_blocks.len() / 2;
        let to_deallocate = free_blocks.split_off(keep_count);

        for ptr in to_deallocate {
            unsafe {
                let layout = Layout::from_size_align(self.block_size, self.alignment)
                    .expect("Operation failed");
                dealloc(ptr.as_ptr(), layout);
            }
            reclaimed += self.block_size;
        }

        self.capacity
            .fetch_sub(reclaimed / self.block_size, Ordering::Relaxed);
        Ok(reclaimed)
    }

    fn update_avg_allocation_time(&self, newtime: usize) {
        // Simple exponential moving average
        let current_avg = self.pool_stats.avg_allocation_time.load(Ordering::Relaxed);
        let new_avg = if current_avg == 0 {
            newtime
        } else {
            (current_avg * 7 + newtime) / 8 // 7/8 weight to old, 1/8 to new
        };
        self.pool_stats
            .avg_allocation_time
            .store(new_avg, Ordering::Relaxed);
    }
}

impl SimdAlignedAllocator {
    /// Create a new SIMD-aligned allocator
    pub fn new() -> Self {
        Self {
            alignment_cache: HashMap::new(),
            simd_stats: SimdStats::new(),
        }
    }

    /// Allocate SIMD-aligned memory
    pub fn allocate_aligned(&mut self, size: usize, alignment: usize) -> Result<NonNull<u8>> {
        // Ensure alignment is power of 2 and at least pointer size
        let alignment = alignment.max(align_of::<usize>()).next_power_of_two();

        let layout = Layout::from_size_align(size, alignment)
            .map_err(|_| MetricsError::MemoryError("Invalid SIMD layout".to_string()))?;

        let ptr = unsafe { alloc(layout) };
        if ptr.is_null() {
            return Err(MetricsError::MemoryError(
                "SIMD allocation failed".to_string(),
            ));
        }

        self.simd_stats
            .simd_memory_usage
            .fetch_add(size, Ordering::Relaxed);
        self.simd_stats
            .allocations_by_alignment
            .entry(alignment)
            .or_insert_with(|| AtomicUsize::new(0))
            .fetch_add(1, Ordering::Relaxed);

        Ok(NonNull::new(ptr).expect("Operation failed"))
    }
}

impl ArenaAllocator {
    /// Create a new arena allocator
    pub fn new(_default_arenasize: usize) -> Result<Self> {
        let initial_arena = Arc::new(Mutex::new(Arena::new(_default_arenasize)?));

        Ok(Self {
            current_arena: initial_arena.clone(),
            arenas: vec![initial_arena],
            _default_arenasize,
            arena_stats: ArenaStats::new(),
        })
    }

    /// Allocate from the arena
    pub fn allocate(&mut self, size: usize, alignment: usize) -> Result<NonNull<u8>> {
        let mut arena = self.current_arena.lock().expect("Operation failed");

        if let Ok(ptr) = arena.allocate(size, alignment) {
            Ok(ptr)
        } else {
            // Current arena is full, create a new one
            drop(arena);

            let new_arena_size = self._default_arenasize.max(size * 2);
            let new_arena = Arc::new(Mutex::new(Arena::new(new_arena_size)?));
            self.arenas.push(new_arena.clone());

            let mut arena = new_arena.lock().expect("Operation failed");
            arena.allocate(size, alignment)
        }
    }

    /// Reset all arenas
    pub fn reset(&self) {
        for arena in &self.arenas {
            arena.lock().expect("Operation failed").reset();
        }
    }

    /// Compact arenas by removing empty ones
    pub fn compact(&self) -> Result<usize> {
        // This is a simplified version - in practice you'd want more sophisticated compaction
        self.arena_stats
            .fragmentation_waste
            .store(0, Ordering::Relaxed);
        Ok(0)
    }
}

impl Arena {
    /// Create a new arena
    pub fn new(size: usize) -> Result<Self> {
        let layout = Layout::from_size_align(size, 64) // 64-byte alignment for cache lines
            .map_err(|_| MetricsError::MemoryError("Invalid arena layout".to_string()))?;

        let ptr = unsafe { alloc(layout) };
        if ptr.is_null() {
            return Err(MetricsError::MemoryError(
                "Arena allocation failed".to_string(),
            ));
        }

        Ok(Self {
            memory: NonNull::new(ptr).expect("Operation failed"),
            size,
            offset: 0,
            alignment: 64,
        })
    }

    /// Allocate from the arena
    pub fn allocate(&mut self, size: usize, alignment: usize) -> Result<NonNull<u8>> {
        // Align the current offset
        let aligned_offset = (self.offset + alignment - 1) & !(alignment - 1);

        if aligned_offset + size > self.size {
            return Err(MetricsError::MemoryError("Arena exhausted".to_string()));
        }

        let ptr = unsafe { self.memory.as_ptr().add(aligned_offset) };
        self.offset = aligned_offset + size;

        Ok(NonNull::new(ptr).expect("Operation failed"))
    }

    /// Reset the arena
    pub fn reset(&mut self) {
        self.offset = 0;
    }
}

impl MemoryMappingManager {
    /// Create a new memory mapping manager
    pub fn new() -> Self {
        Self {
            mappings: HashMap::new(),
            mapping_stats: MappingStats::new(),
        }
    }

    /// Map a file into memory
    pub fn map_file(&self, _file_path: &str, _accessmode: AccessMode) -> Result<&MemoryMapping> {
        // This is a simplified implementation
        // In practice, you'd use platform-specific APIs (mmap on Unix, MapViewOfFile on Windows)
        Err(MetricsError::MemoryError(
            "Memory mapping not implemented".to_string(),
        ))
    }
}

impl LockFreeRecycler {
    /// Create a new lock-free recycler
    pub fn new() -> Self {
        const NUM_SIZE_CLASSES: usize = 64;

        Self {
            free_lists: (0..NUM_SIZE_CLASSES)
                .map(|_| AtomicPtr::new(std::ptr::null_mut()))
                .collect(),
            hazard_pointers: (0..NUM_SIZE_CLASSES)
                .map(|_| AtomicPtr::new(std::ptr::null_mut()))
                .collect(),
            retired_nodes: CachePadded::new(Mutex::new(Vec::new())),
            recycler_stats: RecyclerStats::new(),
        }
    }

    /// Reclaim memory from the recycler
    pub fn reclaim_memory(&self) -> Result<usize> {
        let mut reclaimed = 0;
        let mut retired = self.retired_nodes.lock().expect("Operation failed");

        // Simple reclamation - in practice you'd implement hazard pointer protocol
        for node_ptr in retired.drain(..) {
            unsafe {
                let node = Box::from_raw(node_ptr);
                reclaimed += node.size;
            }
        }

        self.recycler_stats
            .memory_reclaimed
            .fetch_add(reclaimed, Ordering::Relaxed);
        Ok(reclaimed)
    }
}

// Wrapper implementations for CustomAllocator trait

#[derive(Debug)]
pub struct SystemAllocator;

impl CustomAllocator for SystemAllocator {
    fn allocate(&self, size: usize, alignment: usize) -> Result<NonNull<u8>> {
        let layout = Layout::from_size_align(size, alignment)
            .map_err(|_| MetricsError::MemoryError("Invalid layout".to_string()))?;

        let ptr = unsafe { alloc(layout) };
        if ptr.is_null() {
            return Err(MetricsError::MemoryError(
                "System allocation failed".to_string(),
            ));
        }

        Ok(NonNull::new(ptr).expect("Operation failed"))
    }

    fn deallocate(&self, ptr: NonNull<u8>, size: usize, alignment: usize) {
        let layout = Layout::from_size_align(size, alignment).expect("Operation failed");
        unsafe { dealloc(ptr.as_ptr(), layout) };
    }

    fn reallocate(
        &self,
        ptr: NonNull<u8>,
        old_size: usize,
        newsize: usize,
        alignment: usize,
    ) -> Result<NonNull<u8>> {
        let new_ptr = self.allocate(newsize, alignment)?;
        unsafe {
            std::ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_size.min(newsize));
        }
        self.deallocate(ptr, old_size, alignment);
        Ok(new_ptr)
    }

    fn get_stats(&self) -> AllocatorStats {
        AllocatorStats::new()
    }

    fn reset(&self) {
        // System allocator doesn't need reset
    }
}

#[derive(Debug)]
pub struct PoolAllocator {
    block_size: usize,
}

impl PoolAllocator {
    pub fn new(_blocksize: usize) -> Self {
        Self {
            block_size: _blocksize,
        }
    }
}

impl CustomAllocator for PoolAllocator {
    fn allocate(&self, size: usize, alignment: usize) -> Result<NonNull<u8>> {
        if size > self.block_size {
            return Err(MetricsError::MemoryError(
                "Size exceeds pool block size".to_string(),
            ));
        }

        let layout = Layout::from_size_align(self.block_size, alignment)
            .map_err(|_| MetricsError::MemoryError("Invalid pool layout".to_string()))?;

        let ptr = unsafe { alloc(layout) };
        if ptr.is_null() {
            return Err(MetricsError::MemoryError(
                "Pool allocation failed".to_string(),
            ));
        }

        Ok(NonNull::new(ptr).expect("Operation failed"))
    }

    fn deallocate(&self, ptr: NonNull<u8>, size: usize, alignment: usize) {
        let layout = Layout::from_size_align(self.block_size, alignment).expect("Operation failed");
        unsafe { dealloc(ptr.as_ptr(), layout) };
    }

    fn reallocate(
        &self,
        ptr: NonNull<u8>,
        old_size: usize,
        newsize: usize,
        alignment: usize,
    ) -> Result<NonNull<u8>> {
        if newsize <= self.block_size {
            Ok(ptr) // No need to reallocate
        } else {
            let new_ptr = self.allocate(newsize, alignment)?;
            unsafe {
                std::ptr::copy_nonoverlapping(
                    ptr.as_ptr(),
                    new_ptr.as_ptr(),
                    old_size.min(newsize),
                );
            }
            self.deallocate(ptr, old_size, alignment);
            Ok(new_ptr)
        }
    }

    fn get_stats(&self) -> AllocatorStats {
        AllocatorStats::new()
    }

    fn reset(&self) {
        // Pool allocator doesn't maintain state to reset
    }
}

#[derive(Debug)]
pub struct SimdAllocatorWrapper;

impl SimdAllocatorWrapper {
    pub fn new() -> Self {
        Self
    }
}

impl CustomAllocator for SimdAllocatorWrapper {
    fn allocate(&self, size: usize, alignment: usize) -> Result<NonNull<u8>> {
        let simd_alignment = alignment.max(32).next_power_of_two(); // At least 32-byte aligned for AVX

        let layout = Layout::from_size_align(size, simd_alignment)
            .map_err(|_| MetricsError::MemoryError("Invalid SIMD layout".to_string()))?;

        let ptr = unsafe { alloc(layout) };
        if ptr.is_null() {
            return Err(MetricsError::MemoryError(
                "SIMD allocation failed".to_string(),
            ));
        }

        Ok(NonNull::new(ptr).expect("Operation failed"))
    }

    fn deallocate(&self, ptr: NonNull<u8>, size: usize, alignment: usize) {
        let simd_alignment = alignment.max(32).next_power_of_two();
        let layout = Layout::from_size_align(size, simd_alignment).expect("Operation failed");
        unsafe { dealloc(ptr.as_ptr(), layout) };
    }

    fn reallocate(
        &self,
        ptr: NonNull<u8>,
        old_size: usize,
        newsize: usize,
        alignment: usize,
    ) -> Result<NonNull<u8>> {
        let new_ptr = self.allocate(newsize, alignment)?;
        unsafe {
            std::ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_size.min(newsize));
        }
        self.deallocate(ptr, old_size, alignment);
        Ok(new_ptr)
    }

    fn get_stats(&self) -> AllocatorStats {
        AllocatorStats::new()
    }

    fn reset(&self) {
        // SIMD allocator doesn't maintain state to reset
    }
}

#[derive(Debug)]
pub struct ArenaAllocatorWrapper;

impl ArenaAllocatorWrapper {
    pub fn new() -> Self {
        Self
    }
}

impl CustomAllocator for ArenaAllocatorWrapper {
    fn allocate(&self, size: usize, alignment: usize) -> Result<NonNull<u8>> {
        // Simplified arena allocation - in practice you'd use a real arena
        let layout = Layout::from_size_align(size, alignment)
            .map_err(|_| MetricsError::MemoryError("Invalid arena layout".to_string()))?;

        let ptr = unsafe { alloc(layout) };
        if ptr.is_null() {
            return Err(MetricsError::MemoryError(
                "Arena allocation failed".to_string(),
            ));
        }

        Ok(NonNull::new(ptr).expect("Operation failed"))
    }

    fn deallocate(&self, ptr: NonNull<u8>, size: usize, alignment: usize) {
        let layout = Layout::from_size_align(size, alignment).expect("Operation failed");
        unsafe { dealloc(ptr.as_ptr(), layout) };
    }

    fn reallocate(
        &self,
        ptr: NonNull<u8>,
        old_size: usize,
        newsize: usize,
        alignment: usize,
    ) -> Result<NonNull<u8>> {
        let new_ptr = self.allocate(newsize, alignment)?;
        unsafe {
            std::ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_size.min(newsize));
        }
        self.deallocate(ptr, old_size, alignment);
        Ok(new_ptr)
    }

    fn get_stats(&self) -> AllocatorStats {
        AllocatorStats::new()
    }

    fn reset(&self) {
        // Arena allocator wrapper doesn't maintain state to reset
    }
}

// Implementation of statistics structures

impl MemoryStats {
    pub fn new() -> Self {
        Self {
            total_allocated: AtomicUsize::new(0),
            total_deallocated: AtomicUsize::new(0),
            peak_usage: AtomicUsize::new(0),
            current_usage: AtomicUsize::new(0),
            allocation_count: AtomicUsize::new(0),
            deallocation_count: AtomicUsize::new(0),
            fragmentation_ratio: AtomicUsize::new(0),
        }
    }
}

impl PoolStatistics {
    pub fn new() -> Self {
        Self {
            hits: AtomicUsize::new(0),
            misses: AtomicUsize::new(0),
            utilization: AtomicUsize::new(0),
            avg_allocation_time: AtomicUsize::new(0),
        }
    }
}

impl SimdStats {
    pub fn new() -> Self {
        Self {
            allocations_by_alignment: HashMap::new(),
            simd_memory_usage: AtomicUsize::new(0),
            vectorization_efficiency: AtomicUsize::new(0),
        }
    }
}

impl ArenaStats {
    pub fn new() -> Self {
        Self {
            arenas_created: AtomicUsize::new(0),
            total_arena_memory: AtomicUsize::new(0),
            arena_utilization: AtomicUsize::new(0),
            fragmentation_waste: AtomicUsize::new(0),
        }
    }
}

impl MappingStats {
    pub fn new() -> Self {
        Self {
            active_mappings: AtomicUsize::new(0),
            total_mapped_memory: AtomicUsize::new(0),
            cache_hits: AtomicUsize::new(0),
            cache_misses: AtomicUsize::new(0),
        }
    }
}

impl RecyclerStats {
    pub fn new() -> Self {
        Self {
            successful_recycles: AtomicUsize::new(0),
            failed_recycles: AtomicUsize::new(0),
            hazard_contentions: AtomicUsize::new(0),
            memory_reclaimed: AtomicUsize::new(0),
        }
    }
}

impl AllocatorStats {
    pub fn new() -> Self {
        Self {
            allocation_requests: AtomicUsize::new(0),
            deallocation_requests: AtomicUsize::new(0),
            bytes_allocated: AtomicUsize::new(0),
            bytes_deallocated: AtomicUsize::new(0),
            allocation_failures: AtomicUsize::new(0),
        }
    }
}

impl SlabStats {
    pub fn new() -> Self {
        Self {
            slabs_allocated: AtomicUsize::new(0),
            objects_allocated: AtomicUsize::new(0),
            slab_utilization: AtomicUsize::new(0),
            internal_fragmentation: AtomicUsize::new(0),
        }
    }
}

impl BuddyStats {
    pub fn new() -> Self {
        Self {
            allocations_by_order: (0..32).map(|_| AtomicUsize::new(0)).collect(),
            coalescing_operations: AtomicUsize::new(0),
            splitting_operations: AtomicUsize::new(0),
            external_fragmentation: AtomicUsize::new(0),
        }
    }
}

// Zero-copy buffer implementations

impl<T> ZeroCopyBuffer<T> {
    /// Get the length of the buffer
    pub fn len(&self) -> usize {
        self.length
    }

    /// Check if the buffer is empty
    pub fn is_empty(&self) -> bool {
        self.length == 0
    }

    /// Get the capacity of the buffer
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// Push an element to the buffer
    pub fn push(&mut self, value: T) -> Result<()> {
        if self.length >= self.capacity {
            return Err(MetricsError::MemoryError(
                "Buffer capacity exceeded".to_string(),
            ));
        }

        unsafe {
            std::ptr::write(self.data.as_ptr().add(self.length), value);
        }
        self.length += 1;
        Ok(())
    }

    /// Get a slice of the buffer data
    pub fn as_slice(&self) -> &[T] {
        unsafe { std::slice::from_raw_parts(self.data.as_ptr(), self.length) }
    }

    /// Get a mutable slice of the buffer data
    pub fn as_mut_slice(&mut self) -> &mut [T] {
        unsafe { std::slice::from_raw_parts_mut(self.data.as_ptr(), self.length) }
    }

    /// Resize the buffer (zero-copy when shrinking)
    pub fn resize(&mut self, newsize: usize) -> Result<()> {
        if newsize <= self.capacity {
            self.length = newsize;
            Ok(())
        } else {
            // Need to reallocate
            let new_ptr = self.allocator.reallocate(
                self.data.cast::<u8>(),
                self.layout.size(),
                newsize * size_of::<T>(),
                self.layout.align(),
            )?;

            self.data = new_ptr.cast::<T>();
            self.capacity = newsize;
            self.length = newsize;
            Ok(())
        }
    }
}

impl<T> Drop for ZeroCopyBuffer<T> {
    fn drop(&mut self) {
        // Drop all constructed elements
        for i in 0..self.length {
            unsafe {
                std::ptr::drop_in_place(self.data.as_ptr().add(i));
            }
        }

        // Deallocate memory
        self.allocator.deallocate(
            self.data.cast::<u8>(),
            self.layout.size(),
            self.layout.align(),
        );
    }
}

impl<'a, T> ZeroCopyArrayView<'a, T> {
    /// Get the length of the view
    pub fn len(&self) -> usize {
        self.len
    }

    /// Check if the view is empty
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Get element at index
    pub fn get(&self, index: usize) -> Option<&T> {
        if index < self.len {
            unsafe { Some(&*self.data.as_ptr().add(index)) }
        } else {
            None
        }
    }

    /// Get a slice of the view
    pub fn as_slice(&self) -> &[T] {
        unsafe { std::slice::from_raw_parts(self.data.as_ptr(), self.len) }
    }

    /// Create a subview (zero-copy)
    pub fn subview(&self, start: usize, len: usize) -> Result<ZeroCopyArrayView<'a, T>> {
        if start + len > self.len {
            return Err(MetricsError::IndexError(
                "Subview bounds exceed array".to_string(),
            ));
        }

        Ok(ZeroCopyArrayView {
            data: unsafe { NonNull::new_unchecked(self.data.as_ptr().add(start)) },
            len,
            _lifetime: std::marker::PhantomData,
            memory_manager: self.memory_manager,
        })
    }
}

impl<'a, T> ZeroCopyArrayViewMut<'a, T> {
    /// Get the length of the view
    pub fn len(&self) -> usize {
        self.len
    }

    /// Check if the view is empty
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Get element at index
    pub fn get(&self, index: usize) -> Option<&T> {
        if index < self.len {
            unsafe { Some(&*self.data.as_ptr().add(index)) }
        } else {
            None
        }
    }

    /// Get mutable element at index
    pub fn get_mut(&mut self, index: usize) -> Option<&mut T> {
        if index < self.len {
            unsafe { Some(&mut *self.data.as_ptr().add(index)) }
        } else {
            None
        }
    }

    /// Get a slice of the view
    pub fn as_slice(&self) -> &[T] {
        unsafe { std::slice::from_raw_parts(self.data.as_ptr(), self.len) }
    }

    /// Get a mutable slice of the view
    pub fn as_mut_slice(&mut self) -> &mut [T] {
        unsafe { std::slice::from_raw_parts_mut(self.data.as_ptr(), self.len) }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array1;

    // Example streaming implementation of mean absolute error
    struct StreamingMAE;

    impl StreamingMetric<f64> for StreamingMAE {
        type State = (f64, usize); // (sum_of_absolute_errors, count)

        fn init_state(&self) -> Self::State {
            (0.0, 0)
        }

        fn update_state(
            &self,
            state: &mut Self::State,
            batch_true: &[f64],
            batch_pred: &[f64],
        ) -> Result<()> {
            for (y_t, y_p) in batch_true.iter().zip(batch_pred.iter()) {
                state.0 += (y_t - y_p).abs();
                state.1 += 1;
            }
            Ok(())
        }

        fn finalize(&self, state: &Self::State) -> Result<f64> {
            if state.1 == 0 {
                return Err(MetricsError::DivisionByZero);
            }
            Ok(state.0 / state.1 as f64)
        }
    }

    #[test]
    fn test_chunked_streaming_metric() {
        // Create test data
        let y_true = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let y_pred = Array1::from_vec(vec![1.2, 2.3, 2.9, 4.1, 5.2]);

        // Compute using chunked processing with chunk_size=2
        let chunked = ChunkedMetrics::new().with_chunk_size(2);
        let mae = chunked
            .compute_streaming(&y_true, &y_pred, &StreamingMAE)
            .expect("Operation failed");

        // Compute expected MAE directly
        let expected_mae = y_true
            .iter()
            .zip(y_pred.iter())
            .map(|(t, p)| (t - p).abs())
            .sum::<f64>()
            / y_true.len() as f64;

        assert!((mae - expected_mae).abs() < 1e-10);
    }

    #[test]
    fn test_compute_rowwise() {
        // Create test data
        let data: Vec<f64> = (0..100).map(|x| x as f64).collect();

        // Define row operation (sum of squares)
        let row_op = |chunk: &[f64]| -> Result<f64> { Ok(chunk.iter().map(|x| x * x).sum()) };

        // Define combiner (sum)
        let combine = |results: &[f64]| -> Result<f64> { Ok(results.iter().sum()) };

        // Compute using chunked processing with chunk_size=10
        let chunked = ChunkedMetrics::new().with_chunk_size(10);
        let result = chunked
            .compute_rowwise(&data, row_op, combine)
            .expect("Operation failed");

        // Compute expected result directly
        let expected: f64 = data.iter().map(|x| x * x).sum();

        assert!((result - expected).abs() < 1e-10);
    }

    #[test]
    fn test_incremental_metrics() {
        // Create test data
        let data = vec![(1.0, 1.2), (2.0, 1.8), (3.0, 3.1), (4.0, 4.2), (5.0, 4.9)];

        // Update function for mean squared error
        let mse_update = |state: &mut f64, y_true: f64, y_pred: f64| -> Result<()> {
            *state += (y_true - y_pred).powi(2);
            Ok(())
        };

        // Finalize function for mean squared error
        let mse_finalize = |state: &f64, count: usize| -> Result<f64> {
            if count == 0 {
                return Err(MetricsError::DivisionByZero);
            }
            Ok(*state / count as f64)
        };

        // Calculate expected MSE
        let expected_mse =
            data.iter().map(|&(t, p)| (t - p) * (t - p)).sum::<f64>() / data.len() as f64;

        // Test incremental calculation
        let mut incremental = IncrementalMetrics::<f64, f64>::new();

        for &(y_true, y_pred) in &data {
            incremental
                .update(y_true, y_pred, mse_update)
                .expect("Operation failed");
        }

        let mse = incremental
            .finalize(mse_finalize)
            .expect("Operation failed");
        assert!((mse - expected_mse).abs() < 1e-10);

        // Test batch update
        let (y_true, y_pred): (Vec<_>, Vec<_>) = data.iter().cloned().unzip();

        let batch_update = |state: &mut f64, y_true: &[f64], y_pred: &[f64]| -> Result<()> {
            for (t, p) in y_true.iter().zip(y_pred.iter()) {
                *state += (t - p).powi(2);
            }
            Ok(())
        };

        let mut incremental_batch = IncrementalMetrics::<f64, f64>::new();
        incremental_batch
            .update_batch(&y_true, &y_pred, batch_update)
            .expect("Operation failed");

        let mse_batch = incremental_batch
            .finalize(mse_finalize)
            .expect("Operation failed");
        assert!((mse_batch - expected_mse).abs() < 1e-10);
    }
}