use crate::core::error::{Error, Result};
use std::any::Any;
use std::collections::HashMap;
use std::ops::Range;
use std::sync::{
atomic::{AtomicUsize, Ordering},
Arc, Mutex, RwLock,
};
use std::time::Instant;
/// The concrete storage backend kind a strategy implements.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum StorageType {
/// Column-oriented store.
ColumnStore,
/// Memory-mapped file storage.
MemoryMapped,
/// Interned/pooled string storage.
StringPool,
/// Hybrid backend for large-scale data sets.
HybridLargeScale,
/// Disk-backed storage.
DiskBased,
/// Plain in-memory storage.
InMemory,
}
/// Expected or observed data access pattern, used by strategy selection
/// and `StorageStrategy::optimize_for_pattern`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AccessPattern {
/// Reads/writes proceed in order.
Sequential,
/// Unpredictable access positions.
Random,
/// One-pass streaming consumption.
Streaming,
/// Whole-column scans.
Columnar,
/// Accesses cluster tightly together.
HighLocality,
/// Moderate clustering of accesses.
MediumLocality,
/// Accesses are widely scattered.
LowLocality,
/// Data contains many duplicate values.
HighDuplication,
/// Data contains few duplicate values.
LowDuplication,
/// Values are predominantly long strings.
LongStrings,
/// Values are predominantly short strings.
ShortStrings,
/// A small subset of the data is hot right now.
TemporalHotSpot,
/// Rarely accessed archival data.
ColdArchival,
/// Fixed-stride access; `stride` units (elements vs. bytes) are not
/// defined here — confirm at the call sites that construct this.
Strided { stride: usize },
}
/// What the caller wants the storage strategy to optimize for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PerformancePriority {
/// Favor raw speed.
Speed,
/// Favor low memory footprint.
Memory,
/// Trade off speed and memory evenly.
Balanced,
/// Favor sustained throughput.
Throughput,
/// Favor low per-operation latency.
Latency,
}
/// How strongly the stored data must survive process/system failure,
/// ordered from weakest to strongest guarantee.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DurabilityLevel {
/// May be discarded at any time.
Temporary,
/// Lives for the duration of a session.
Session,
/// Persisted storage.
Durable,
/// Persisted with extra durability guarantees.
HighDurability,
}
/// Caller preference for whether/how data should be compressed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionPreference {
/// Do not compress.
None,
/// Let the strategy decide.
Auto,
/// Prefer fast, lighter compression.
Fast,
/// Prefer maximum compression ratio.
High,
/// Balance speed against ratio.
Balanced,
}
/// Expected degree of concurrent access to the storage.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConcurrencyLevel {
/// Single-threaded access only.
Single,
Low,
Medium,
High,
VeryHigh,
}
/// Expected mix of read vs. write operations.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IoPattern {
/// Mostly reads.
ReadHeavy,
/// Mostly writes.
WriteHeavy,
/// Roughly even read/write mix.
Balanced,
/// Writes only ever append.
AppendOnly,
/// Writes modify existing records in place.
UpdateInPlace,
}
/// Coarse description of the kind of data being stored.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DataCharacteristics {
Numeric,
Text,
/// Mixture of types.
Mixed,
TimeSeries,
Categorical,
/// Mostly empty/zero values.
Sparse,
/// Mostly populated values.
Dense,
}
/// Caller-declared workload profile; consumed by
/// `StorageStrategy::can_handle` to score candidate strategies.
#[derive(Debug, Clone)]
pub struct StorageRequirements {
/// Expected data size in bytes.
pub estimated_size: usize,
/// Expected access pattern.
pub access_pattern: AccessPattern,
/// What to optimize for.
pub performance_priority: PerformancePriority,
/// Required durability guarantee.
pub durability: DurabilityLevel,
/// Compression preference.
pub compression: CompressionPreference,
/// Expected concurrent-access level.
pub concurrency: ConcurrencyLevel,
/// Optional memory cap in bytes; `None` means uncapped.
pub memory_limit: Option<usize>,
/// Expected read/write mix.
pub io_pattern: IoPattern,
/// Coarse data-type description.
pub data_characteristics: DataCharacteristics,
}
impl Default for StorageRequirements {
fn default() -> Self {
Self {
estimated_size: 1024 * 1024, access_pattern: AccessPattern::Random,
performance_priority: PerformancePriority::Balanced,
durability: DurabilityLevel::Temporary,
compression: CompressionPreference::Auto,
concurrency: ConcurrencyLevel::Medium,
memory_limit: None,
io_pattern: IoPattern::Balanced,
data_characteristics: DataCharacteristics::Mixed,
}
}
}
/// Full configuration handed to `StorageStrategy::create_storage`.
#[derive(Debug, Clone)]
pub struct StorageConfig {
/// The workload profile to satisfy.
pub requirements: StorageRequirements,
/// Free-form strategy-specific string options.
pub options: HashMap<String, String>,
/// Optional sample of the data — presumably used by strategies for
/// format/compression heuristics; confirm with strategy implementations.
pub data_sample: Option<Vec<u8>>,
/// Access pattern the caller expects after creation.
pub expected_access_pattern: AccessPattern,
/// Hard resource/availability constraints.
pub constraints: StorageConstraints,
}
impl Default for StorageConfig {
fn default() -> Self {
Self {
requirements: StorageRequirements::default(),
options: HashMap::new(),
data_sample: None,
expected_access_pattern: AccessPattern::Random,
constraints: StorageConstraints::default(),
}
}
}
/// Hard limits a chosen storage strategy must respect.
#[derive(Debug, Clone)]
pub struct StorageConstraints {
/// Max memory in bytes; `None` means unlimited.
pub max_memory: Option<usize>,
/// Max disk usage in bytes; `None` means unlimited.
pub max_disk: Option<usize>,
/// Max CPU utilization in percent (0.0–100.0); `None` means unlimited.
pub max_cpu_percent: Option<f64>,
/// Required availability — presumably a fraction in [0, 1]; confirm
/// against whatever enforces it.
pub availability_requirement: f64,
}
impl Default for StorageConstraints {
fn default() -> Self {
Self {
max_memory: None,
max_disk: None,
max_cpu_percent: Some(80.0),
availability_requirement: 0.99,
}
}
}
/// A unit of data transferred to/from a storage strategy: raw bytes plus
/// per-chunk metadata.
#[derive(Debug, Clone)]
pub struct DataChunk {
/// Raw payload bytes.
pub data: Vec<u8>,
/// Size/checksum/compression bookkeeping for the payload.
pub metadata: ChunkMetadata,
}
impl DataChunk {
    /// Wraps raw bytes in a chunk, deriving metadata from the byte length.
    pub fn new(data: Vec<u8>) -> Self {
        Self {
            metadata: ChunkMetadata::new(data.len()),
            data,
        }
    }
    /// Number of payload bytes.
    pub fn len(&self) -> usize {
        self.data.len()
    }
    /// True when the chunk holds no bytes.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }
    /// Copies a byte slice into a new chunk.
    pub fn from_slice(data: &[u8]) -> Self {
        Self::new(data.to_vec())
    }
    /// Encodes a list of strings as one NUL-separated byte buffer.
    ///
    /// NOTE: strings that themselves contain `'\0'` will not round-trip
    /// through `as_strings`, since NUL is the separator.
    pub fn from_strings(strings: Vec<String>) -> Self {
        let data = strings.join("\0").into_bytes();
        Self::new(data)
    }
    /// Decodes a chunk produced by `from_strings` back into its strings.
    ///
    /// # Errors
    /// Returns `Error::InvalidOperation` when the payload is not valid UTF-8.
    pub fn as_strings(&self) -> Result<Vec<String>> {
        // Empty payload round-trips to an empty list: `from_strings(vec![])`
        // produces zero bytes, which a naive split would decode as `[""]`.
        if self.data.is_empty() {
            return Ok(Vec::new());
        }
        // Validate UTF-8 on a borrow instead of cloning the whole payload.
        let data_str = std::str::from_utf8(&self.data)
            .map_err(|e| Error::InvalidOperation(format!("Invalid UTF-8 data: {}", e)))?;
        Ok(data_str.split('\0').map(str::to_string).collect())
    }
    /// Builds a zero-filled chunk of `size` bytes (test/benchmark helper).
    pub fn new_test_data(size: usize) -> Self {
        Self::new(vec![0u8; size])
    }
}
/// Bookkeeping attached to each `DataChunk`.
#[derive(Debug, Clone)]
pub struct ChunkMetadata {
/// Payload size in bytes at creation time.
pub size: usize,
/// Payload checksum; initialized to 0 (not computed by `new`).
pub checksum: u64,
/// Compression applied to the payload.
pub compression: CompressionType,
/// When this chunk was created.
pub created_at: Instant,
}
impl ChunkMetadata {
fn new(size: usize) -> Self {
Self {
size,
checksum: 0,
compression: CompressionType::None,
created_at: Instant::now(),
}
}
}
/// Concrete compression codec applied to a chunk's payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CompressionType {
/// Uncompressed.
None,
/// Codec chosen automatically by the strategy.
Auto,
Lz4,
Zstd,
Snappy,
Gzip,
}
/// Half-open range `[start, end)` addressing part of a stored chunk.
#[derive(Debug, Clone)]
pub struct ChunkRange {
    /// Inclusive start offset.
    pub start: usize,
    /// Exclusive end offset.
    pub end: usize,
}
impl ChunkRange {
    /// Builds the half-open range `[start, end)`.
    pub fn new(start: usize, end: usize) -> Self {
        ChunkRange { start, end }
    }
    /// Number of elements covered; an inverted range counts as 0.
    pub fn len(&self) -> usize {
        if self.end > self.start {
            self.end - self.start
        } else {
            0
        }
    }
    /// True when the range covers nothing (including inverted ranges).
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
    /// The maximal range, `[0, usize::MAX)` — used to mean "everything".
    pub fn full() -> Self {
        ChunkRange::new(0, usize::MAX)
    }
}
impl From<Range<usize>> for ChunkRange {
    fn from(r: Range<usize>) -> Self {
        ChunkRange::new(r.start, r.end)
    }
}
/// A strategy's self-assessment for a given `StorageRequirements`,
/// returned by `StorageStrategy::can_handle`.
#[derive(Debug, Clone)]
pub struct StrategyCapability {
/// Whether the strategy can serve the requirements at all.
pub can_handle: bool,
/// Confidence in this assessment — presumably in [0, 1]; confirm with
/// the strategy implementations that produce it.
pub confidence: f64,
/// Relative performance score for ranking strategies (scale unspecified
/// here — confirm at the selection site).
pub performance_score: f64,
/// Estimated resources the strategy would consume.
pub resource_cost: ResourceCost,
}
/// Estimated resource consumption of a storage strategy.
#[derive(Debug, Clone)]
pub struct ResourceCost {
/// Memory in bytes.
pub memory: usize,
/// CPU cost (units unspecified here — likely percent or cores; confirm).
pub cpu: f64,
/// Disk usage in bytes.
pub disk: usize,
/// Network usage in bytes.
pub network: usize,
}
/// Static performance characteristics a strategy advertises via
/// `StorageStrategy::performance_profile`.
#[derive(Debug, Clone)]
pub struct PerformanceProfile {
pub read_speed: Speed,
pub write_speed: Speed,
pub memory_efficiency: Efficiency,
/// Typical compression ratio achieved (scale unspecified here — confirm
/// whether this is e.g. original/compressed).
pub compression_ratio: f64,
pub query_optimization: QueryOptimization,
pub parallel_scalability: ParallelScalability,
}
/// Coarse speed rating, slowest to fastest.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Speed {
VerySlow,
Slow,
Medium,
Fast,
VeryFast,
}
/// Coarse efficiency rating, worst to best.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Efficiency {
Poor,
Fair,
Good,
Excellent,
Outstanding,
}
/// Level of query optimization a strategy supports, none to best.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QueryOptimization {
None,
Basic,
Good,
Excellent,
}
/// How well a strategy scales with parallel access, none to best.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParallelScalability {
None,
Limited,
Good,
Excellent,
}
/// Runtime statistics reported by `StorageStrategy::storage_stats`.
///
/// All counters start at zero, so `Default` is derived; the derive is
/// identical to the previous hand-written all-zero implementation
/// (clippy `derivable_impls`).
#[derive(Debug, Clone, Default)]
pub struct StorageStats {
    /// Total storage capacity in bytes.
    pub total_size: usize,
    /// Bytes currently in use.
    pub used_size: usize,
    /// Number of read operations performed.
    pub read_operations: u64,
    /// Number of write operations performed.
    pub write_operations: u64,
    /// Average read latency in nanoseconds.
    pub avg_read_latency_ns: u64,
    /// Average write latency in nanoseconds.
    pub avg_write_latency_ns: u64,
    /// Cache hit rate — presumably a fraction in [0, 1]; confirm with the
    /// strategies that populate it.
    pub cache_hit_rate: f64,
}
/// Contract every storage backend implements. Associated types let each
/// backend use its own handle, error, and metadata representations while
/// remaining usable across threads (`Send + Sync`).
pub trait StorageStrategy: Send + Sync {
/// Backend-specific handle identifying a created storage.
type Handle;
/// Backend-specific error type.
type Error: std::error::Error + Send + Sync + 'static;
/// Backend-specific metadata attached to stored data.
type Metadata: Clone + Send + Sync;
/// Human-readable strategy name.
fn name(&self) -> &'static str;
/// Creates a new storage instance for the given configuration.
fn create_storage(
&mut self,
config: &StorageConfig,
) -> std::result::Result<Self::Handle, Self::Error>;
/// Reads the bytes covered by `range` from the storage behind `handle`.
fn read_chunk(
&self,
handle: &Self::Handle,
range: ChunkRange,
) -> std::result::Result<DataChunk, Self::Error>;
/// Writes `chunk` to the storage behind `handle`.
fn write_chunk(
&mut self,
handle: &Self::Handle,
chunk: DataChunk,
) -> std::result::Result<(), Self::Error>;
/// Appends `chunk` to the end of the storage behind `handle`.
fn append_chunk(
&mut self,
handle: &Self::Handle,
chunk: DataChunk,
) -> std::result::Result<(), Self::Error>;
/// Flushes buffered data for `handle` to its backing medium.
fn flush(&mut self, handle: &Self::Handle) -> std::result::Result<(), Self::Error>;
/// Deletes the storage behind `handle` and releases its resources.
fn delete_storage(&mut self, handle: &Self::Handle) -> std::result::Result<(), Self::Error>;
/// Scores how well this strategy satisfies `requirements`.
fn can_handle(&self, requirements: &StorageRequirements) -> StrategyCapability;
/// Static performance characteristics of this strategy.
fn performance_profile(&self) -> PerformanceProfile;
/// Current runtime statistics for this strategy.
fn storage_stats(&self) -> StorageStats;
/// Re-tunes the strategy for a new access pattern.
fn optimize_for_pattern(
&mut self,
pattern: AccessPattern,
) -> std::result::Result<(), Self::Error>;
/// Compacts the storage behind `handle`, reporting the size change.
fn compact(
&mut self,
handle: &Self::Handle,
) -> std::result::Result<CompactionResult, Self::Error>;
}
/// Outcome of a `StorageStrategy::compact` run.
#[derive(Debug, Clone)]
pub struct CompactionResult {
/// Storage size in bytes before compaction.
pub size_before: usize,
/// Storage size in bytes after compaction.
pub size_after: usize,
/// Wall-clock time the compaction took.
pub duration: std::time::Duration,
}
/// Strategy-agnostic handle to a storage instance: pairs an opaque
/// backend handle with identity, metadata, refcounting, and per-handle
/// performance tracking.
#[derive(Debug)]
pub struct StorageHandle {
/// Unique identifier for this storage.
pub id: StorageId,
/// Which backend kind created it.
pub strategy_type: StorageType,
/// The backend's own handle, type-erased behind `Any`.
pub inner_handle: Box<dyn Any + Send + Sync>,
/// Creation/access bookkeeping.
pub metadata: StorageMetadata,
/// Shared reference count; decremented in `Drop`.
pub ref_count: Arc<AtomicUsize>,
/// Read/write latency and volume tracking for this handle.
pub performance_tracker: PerformanceTracker,
}
impl StorageHandle {
pub fn new(
id: StorageId,
strategy_type: StorageType,
inner_handle: Box<dyn Any + Send + Sync>,
metadata: StorageMetadata,
) -> Self {
Self {
id,
strategy_type,
inner_handle,
metadata,
ref_count: Arc::new(AtomicUsize::new(1)),
performance_tracker: PerformanceTracker::new(),
}
}
}
impl Drop for StorageHandle {
fn drop(&mut self) {
// `fetch_sub` returns the previous count, so a result of 1 means this
// drop released the last reference.
if self.ref_count.fetch_sub(1, Ordering::SeqCst) == 1 {
// NOTE(review): the last-reference branch is empty — any actual
// resource teardown presumably happens elsewhere (e.g. via the
// owning strategy's `delete_storage`). TODO confirm no cleanup is
// required here.
}
}
}
/// Opaque numeric identifier for a storage instance (newtype over `u64`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StorageId(pub u64);
/// Creation/access bookkeeping carried by a `StorageHandle`.
#[derive(Debug, Clone)]
pub struct StorageMetadata {
    /// When the storage was created.
    pub created_at: Instant,
    /// When the storage was last touched; starts equal to `created_at`.
    pub last_accessed: Instant,
    /// Storage size in bytes.
    pub size: usize,
    /// How many times the storage has been accessed.
    pub access_count: u64,
}
impl StorageMetadata {
    /// Metadata for a newly created storage of `size` bytes: both
    /// timestamps are the current instant and the access counter is zero.
    pub fn new(size: usize) -> Self {
        let timestamp = Instant::now();
        StorageMetadata {
            created_at: timestamp,
            last_accessed: timestamp,
            access_count: 0,
            size,
        }
    }
}
/// Per-handle record of read/write latencies and byte volumes.
///
/// NOTE(review): the duration vectors grow without bound; a long-lived
/// handle may want a bounded ring buffer — confirm expected lifetimes.
#[derive(Debug, Clone, Default)]
pub struct PerformanceTracker {
    /// Duration of each recorded read.
    pub read_times: Vec<std::time::Duration>,
    /// Duration of each recorded write.
    pub write_times: Vec<std::time::Duration>,
    /// Total bytes read.
    pub bytes_read: u64,
    /// Total bytes written.
    pub bytes_written: u64,
}
impl PerformanceTracker {
    /// Empty tracker. Equivalent to `Default::default()`; the `Default`
    /// derive fixes clippy's `new_without_default`.
    pub fn new() -> Self {
        Self::default()
    }
    /// Records one read of `bytes` bytes that took `duration`.
    pub fn record_read(&mut self, duration: std::time::Duration, bytes: u64) {
        self.read_times.push(duration);
        self.bytes_read += bytes;
    }
    /// Records one write of `bytes` bytes that took `duration`.
    pub fn record_write(&mut self, duration: std::time::Duration, bytes: u64) {
        self.write_times.push(duration);
        self.bytes_written += bytes;
    }
    /// Mean read latency, or `None` when no reads were recorded.
    pub fn average_read_time(&self) -> Option<std::time::Duration> {
        Self::average(&self.read_times)
    }
    /// Mean write latency, or `None` when no writes were recorded.
    pub fn average_write_time(&self) -> Option<std::time::Duration> {
        Self::average(&self.write_times)
    }
    /// Mean of a sample set; `None` for an empty set. Shared by the two
    /// public averages (the original duplicated this logic).
    fn average(samples: &[std::time::Duration]) -> Option<std::time::Duration> {
        if samples.is_empty() {
            None
        } else {
            let total: std::time::Duration = samples.iter().sum();
            // `Duration` division takes a u32 divisor; clamp instead of the
            // original silent `as u32` truncation for absurdly large counts.
            Some(total / u32::try_from(samples.len()).unwrap_or(u32::MAX))
        }
    }
}
/// Lock-free counters tracking allocator activity; safe to update from
/// multiple threads without external synchronization.
#[derive(Debug, Default)]
pub struct AtomicMemoryStats {
    /// Bytes currently allocated.
    pub total_allocated: AtomicUsize,
    /// High-water mark of `total_allocated`.
    pub peak_usage: AtomicUsize,
    /// Number of live (not yet freed) allocations.
    pub active_allocations: AtomicUsize,
    /// Total allocations ever recorded.
    pub allocation_count: AtomicUsize,
    /// Total deallocations ever recorded.
    pub deallocation_count: AtomicUsize,
}
impl AtomicMemoryStats {
    /// All counters start at zero.
    pub fn new() -> Self {
        Self::default()
    }
    /// Records an allocation of `size` bytes and updates the peak watermark.
    pub fn record_allocation(&self, size: usize) {
        // `fetch_add` returns the previous value, so `previous + size` is the
        // exact total immediately after this allocation. The original did a
        // separate `load` afterwards, which could miss a true peak if a
        // concurrent deallocation landed between the add and the load.
        let current = self.total_allocated.fetch_add(size, Ordering::SeqCst) + size;
        self.active_allocations.fetch_add(1, Ordering::SeqCst);
        self.allocation_count.fetch_add(1, Ordering::SeqCst);
        self.peak_usage.fetch_max(current, Ordering::SeqCst);
    }
    /// Records a deallocation of `size` bytes.
    ///
    /// NOTE(review): freeing more than was allocated would wrap the counters;
    /// callers are trusted to pair sizes with their allocations.
    pub fn record_deallocation(&self, size: usize) {
        self.total_allocated.fetch_sub(size, Ordering::SeqCst);
        self.active_allocations.fetch_sub(1, Ordering::SeqCst);
        self.deallocation_count.fetch_add(1, Ordering::SeqCst);
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Defaults: 1 MiB estimate and balanced priority.
#[test]
fn test_storage_requirements_default() {
let req = StorageRequirements::default();
assert_eq!(req.estimated_size, 1024 * 1024);
assert_eq!(req.performance_priority, PerformancePriority::Balanced);
}
// A chunk reports the length of the bytes it wraps.
#[test]
fn test_data_chunk() {
let data = vec![1, 2, 3, 4, 5];
let chunk = DataChunk::new(data.clone());
assert_eq!(chunk.len(), 5);
assert_eq!(chunk.data, data);
}
// Half-open range length, plus inverted ranges counting as empty.
#[test]
fn test_chunk_range() {
let range = ChunkRange::new(10, 20);
assert_eq!(range.len(), 10);
assert!(!range.is_empty());
let empty_range = ChunkRange::new(20, 10);
assert!(empty_range.is_empty());
}
// New handles start with a reference count of one and carry their
// identity/metadata through unchanged.
#[test]
fn test_storage_handle_creation() {
let handle = StorageHandle::new(
StorageId(1),
StorageType::InMemory,
Box::new(42u32),
StorageMetadata::new(1024),
);
assert_eq!(handle.ref_count.load(Ordering::SeqCst), 1);
assert_eq!(handle.id, StorageId(1));
assert_eq!(handle.strategy_type, StorageType::InMemory);
assert_eq!(handle.metadata.size, 1024);
}
// Byte totals accumulate and the average is the mean of recorded reads.
#[test]
fn test_performance_tracker() {
let mut tracker = PerformanceTracker::new();
tracker.record_read(std::time::Duration::from_millis(10), 1024);
tracker.record_read(std::time::Duration::from_millis(20), 2048);
assert_eq!(tracker.bytes_read, 3072);
let avg_time = tracker
.average_read_time()
.expect("operation should succeed");
assert_eq!(avg_time, std::time::Duration::from_millis(15));
}
// Allocation/deallocation counters and the peak high-water mark.
#[test]
fn test_atomic_memory_stats() {
let stats = AtomicMemoryStats::new();
stats.record_allocation(1024);
assert_eq!(stats.total_allocated.load(Ordering::SeqCst), 1024);
assert_eq!(stats.active_allocations.load(Ordering::SeqCst), 1);
stats.record_allocation(2048);
assert_eq!(stats.total_allocated.load(Ordering::SeqCst), 3072);
assert_eq!(stats.peak_usage.load(Ordering::SeqCst), 3072);
stats.record_deallocation(1024);
assert_eq!(stats.total_allocated.load(Ordering::SeqCst), 2048);
assert_eq!(stats.active_allocations.load(Ordering::SeqCst), 1);
}
}