// voirs_conversion/cache.rs
1//! Advanced Caching System for VoiRS Voice Conversion
2//!
3//! This module provides sophisticated caching mechanisms to optimize performance
4//! by reducing redundant computations and improving memory access patterns.
5
6use crate::prelude::*;
7use serde::{Deserialize, Serialize};
8use std::collections::{BTreeMap, HashMap, VecDeque};
9use std::hash::{Hash, Hasher};
10use std::sync::{Arc, Mutex, RwLock};
11use std::time::{Duration, Instant, SystemTime};
12
/// Multi-level cache system for voice conversion operations
///
/// Lookups cascade L1 → L2 → persistent (see `retrieve`). The in-memory
/// levels are `String`-keyed LRU caches behind `RwLock`s so concurrent
/// readers/writers can share the system via `Arc`-cloned handles.
#[derive(Debug)]
pub struct ConversionCacheSystem {
    /// L1 cache for frequently accessed small items
    l1_cache: Arc<RwLock<LruCache<String, CachedItem>>>,
    /// L2 cache for larger or less frequently accessed items
    l2_cache: Arc<RwLock<LruCache<String, CachedItem>>>,
    /// Persistent cache for expensive computations (entries stored serialized
    /// and compressed; see `store_persistent`)
    persistent_cache: Arc<RwLock<HashMap<String, PersistentCacheItem>>>,
    /// Cache configuration
    config: CacheConfig,
    /// Cache statistics and metrics (behind a Mutex: updated on every
    /// store/retrieve)
    stats: Arc<Mutex<CacheStatistics>>,
    /// Cache policies for different data types; types without an entry fall
    /// back to a default policy in `store`
    policies: HashMap<CacheItemType, CachePolicy>,
}
29
/// Configuration for the cache system
///
/// Sensible defaults are provided by the `Default` impl below; construct a
/// custom configuration and pass it to `ConversionCacheSystem::with_config`.
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// Maximum size of L1 cache in bytes
    pub l1_max_size: usize,
    /// Maximum size of L2 cache in bytes
    pub l2_max_size: usize,
    /// Maximum number of items in L1 cache
    pub l1_max_items: usize,
    /// Maximum number of items in L2 cache
    pub l2_max_items: usize,
    /// TTL for cached items (used when an item type has no explicit policy)
    pub default_ttl: Duration,
    /// Enable compression for large items
    pub enable_compression: bool,
    /// Compression threshold in bytes; items at or below this size are stored
    /// uncompressed even when their policy allows compression
    pub compression_threshold: usize,
    /// Enable cache persistence
    pub enable_persistence: bool,
    /// Maximum size of persistent cache
    pub persistent_max_size: usize,
}
52
53impl Default for CacheConfig {
54    fn default() -> Self {
55        Self {
56            l1_max_size: 50 * 1024 * 1024,  // 50MB
57            l2_max_size: 200 * 1024 * 1024, // 200MB
58            l1_max_items: 1000,
59            l2_max_items: 5000,
60            default_ttl: Duration::from_secs(300), // 5 minutes
61            enable_compression: true,
62            compression_threshold: 1024 * 1024, // 1MB
63            enable_persistence: true,
64            persistent_max_size: 1024 * 1024 * 1024, // 1GB
65        }
66    }
67}
68
/// Cache policy for different types of data
///
/// A policy decides where an item may live (memory vs. persistent), for how
/// long, and whether it is worth compressing.
#[derive(Debug, Clone)]
pub struct CachePolicy {
    /// Priority level (higher = more important to keep)
    pub priority: CachePriority,
    /// Time-to-live for this type of data
    pub ttl: Duration,
    /// Whether this type should be compressed (still subject to the global
    /// `enable_compression` flag and size threshold in `CacheConfig`)
    pub compress: bool,
    /// Whether this type should persist across sessions
    pub persist: bool,
    /// Maximum size for individual items of this type
    pub max_item_size: usize,
}
83
/// Priority levels for cache items
///
/// The explicit discriminants give a total order (via `Ord`) used by the
/// cache when deciding placement, e.g. `priority >= CachePriority::High`
/// routes items to the L1 cache.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum CachePriority {
    /// Low priority items are evicted first when space is needed
    Low = 1,
    /// Medium priority items have moderate eviction resistance
    Medium = 2,
    /// High priority items are kept in cache longer
    High = 3,
    /// Critical priority items should remain cached as long as possible
    Critical = 4,
}
96
/// Types of cacheable items
///
/// Each type can be mapped to a `CachePolicy`; types without an explicit
/// policy (SpatialData, QualityMetrics, Metadata in the default setup) use a
/// fallback policy at store time.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CacheItemType {
    /// Model parameters and weights
    ModelData,
    /// Preprocessed audio features
    AudioFeatures,
    /// Conversion results for small audio clips
    ConversionResults,
    /// HRTF data and spatial processing
    SpatialData,
    /// Quality assessment results
    QualityMetrics,
    /// Configuration and metadata
    Metadata,
    /// Temporary intermediate results
    Intermediate,
}
115
/// Cached item with metadata
///
/// Wraps the payload with the bookkeeping needed for TTL expiry, LRU/priority
/// decisions, and compression-aware retrieval. Uses `Instant` timestamps, so
/// items are memory-resident only (the persistent tier uses `SystemTime`).
#[derive(Debug, Clone)]
pub struct CachedItem {
    /// The cached data
    pub data: CachedData,
    /// When the item was created (expiry is `created_at.elapsed() > ttl`)
    pub created_at: Instant,
    /// When the item was last accessed
    pub last_accessed: Instant,
    /// Number of times accessed
    pub access_count: usize,
    /// Time-to-live for this item
    pub ttl: Duration,
    /// Priority of this item
    pub priority: CachePriority,
    /// Type of cached item
    pub item_type: CacheItemType,
    /// Size in bytes (post-compression size if `compressed` is set)
    pub size: usize,
    /// Whether the item is compressed; when set, `data` should hold the
    /// `CachedData::Compressed` variant
    pub compressed: bool,
}
138
139/// Different types of cached data
140#[derive(Debug, Clone)]
141pub enum CachedData {
142    /// Raw binary data
143    Binary(Vec<u8>),
144    /// Audio samples
145    Audio(Vec<f32>),
146    /// Model parameters
147    ModelParams(Vec<f32>),
148    /// Text data
149    Text(String),
150    /// Structured data (JSON)
151    Structured(serde_json::Value),
152    /// Compressed data
153    Compressed(Vec<u8>),
154}
155
/// Persistent cache item with extended metadata
///
/// Unlike `CachedItem`, timestamps use `SystemTime` so entries remain valid
/// across process restarts, and the payload is stored pre-serialized and
/// compressed (see `store_persistent`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersistentCacheItem {
    /// The cached data (serialized and gzip-compressed)
    pub data: Vec<u8>,
    /// Creation timestamp
    pub created_at: SystemTime,
    /// Last access timestamp
    pub last_accessed: SystemTime,
    /// Access count
    pub access_count: usize,
    /// Item type
    pub item_type: CacheItemType,
    /// Original size before compression
    pub original_size: usize,
    /// Compressed size
    pub compressed_size: usize,
    /// Hash of the original (uncompressed, serialized) data for integrity
    /// checking
    pub data_hash: u64,
}
176
/// Cache statistics and metrics
///
/// A snapshot of this struct is produced by `get_statistics`, which refreshes
/// the current item/size counters from the live caches and recomputes the
/// derived global figures.
#[derive(Debug, Default, Clone)]
pub struct CacheStatistics {
    /// L1 cache statistics
    pub l1_stats: CacheLevelStats,
    /// L2 cache statistics
    pub l2_stats: CacheLevelStats,
    /// Persistent cache statistics
    pub persistent_stats: CacheLevelStats,
    /// Global statistics: total hits + misses across all levels
    pub total_requests: usize,
    /// Global hit rate (0.0 to 1.0 across all levels)
    pub global_hit_rate: f64,
    /// Memory usage by cache level
    pub memory_usage: HashMap<String, usize>,
    /// Performance metrics
    pub performance_metrics: PerformanceMetrics,
}
195
/// Statistics for a single cache level (L1, L2, or persistent)
#[derive(Debug, Default, Clone)]
pub struct CacheLevelStats {
    /// Number of cache hits
    pub hits: usize,
    /// Number of cache misses
    pub misses: usize,
    /// Number of evictions
    pub evictions: usize,
    /// Current number of items
    pub current_items: usize,
    /// Current size in bytes
    pub current_size: usize,
    /// Number of compressed items
    pub compressed_items: usize,
    /// Bytes saved through compression
    pub bytes_saved: usize,
    /// Hit rate (0.0 to 1.0)
    pub hit_rate: f64,
    /// Average access time
    pub avg_access_time: Duration,
}
218
/// Performance metrics for cache operations (timings averaged over the
/// lifetime of the cache system)
#[derive(Debug, Default, Clone)]
pub struct PerformanceMetrics {
    /// Average time to store an item
    pub avg_store_time: Duration,
    /// Average time to retrieve an item
    pub avg_retrieve_time: Duration,
    /// Average compression time
    pub avg_compression_time: Duration,
    /// Average decompression time
    pub avg_decompression_time: Duration,
    /// Compression ratio achieved
    pub compression_ratio: f64,
}
233
/// LRU (Least Recently Used) cache implementation with size-based eviction
///
/// Tracks the exact byte size of every entry in a parallel `sizes` map so
/// that `current_size` stays accurate. This fixes three accounting defects in
/// the previous version: `insert` on an existing key never adjusted the size,
/// `remove` never reclaimed bytes, and `evict_lru` only subtracted an
/// *average* item size.
#[derive(Debug)]
pub struct LruCache<K, V> {
    /// Maximum capacity in number of items
    max_items: usize,
    /// Maximum size in bytes
    max_size: usize,
    /// Current size in bytes (sum of `sizes` values)
    current_size: usize,
    /// Cache entries
    entries: HashMap<K, V>,
    /// Exact byte size of each entry, keyed identically to `entries`
    sizes: HashMap<K, usize>,
    /// Access order (most recent first)
    access_order: VecDeque<K>,
}

impl<K: Clone + Eq + Hash, V> LruCache<K, V> {
    /// Create a new LRU cache with specified capacity limits
    pub fn new(max_items: usize, max_size: usize) -> Self {
        Self {
            max_items,
            max_size,
            current_size: 0,
            entries: HashMap::new(),
            sizes: HashMap::new(),
            access_order: VecDeque::new(),
        }
    }

    /// Get an item from the cache, marking it as most recently used
    pub fn get(&mut self, key: &K) -> Option<&V> {
        if self.entries.contains_key(key) {
            self.move_to_front(key.clone());
            self.entries.get(key)
        } else {
            None
        }
    }

    /// Insert an item into the cache
    ///
    /// Returns `false` when the item alone exceeds the cache's byte budget.
    /// Inserting over an existing key replaces its value and adjusts the
    /// tracked size; otherwise LRU entries are evicted until both the item
    /// count and byte budgets can accommodate the new entry.
    pub fn insert(&mut self, key: K, value: V, size: usize) -> bool {
        // Reject items that could never fit.
        if size > self.max_size {
            return false;
        }

        if self.entries.contains_key(&key) {
            // Replace in place: reclaim the old entry's bytes first.
            let old_size = self.sizes.insert(key.clone(), size).unwrap_or(0);
            self.current_size = self.current_size.saturating_sub(old_size) + size;
            self.entries.insert(key.clone(), value);
            self.move_to_front(key);
            // A larger replacement may push us over budget; evict from the
            // cold end (the replaced key sits at the front and is spared).
            while self.current_size > self.max_size && self.entries.len() > 1 {
                self.evict_lru();
            }
            return true;
        }

        // Make room for a brand-new entry.
        while (self.entries.len() >= self.max_items || self.current_size + size > self.max_size)
            && !self.entries.is_empty()
        {
            self.evict_lru();
        }

        self.entries.insert(key.clone(), value);
        self.sizes.insert(key.clone(), size);
        self.access_order.push_front(key);
        self.current_size += size;
        true
    }

    /// Remove an item from the cache, reclaiming its tracked bytes
    pub fn remove(&mut self, key: &K) -> Option<V> {
        let value = self.entries.remove(key)?;
        self.access_order.retain(|k| k != key);
        if let Some(size) = self.sizes.remove(key) {
            self.current_size = self.current_size.saturating_sub(size);
        }
        Some(value)
    }

    /// Get the current number of items
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// Check if the cache is empty
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Get current size in bytes
    pub fn current_size(&self) -> usize {
        self.current_size
    }

    /// Clear all items from the cache
    pub fn clear(&mut self) {
        self.entries.clear();
        self.sizes.clear();
        self.access_order.clear();
        self.current_size = 0;
    }

    /// Move an item to the front of the access order
    fn move_to_front(&mut self, key: K) {
        self.access_order.retain(|k| k != &key);
        self.access_order.push_front(key);
    }

    /// Evict the least recently used item, subtracting its exact size
    fn evict_lru(&mut self) {
        if let Some(key) = self.access_order.pop_back() {
            self.entries.remove(&key);
            if let Some(size) = self.sizes.remove(&key) {
                self.current_size = self.current_size.saturating_sub(size);
            }
        }
    }
}
349
impl Default for ConversionCacheSystem {
    /// Builds a cache system with the default `CacheConfig`.
    fn default() -> Self {
        Self::new()
    }
}
355
356impl ConversionCacheSystem {
    /// Create a new cache system with default configuration
    ///
    /// Equivalent to `Self::with_config(CacheConfig::default())`.
    pub fn new() -> Self {
        Self::with_config(CacheConfig::default())
    }
361
362    /// Create a new cache system with custom configuration
363    pub fn with_config(config: CacheConfig) -> Self {
364        let mut policies = HashMap::new();
365
366        // Set up default policies for different item types
367        policies.insert(
368            CacheItemType::ModelData,
369            CachePolicy {
370                priority: CachePriority::Critical,
371                ttl: Duration::from_secs(3600), // 1 hour
372                compress: true,
373                persist: true,
374                max_item_size: 50 * 1024 * 1024, // 50MB
375            },
376        );
377
378        policies.insert(
379            CacheItemType::AudioFeatures,
380            CachePolicy {
381                priority: CachePriority::High,
382                ttl: Duration::from_secs(600), // 10 minutes
383                compress: true,
384                persist: false,
385                max_item_size: 10 * 1024 * 1024, // 10MB
386            },
387        );
388
389        policies.insert(
390            CacheItemType::ConversionResults,
391            CachePolicy {
392                priority: CachePriority::Medium,
393                ttl: Duration::from_secs(300), // 5 minutes
394                compress: false,
395                persist: false,
396                max_item_size: 5 * 1024 * 1024, // 5MB
397            },
398        );
399
400        policies.insert(
401            CacheItemType::Intermediate,
402            CachePolicy {
403                priority: CachePriority::Low,
404                ttl: Duration::from_secs(60), // 1 minute
405                compress: false,
406                persist: false,
407                max_item_size: 1024 * 1024, // 1MB
408            },
409        );
410
411        Self {
412            l1_cache: Arc::new(RwLock::new(LruCache::new(
413                config.l1_max_items,
414                config.l1_max_size,
415            ))),
416            l2_cache: Arc::new(RwLock::new(LruCache::new(
417                config.l2_max_items,
418                config.l2_max_size,
419            ))),
420            persistent_cache: Arc::new(RwLock::new(HashMap::new())),
421            config: config.clone(),
422            stats: Arc::new(Mutex::new(CacheStatistics::default())),
423            policies,
424        }
425    }
426
    /// Store an item in the cache
    ///
    /// Small, high-priority items go to L1; anything that fits goes to L2;
    /// the persistent tier is used only as a fallback when the in-memory
    /// levels rejected the item and its policy permits persistence. Items too
    /// large for every level are silently dropped (best-effort caching) and
    /// `Ok(())` is still returned.
    pub fn store(&self, key: String, data: CachedData, item_type: CacheItemType) -> Result<()> {
        let start_time = Instant::now();

        // Fallback policy for item types without an explicit entry in
        // `self.policies` (e.g. SpatialData, QualityMetrics, Metadata).
        let default_policy = CachePolicy {
            priority: CachePriority::Medium,
            ttl: self.config.default_ttl,
            compress: false,
            persist: false,
            max_item_size: 10 * 1024 * 1024,
        };
        let policy = self.policies.get(&item_type).unwrap_or(&default_policy);

        // Optionally compress the payload; yields the (possibly replaced)
        // data, its byte size, and whether compression was applied.
        let (processed_data, size, compressed) = self.process_data_for_storage(data, policy)?;

        let item = CachedItem {
            data: processed_data,
            created_at: Instant::now(),
            last_accessed: Instant::now(),
            access_count: 0,
            ttl: policy.ttl,
            priority: policy.priority,
            item_type,
            size,
            compressed,
        };

        // Determine which cache level to use; each lock is held only for the
        // single insert.
        let stored =
            if size <= self.config.l1_max_size / 10 && policy.priority >= CachePriority::High {
                // Store in L1 cache for small, important items
                let mut l1 = self.l1_cache.write().expect("L1 cache lock poisoned");
                l1.insert(key.clone(), item.clone(), size)
            } else if size <= self.config.l2_max_size / 10 {
                // Store in L2 cache for larger items
                let mut l2 = self.l2_cache.write().expect("L2 cache lock poisoned");
                l2.insert(key.clone(), item.clone(), size)
            } else {
                false
            };

        // NOTE(review): persistence only engages when the in-memory store
        // failed, so `persist: true` items that fit in L1/L2 are never written
        // to the persistent tier — confirm this is the intended behavior.
        if !stored && policy.persist && self.config.enable_persistence {
            // Store in persistent cache
            self.store_persistent(key, item)?;
        }

        // Update statistics
        let store_time = start_time.elapsed();
        self.update_store_stats(store_time);

        Ok(())
    }
479
    /// Retrieve an item from the cache
    ///
    /// Checks L1, then L2, then the persistent tier, returning the first
    /// non-expired hit. Expired entries are skipped here but not removed —
    /// they are purged later by `optimize`/`cleanup_expired_items`. Each
    /// in-memory lookup goes through `LruCache::get`, which also marks the
    /// entry as most recently used.
    pub fn retrieve(&self, key: &str) -> Option<CachedData> {
        let start_time = Instant::now();

        // Try L1 cache first (write lock needed: `get` updates LRU order).
        {
            let mut l1 = self.l1_cache.write().expect("L1 cache lock poisoned");
            if let Some(item) = l1.get(&key.to_string()) {
                if !self.is_expired(item) {
                    // Decompress if the item was stored compressed.
                    let result = self.process_data_for_retrieval(&item.data, item.compressed);
                    self.update_retrieve_stats(start_time.elapsed(), true, "L1");
                    return result;
                }
            }
        }

        // Try L2 cache
        {
            let mut l2 = self.l2_cache.write().expect("L2 cache lock poisoned");
            if let Some(item) = l2.get(&key.to_string()) {
                if !self.is_expired(item) {
                    let result = self.process_data_for_retrieval(&item.data, item.compressed);
                    self.update_retrieve_stats(start_time.elapsed(), true, "L2");
                    return result;
                }
            }
        }

        // Try persistent cache (read lock: no LRU bookkeeping on this tier).
        if self.config.enable_persistence {
            let persistent = self
                .persistent_cache
                .read()
                .expect("Persistent cache lock poisoned");
            if let Some(persistent_item) = persistent.get(key) {
                if !self.is_persistent_expired(persistent_item) {
                    // Deserialize and decompress if needed; a failed
                    // deserialization falls through to a miss.
                    if let Ok(cached_data) = self.deserialize_persistent_data(persistent_item) {
                        self.update_retrieve_stats(start_time.elapsed(), true, "Persistent");
                        return Some(cached_data);
                    }
                }
            }
        }

        // Cache miss
        self.update_retrieve_stats(start_time.elapsed(), false, "None");
        None
    }
529
530    /// Remove an item from all cache levels
531    pub fn remove(&self, key: &str) {
532        {
533            let mut l1 = self.l1_cache.write().expect("L1 cache lock poisoned");
534            l1.remove(&key.to_string());
535        }
536        {
537            let mut l2 = self.l2_cache.write().expect("L2 cache lock poisoned");
538            l2.remove(&key.to_string());
539        }
540        if self.config.enable_persistence {
541            let mut persistent = self
542                .persistent_cache
543                .write()
544                .expect("Persistent cache lock poisoned");
545            persistent.remove(key);
546        }
547    }
548
549    /// Clear all caches
550    pub fn clear_all(&self) {
551        {
552            let mut l1 = self.l1_cache.write().expect("L1 cache lock poisoned");
553            l1.clear();
554        }
555        {
556            let mut l2 = self.l2_cache.write().expect("L2 cache lock poisoned");
557            l2.clear();
558        }
559        if self.config.enable_persistence {
560            let mut persistent = self
561                .persistent_cache
562                .write()
563                .expect("Persistent cache lock poisoned");
564            persistent.clear();
565        }
566    }
567
568    /// Get cache statistics
569    pub fn get_statistics(&self) -> CacheStatistics {
570        let mut stats = self.stats.lock().expect("Cache stats lock poisoned");
571
572        // Update current cache level stats
573        {
574            let l1 = self.l1_cache.read().expect("L1 cache lock poisoned");
575            stats.l1_stats.current_items = l1.len();
576            stats.l1_stats.current_size = l1.current_size();
577        }
578        {
579            let l2 = self.l2_cache.read().expect("L2 cache lock poisoned");
580            stats.l2_stats.current_items = l2.len();
581            stats.l2_stats.current_size = l2.current_size();
582        }
583        if self.config.enable_persistence {
584            let persistent = self
585                .persistent_cache
586                .read()
587                .expect("Persistent cache lock poisoned");
588            stats.persistent_stats.current_items = persistent.len();
589            stats.persistent_stats.current_size =
590                persistent.values().map(|item| item.compressed_size).sum();
591        }
592
593        // Calculate global hit rate
594        let total_hits = stats.l1_stats.hits + stats.l2_stats.hits + stats.persistent_stats.hits;
595        let total_misses =
596            stats.l1_stats.misses + stats.l2_stats.misses + stats.persistent_stats.misses;
597        let total_requests = total_hits + total_misses;
598
599        if total_requests > 0 {
600            stats.global_hit_rate = total_hits as f64 / total_requests as f64;
601        }
602
603        stats.clone()
604    }
605
    /// Optimize cache by removing expired items and reorganizing
    ///
    /// Runs three maintenance passes in order: purge expired entries from all
    /// levels, rebalance items between L1 and L2 by access frequency, and —
    /// per its name — compress under-utilized items (helper defined later in
    /// this file).
    pub fn optimize(&self) {
        self.cleanup_expired_items();
        self.rebalance_caches();
        self.compress_underutilized_items();
    }
612
613    /// Create a cache key from conversion parameters
614    pub fn create_cache_key(
615        &self,
616        conversion_type: &ConversionType,
617        audio_hash: u64,
618        target_hash: u64,
619        quality_level: u8,
620    ) -> String {
621        format!(
622            "conv_{}_{:016x}_{:016x}_q{}",
623            self.conversion_type_to_string(conversion_type),
624            audio_hash,
625            target_hash,
626            quality_level
627        )
628    }
629
630    /// Generate hash for audio data
631    pub fn hash_audio_data(&self, audio: &[f32]) -> u64 {
632        use std::collections::hash_map::DefaultHasher;
633
634        let mut hasher = DefaultHasher::new();
635
636        // Hash length and some sample points for efficiency
637        audio.len().hash(&mut hasher);
638
639        if audio.len() <= 1000 {
640            // Hash all samples for small audio
641            for &sample in audio {
642                ((sample * 10000.0) as i32).hash(&mut hasher);
643            }
644        } else {
645            // Hash samples at regular intervals for large audio
646            let step = audio.len() / 100;
647            for i in (0..audio.len()).step_by(step) {
648                ((audio[i] * 10000.0) as i32).hash(&mut hasher);
649            }
650        }
651
652        hasher.finish()
653    }
654
655    // Private helper methods
656
657    fn process_data_for_storage(
658        &self,
659        data: CachedData,
660        policy: &CachePolicy,
661    ) -> Result<(CachedData, usize, bool)> {
662        let size = self.estimate_data_size(&data);
663
664        if policy.compress
665            && self.config.enable_compression
666            && size > self.config.compression_threshold
667        {
668            // Compress the data
669            let compressed_data = self.compress_data(&data)?;
670            let compressed_size = compressed_data.len();
671            Ok((
672                CachedData::Compressed(compressed_data),
673                compressed_size,
674                true,
675            ))
676        } else {
677            Ok((data, size, false))
678        }
679    }
680
681    fn process_data_for_retrieval(
682        &self,
683        data: &CachedData,
684        compressed: bool,
685    ) -> Option<CachedData> {
686        if compressed {
687            if let CachedData::Compressed(compressed_data) = data {
688                self.decompress_data(compressed_data).ok()
689            } else {
690                None
691            }
692        } else {
693            Some(data.clone())
694        }
695    }
696
    /// Serialize `data` to JSON, then gzip it at the highest compression
    /// level (`Compression::best`).
    ///
    /// # Errors
    /// Returns a processing error if serialization, compression, or stream
    /// finalization fails.
    fn compress_data(&self, data: &CachedData) -> Result<Vec<u8>> {
        let serialized = serde_json::to_vec(data).map_err(|e| {
            Error::processing(format!("Failed to serialize data for compression: {e}"))
        })?;

        // Use flate2 for real compression with high compression level for cache efficiency
        use flate2::{write::GzEncoder, Compression};
        use std::io::Write;

        let mut encoder = GzEncoder::new(Vec::new(), Compression::best());
        encoder
            .write_all(&serialized)
            .map_err(|e| Error::processing(format!("Failed to compress data: {e}")))?;
        encoder
            .finish()
            .map_err(|e| Error::processing(format!("Failed to finalize compression: {e}")))
    }
714
    /// Inverse of `compress_data`: gunzip the bytes, then deserialize the
    /// JSON back into a `CachedData` value.
    ///
    /// # Errors
    /// Returns a processing error if decompression or deserialization fails.
    fn decompress_data(&self, compressed: &[u8]) -> Result<CachedData> {
        // Use flate2 for real decompression
        use flate2::read::GzDecoder;
        use std::io::Read;

        let mut decoder = GzDecoder::new(compressed);
        let mut decompressed = Vec::new();
        decoder
            .read_to_end(&mut decompressed)
            .map_err(|e| Error::processing(format!("Failed to decompress data: {e}")))?;

        serde_json::from_slice(&decompressed)
            .map_err(|e| Error::processing(format!("Failed to deserialize decompressed data: {e}")))
    }
729
    /// Compress data for long-term storage.
    ///
    /// NOTE(review): this is currently byte-for-byte the same algorithm as
    /// `compress_data` — both use `Compression::best()` — differing only in
    /// error messages. Consider delegating to `compress_data` or giving this
    /// variant genuinely different settings.
    fn compress_data_max(&self, data: &CachedData) -> Result<Vec<u8>> {
        let serialized = serde_json::to_vec(data).map_err(|e| {
            Error::processing(format!("Failed to serialize data for max compression: {e}"))
        })?;

        // Use maximum compression level for long-term storage
        use flate2::{write::GzEncoder, Compression};
        use std::io::Write;

        let mut encoder = GzEncoder::new(Vec::new(), Compression::best());
        encoder.write_all(&serialized).map_err(|e| {
            Error::processing(format!("Failed to compress data with max compression: {e}"))
        })?;
        encoder
            .finish()
            .map_err(|e| Error::processing(format!("Failed to finalize max compression: {e}")))
    }
747
748    fn estimate_data_size(&self, data: &CachedData) -> usize {
749        match data {
750            CachedData::Binary(bytes) => bytes.len(),
751            CachedData::Audio(samples) => samples.len() * std::mem::size_of::<f32>(),
752            CachedData::ModelParams(params) => params.len() * std::mem::size_of::<f32>(),
753            CachedData::Text(text) => text.len(),
754            CachedData::Structured(value) => serde_json::to_string(value).unwrap_or_default().len(),
755            CachedData::Compressed(bytes) => bytes.len(),
756        }
757    }
758
759    fn is_expired(&self, item: &CachedItem) -> bool {
760        item.created_at.elapsed() > item.ttl
761    }
762
763    fn is_persistent_expired(&self, item: &PersistentCacheItem) -> bool {
764        let age = SystemTime::now()
765            .duration_since(item.created_at)
766            .unwrap_or_default();
767        age > Duration::from_secs(86400) // 24 hours for persistent items
768    }
769
    /// Write an item into the persistent cache as a compressed, serialized
    /// blob with integrity metadata.
    ///
    /// The integrity hash covers the *uncompressed* JSON serialization.
    /// NOTE(review): the payload is serialized twice (once here for
    /// `original_size`/hash, once inside `compress_data`) — a candidate for
    /// consolidation. Also, `config.persistent_max_size` is not enforced on
    /// this path; confirm size limiting happens elsewhere.
    fn store_persistent(&self, key: String, item: CachedItem) -> Result<()> {
        let serialized_data = serde_json::to_vec(&item.data)
            .map_err(|e| Error::processing(format!("Failed to serialize persistent data: {e}")))?;

        let compressed_data = self.compress_data(&item.data)?;

        let persistent_item = PersistentCacheItem {
            data: compressed_data.clone(),
            created_at: SystemTime::now(),
            last_accessed: SystemTime::now(),
            access_count: 0,
            item_type: item.item_type,
            original_size: serialized_data.len(),
            compressed_size: compressed_data.len(),
            data_hash: self.hash_data(&serialized_data),
        };

        let mut persistent = self
            .persistent_cache
            .write()
            .expect("Persistent cache lock poisoned");
        persistent.insert(key, persistent_item);

        Ok(())
    }
795
    /// Reconstruct a `CachedData` value from a persistent entry: entries are
    /// stored gzip-compressed (see `store_persistent`), so this inflates and
    /// deserializes the payload in one step.
    fn deserialize_persistent_data(&self, item: &PersistentCacheItem) -> Result<CachedData> {
        self.decompress_data(&item.data)
    }
799
    /// Hash raw bytes with the std `DefaultHasher`, used for the integrity
    /// field on persistent entries.
    ///
    /// NOTE(review): `DefaultHasher`'s algorithm is not guaranteed stable
    /// across Rust releases — confirm persisted hashes are only compared
    /// within a single build.
    fn hash_data(&self, data: &[u8]) -> u64 {
        use std::collections::hash_map::DefaultHasher;
        let mut hasher = DefaultHasher::new();
        data.hash(&mut hasher);
        hasher.finish()
    }
806
    /// Purge expired entries from every cache level.
    ///
    /// For each level, expired keys are first collected under the lock and
    /// then removed via `remove`, so the LRU bookkeeping stays consistent.
    /// This reaches into `LruCache.entries` directly (same-module access).
    fn cleanup_expired_items(&self) {
        // L1 cache cleanup
        {
            let mut l1 = self.l1_cache.write().expect("L1 cache lock poisoned");
            let keys_to_remove: Vec<String> = l1
                .entries
                .iter()
                .filter(|(_, item)| self.is_expired(item))
                .map(|(k, _)| k.clone())
                .collect();

            for key in keys_to_remove {
                l1.remove(&key);
            }
        }

        // L2 cache cleanup
        {
            let mut l2 = self.l2_cache.write().expect("L2 cache lock poisoned");
            let keys_to_remove: Vec<String> = l2
                .entries
                .iter()
                .filter(|(_, item)| self.is_expired(item))
                .map(|(k, _)| k.clone())
                .collect();

            for key in keys_to_remove {
                l2.remove(&key);
            }
        }

        // Persistent cache cleanup (24-hour TTL; see `is_persistent_expired`)
        if self.config.enable_persistence {
            let mut persistent = self
                .persistent_cache
                .write()
                .expect("Persistent cache lock poisoned");
            let keys_to_remove: Vec<String> = persistent
                .iter()
                .filter(|(_, item)| self.is_persistent_expired(item))
                .map(|(k, _)| k.clone())
                .collect();

            for key in keys_to_remove {
                persistent.remove(&key);
            }
        }
    }
855
856    fn rebalance_caches(&self) {
857        let now = Instant::now();
858        let l1_available_space = {
859            let l1 = self.l1_cache.read().expect("L1 cache lock poisoned");
860            self.config.l1_max_size.saturating_sub(l1.current_size())
861        };
862
863        // Only rebalance if we have significant space available
864        if l1_available_space > 1024 * 1024 {
865            // 1MB threshold
866            let mut candidates_for_promotion = Vec::new();
867
868            // Collect high-access items from L2 for potential promotion to L1
869            {
870                let l2 = self.l2_cache.read().expect("L2 cache lock poisoned");
871                for (key, item) in l2.entries.iter() {
872                    // Calculate access frequency (accesses per minute)
873                    let age_minutes = now.duration_since(item.created_at).as_secs() / 60;
874                    let access_frequency = if age_minutes > 0 {
875                        item.access_count as f64 / age_minutes as f64
876                    } else {
877                        item.access_count as f64
878                    };
879
880                    // Consider items with high access frequency and recent access
881                    let recently_accessed =
882                        now.duration_since(item.last_accessed) < Duration::from_secs(300); // 5 minutes
883                    let high_priority = item.priority >= CachePriority::High;
884                    let frequently_accessed = access_frequency > 2.0; // More than 2 accesses per minute
885
886                    if (frequently_accessed || high_priority)
887                        && recently_accessed
888                        && item.size <= l1_available_space
889                    {
890                        candidates_for_promotion.push((
891                            key.clone(),
892                            item.clone(),
893                            access_frequency,
894                        ));
895                    }
896                }
897            }
898
899            // Sort candidates by access frequency (highest first)
900            candidates_for_promotion
901                .sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
902
903            // Promote candidates to L1, respecting space constraints
904            let mut space_used = 0;
905            for (key, item, _) in candidates_for_promotion {
906                let item_size = item.size;
907                if space_used + item_size > l1_available_space {
908                    break;
909                }
910
911                // Move item from L2 to L1
912                {
913                    let mut l2 = self.l2_cache.write().expect("L2 cache lock poisoned");
914                    if l2.remove(&key).is_some() {
915                        space_used += item_size;
916
917                        let mut l1 = self.l1_cache.write().expect("L1 cache lock poisoned");
918                        l1.insert(key, item, item_size);
919                    }
920                }
921            }
922        }
923
924        // Also demote least accessed L1 items to L2 if L1 is near capacity
925        let l1_utilization = {
926            let l1 = self.l1_cache.read().expect("L1 cache lock poisoned");
927            l1.current_size() as f64 / self.config.l1_max_size as f64
928        };
929
930        if l1_utilization > 0.9 {
931            // If L1 is more than 90% full
932            let mut candidates_for_demotion = Vec::new();
933
934            {
935                let l1 = self.l1_cache.read().expect("L1 cache lock poisoned");
936                for (key, item) in l1.entries.iter() {
937                    let age_minutes = now.duration_since(item.created_at).as_secs() / 60;
938                    let access_frequency = if age_minutes > 0 {
939                        item.access_count as f64 / age_minutes as f64
940                    } else {
941                        item.access_count as f64
942                    };
943
944                    let not_recently_accessed =
945                        now.duration_since(item.last_accessed) > Duration::from_secs(600); // 10 minutes
946                    let low_priority = item.priority <= CachePriority::Medium;
947                    let infrequently_accessed = access_frequency < 0.5; // Less than 0.5 accesses per minute
948
949                    if (infrequently_accessed || low_priority) && not_recently_accessed {
950                        candidates_for_demotion.push((key.clone(), item.clone(), access_frequency));
951                    }
952                }
953            }
954
955            // Sort candidates by access frequency (lowest first)
956            candidates_for_demotion
957                .sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal));
958
959            // Demote candidates from L1 to L2
960            let l2_available_space = {
961                let l2 = self.l2_cache.read().expect("L2 cache lock poisoned");
962                self.config.l2_max_size.saturating_sub(l2.current_size())
963            };
964
965            let mut space_used = 0;
966            for (key, item, _) in candidates_for_demotion {
967                let item_size = item.size;
968                if space_used + item_size > l2_available_space {
969                    break;
970                }
971
972                // Move item from L1 to L2
973                {
974                    let mut l1 = self.l1_cache.write().expect("L1 cache lock poisoned");
975                    if l1.remove(&key).is_some() {
976                        space_used += item_size;
977
978                        let mut l2 = self.l2_cache.write().expect("L2 cache lock poisoned");
979                        l2.insert(key, item, item_size);
980                    }
981                }
982            }
983        }
984    }
985
986    fn compress_underutilized_items(&self) {
987        let now = Instant::now();
988        let compression_threshold = Duration::from_secs(1800); // 30 minutes without access
989        let min_size_for_compression = self.config.compression_threshold;
990
991        // Compress underutilized items in L2 cache
992        {
993            let mut l2 = self.l2_cache.write().expect("L2 cache lock poisoned");
994            let mut items_to_compress = Vec::new();
995
996            for (key, item) in l2.entries.iter() {
997                let time_since_access = now.duration_since(item.last_accessed);
998                let should_compress = !item.compressed
999                    && time_since_access > compression_threshold
1000                    && item.size >= min_size_for_compression
1001                    && item.access_count < 5; // Don't compress frequently used items
1002
1003                if should_compress {
1004                    items_to_compress.push(key.clone());
1005                }
1006            }
1007
1008            // Compress the identified items
1009            for key in items_to_compress {
1010                if let Some(mut item) = l2.entries.get_mut(&key) {
1011                    if let Ok(compressed_data) = self.compress_data(&item.data) {
1012                        let original_size = item.size;
1013                        item.data = CachedData::Compressed(compressed_data);
1014                        item.size = match &item.data {
1015                            CachedData::Compressed(data) => data.len(),
1016                            _ => item.size,
1017                        };
1018                        item.compressed = true;
1019
1020                        // Update statistics
1021                        let mut stats = self.stats.lock().expect("Cache stats lock poisoned");
1022                        stats.l2_stats.compressed_items += 1;
1023                        stats.l2_stats.bytes_saved += original_size.saturating_sub(item.size);
1024                    }
1025                }
1026            }
1027        }
1028
1029        // Also compress underutilized items in persistent cache
1030        if self.config.enable_persistence {
1031            let mut persistent = self
1032                .persistent_cache
1033                .write()
1034                .expect("Persistent cache lock poisoned");
1035            let mut items_to_recompress = Vec::new();
1036
1037            for (key, item) in persistent.iter() {
1038                let time_since_access = SystemTime::now()
1039                    .duration_since(item.last_accessed)
1040                    .unwrap_or(Duration::from_secs(0));
1041
1042                // Recompress with better compression for very old items
1043                let should_recompress = time_since_access > Duration::from_secs(7200) // 2 hours
1044                    && item.access_count < 3
1045                    && item.compressed_size > 1024; // Only recompress larger items
1046
1047                if should_recompress {
1048                    items_to_recompress.push(key.clone());
1049                }
1050            }
1051
1052            for key in items_to_recompress {
1053                if let Some(item) = persistent.get_mut(&key) {
1054                    // Decompress, then recompress with maximum compression
1055                    if let Ok(decompressed_data) = self.decompress_data(&item.data) {
1056                        // Use maximum compression for long-term storage
1057                        if let Ok(recompressed_data) = self.compress_data_max(&decompressed_data) {
1058                            let old_size = item.compressed_size;
1059                            item.data = recompressed_data;
1060                            item.compressed_size = item.data.len();
1061
1062                            // Update statistics
1063                            let mut stats = self.stats.lock().expect("Cache stats lock poisoned");
1064                            stats.persistent_stats.bytes_saved +=
1065                                old_size.saturating_sub(item.compressed_size);
1066                        }
1067                    }
1068                }
1069            }
1070        }
1071    }
1072
1073    fn conversion_type_to_string(&self, conversion_type: &ConversionType) -> &'static str {
1074        match conversion_type {
1075            ConversionType::PitchShift => "pitch",
1076            ConversionType::SpeedTransformation => "speed",
1077            ConversionType::SpeakerConversion => "speaker",
1078            ConversionType::AgeTransformation => "age",
1079            ConversionType::GenderTransformation => "gender",
1080            ConversionType::VoiceMorphing => "morphing",
1081            ConversionType::EmotionalTransformation => "emotion",
1082            ConversionType::ZeroShotConversion => "zero_shot",
1083            ConversionType::Custom(_) => "custom",
1084            ConversionType::PassThrough => "passthrough",
1085        }
1086    }
1087
1088    fn update_store_stats(&self, duration: Duration) {
1089        let mut stats = self.stats.lock().expect("Cache stats lock poisoned");
1090        // Update average store time using exponential moving average
1091        let alpha = 0.1;
1092        let current_avg = stats.performance_metrics.avg_store_time.as_nanos() as f64;
1093        let new_avg = current_avg * (1.0 - alpha) + duration.as_nanos() as f64 * alpha;
1094        stats.performance_metrics.avg_store_time = Duration::from_nanos(new_avg as u64);
1095    }
1096
1097    fn update_retrieve_stats(&self, duration: Duration, hit: bool, cache_level: &str) {
1098        let mut stats = self.stats.lock().expect("Cache stats lock poisoned");
1099
1100        // Update retrieve time
1101        let alpha = 0.1;
1102        let current_avg = stats.performance_metrics.avg_retrieve_time.as_nanos() as f64;
1103        let new_avg = current_avg * (1.0 - alpha) + duration.as_nanos() as f64 * alpha;
1104        stats.performance_metrics.avg_retrieve_time = Duration::from_nanos(new_avg as u64);
1105
1106        // Update hit/miss stats for the appropriate cache level
1107        match cache_level {
1108            "L1" => {
1109                if hit {
1110                    stats.l1_stats.hits += 1;
1111                } else {
1112                    stats.l1_stats.misses += 1;
1113                }
1114                let total = stats.l1_stats.hits + stats.l1_stats.misses;
1115                stats.l1_stats.hit_rate = stats.l1_stats.hits as f64 / total as f64;
1116            }
1117            "L2" => {
1118                if hit {
1119                    stats.l2_stats.hits += 1;
1120                } else {
1121                    stats.l2_stats.misses += 1;
1122                }
1123                let total = stats.l2_stats.hits + stats.l2_stats.misses;
1124                stats.l2_stats.hit_rate = stats.l2_stats.hits as f64 / total as f64;
1125            }
1126            "Persistent" => {
1127                if hit {
1128                    stats.persistent_stats.hits += 1;
1129                } else {
1130                    stats.persistent_stats.misses += 1;
1131                }
1132                let total = stats.persistent_stats.hits + stats.persistent_stats.misses;
1133                stats.persistent_stats.hit_rate = stats.persistent_stats.hits as f64 / total as f64;
1134            }
1135            _ => {}
1136        }
1137
1138        stats.total_requests += 1;
1139    }
1140}
1141
1142// Implement serde for CachedData
1143impl Serialize for CachedData {
1144    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
1145    where
1146        S: serde::Serializer,
1147    {
1148        use serde::ser::SerializeStruct;
1149
1150        match self {
1151            CachedData::Binary(data) => {
1152                let mut state = serializer.serialize_struct("CachedData", 2)?;
1153                state.serialize_field("type", "Binary")?;
1154                state.serialize_field("data", data)?;
1155                state.end()
1156            }
1157            CachedData::Audio(data) => {
1158                let mut state = serializer.serialize_struct("CachedData", 2)?;
1159                state.serialize_field("type", "Audio")?;
1160                state.serialize_field("data", data)?;
1161                state.end()
1162            }
1163            CachedData::ModelParams(data) => {
1164                let mut state = serializer.serialize_struct("CachedData", 2)?;
1165                state.serialize_field("type", "ModelParams")?;
1166                state.serialize_field("data", data)?;
1167                state.end()
1168            }
1169            CachedData::Text(data) => {
1170                let mut state = serializer.serialize_struct("CachedData", 2)?;
1171                state.serialize_field("type", "Text")?;
1172                state.serialize_field("data", data)?;
1173                state.end()
1174            }
1175            CachedData::Structured(data) => {
1176                let mut state = serializer.serialize_struct("CachedData", 2)?;
1177                state.serialize_field("type", "Structured")?;
1178                state.serialize_field("data", data)?;
1179                state.end()
1180            }
1181            CachedData::Compressed(data) => {
1182                let mut state = serializer.serialize_struct("CachedData", 2)?;
1183                state.serialize_field("type", "Compressed")?;
1184                state.serialize_field("data", data)?;
1185                state.end()
1186            }
1187        }
1188    }
1189}
1190
impl<'de> Deserialize<'de> for CachedData {
    /// Deserialize the `{ "type": ..., "data": ... }` struct written by the
    /// matching `Serialize` impl, dispatching on the `type` tag to rebuild
    /// the correct variant.
    ///
    /// NOTE(review): the payload is buffered through `serde_json::Value`
    /// before being converted to the variant's inner type, which ties this
    /// impl to self-describing, JSON-like formats — confirm no non-JSON
    /// format is used for cache persistence.
    fn deserialize<D>(deserializer: D) -> std::result::Result<CachedData, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{self, MapAccess, Visitor};
        use std::fmt;

        /// Visitor driving the map-based decoding of `CachedData`.
        struct CachedDataVisitor;

        impl<'de> Visitor<'de> for CachedDataVisitor {
            type Value = CachedData;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a CachedData struct")
            }

            fn visit_map<V>(self, mut map: V) -> std::result::Result<CachedData, V::Error>
            where
                V: MapAccess<'de>,
            {
                // Collected fields; duplicates and missing fields are
                // rejected below with serde's standard errors.
                let mut data_type: Option<String> = None;
                let mut data: Option<serde_json::Value> = None;

                while let Some(key) = map.next_key()? {
                    match key {
                        "type" => {
                            if data_type.is_some() {
                                return Err(de::Error::duplicate_field("type"));
                            }
                            data_type = Some(map.next_value()?);
                        }
                        "data" => {
                            if data.is_some() {
                                return Err(de::Error::duplicate_field("data"));
                            }
                            data = Some(map.next_value()?);
                        }
                        _ => {
                            // Unknown fields are tolerated: consume and
                            // discard the value to keep the map in sync.
                            let _: serde_json::Value = map.next_value()?;
                        }
                    }
                }

                let data_type = data_type.ok_or_else(|| de::Error::missing_field("type"))?;
                let data = data.ok_or_else(|| de::Error::missing_field("data"))?;

                // Dispatch on the tag; each payload is converted from the
                // buffered JSON value into the variant's inner type.
                match data_type.as_str() {
                    "Binary" => Ok(CachedData::Binary(
                        serde_json::from_value(data).map_err(de::Error::custom)?,
                    )),
                    "Audio" => Ok(CachedData::Audio(
                        serde_json::from_value(data).map_err(de::Error::custom)?,
                    )),
                    "ModelParams" => Ok(CachedData::ModelParams(
                        serde_json::from_value(data).map_err(de::Error::custom)?,
                    )),
                    "Text" => Ok(CachedData::Text(
                        serde_json::from_value(data).map_err(de::Error::custom)?,
                    )),
                    // Structured stores the raw JSON value directly.
                    "Structured" => Ok(CachedData::Structured(data)),
                    "Compressed" => Ok(CachedData::Compressed(
                        serde_json::from_value(data).map_err(de::Error::custom)?,
                    )),
                    _ => Err(de::Error::unknown_variant(
                        &data_type,
                        &[
                            "Binary",
                            "Audio",
                            "ModelParams",
                            "Text",
                            "Structured",
                            "Compressed",
                        ],
                    )),
                }
            }
        }

        deserializer.deserialize_struct("CachedData", &["type", "data"], CachedDataVisitor)
    }
}
1273
#[cfg(test)]
mod tests {
    use super::*;

    /// LRU eviction: touching an entry protects it; the least recently
    /// used entry goes first.
    #[test]
    fn test_lru_cache_basic_operations() {
        let mut lru = LruCache::new(3, 1000);

        for (k, v) in [("key1", "value1"), ("key2", "value2"), ("key3", "value3")] {
            assert!(lru.insert(k.to_string(), v.to_string(), 10));
        }
        assert_eq!(lru.len(), 3);

        // Touch key1 so that key2 becomes the least recently used entry.
        assert_eq!(lru.get(&"key1".to_string()), Some(&"value1".to_string()));

        // Inserting a fourth entry must evict key2.
        assert!(lru.insert("key4".to_string(), "value4".to_string(), 10));
        assert_eq!(lru.len(), 3);
        assert_eq!(lru.get(&"key2".to_string()), None);
    }

    /// Round-trip store/retrieve plus a miss on an unknown key.
    #[test]
    fn test_conversion_cache_system() {
        let system = ConversionCacheSystem::new();

        system
            .store(
                "test_key".to_string(),
                CachedData::Audio(vec![0.1, 0.2, 0.3, 0.4]),
                CacheItemType::AudioFeatures,
            )
            .unwrap();

        // A stored item is retrievable under its key.
        assert!(system.retrieve("test_key").is_some());

        // An unknown key is a cache miss.
        assert!(system.retrieve("nonexistent_key").is_none());
    }

    /// Key generation is deterministic in its inputs and sensitive to the
    /// conversion type.
    #[test]
    fn test_cache_key_generation() {
        let system = ConversionCacheSystem::new();
        let (audio_hash, target_hash, params) = (0x1234567890abcdef, 0xfedcba0987654321, 5);

        let pitch_a = system.create_cache_key(
            &ConversionType::PitchShift,
            audio_hash,
            target_hash,
            params,
        );
        let pitch_b = system.create_cache_key(
            &ConversionType::PitchShift,
            audio_hash,
            target_hash,
            params,
        );
        let speed = system.create_cache_key(
            &ConversionType::SpeedTransformation,
            audio_hash,
            target_hash,
            params,
        );

        // Identical parameters produce identical keys...
        assert_eq!(pitch_a, pitch_b);
        // ...while a different conversion type changes the key.
        assert_ne!(pitch_a, speed);
    }

    /// Audio hashing: equal sample buffers collide, a one-sample change
    /// produces a different hash.
    #[test]
    fn test_audio_hashing() {
        let system = ConversionCacheSystem::new();

        let base = vec![0.1, 0.2, 0.3, 0.4, 0.5];
        let identical = base.clone();
        let mut different = base.clone();
        *different.last_mut().unwrap() = 0.6; // Change only the last sample.

        assert_eq!(system.hash_audio_data(&base), system.hash_audio_data(&identical));
        assert_ne!(system.hash_audio_data(&base), system.hash_audio_data(&different));
    }

    /// A hit and a miss must both be reflected in the statistics.
    #[test]
    fn test_cache_statistics() {
        let system = ConversionCacheSystem::new();

        system
            .store(
                "key1".to_string(),
                CachedData::Audio(vec![0.1, 0.2, 0.3]),
                CacheItemType::AudioFeatures,
            )
            .unwrap();

        let _hit = system.retrieve("key1");
        let _miss = system.retrieve("key2");

        let stats = system.get_statistics();
        assert!(stats.total_requests > 0);
        assert!(stats.l1_stats.hits > 0 || stats.l2_stats.hits > 0);
    }

    /// Running optimization must not break normal retrieval.
    #[test]
    fn test_cache_optimization() {
        let system = ConversionCacheSystem::new();

        system
            .store(
                "key1".to_string(),
                CachedData::Audio(vec![0.1; 1000]),
                CacheItemType::AudioFeatures,
            )
            .unwrap();

        system.optimize();

        assert!(system.retrieve("key1").is_some());
    }
}