chie_core/
content_aware_cache.rs

1//! Content-aware cache sizing with intelligent memory management.
2//!
//! This module provides a size-bounded cache that picks eviction victims from
//! content characteristics (type, priority, size) and access patterns (recency,
//! hit rate), and whose capacity can be adjusted at runtime.
5//!
6//! # Example
7//!
8//! ```
9//! use chie_core::content_aware_cache::{ContentAwareCache, CacheContentMetrics, ContentType};
10//!
11//! # fn example() {
12//! let mut cache = ContentAwareCache::new(100 * 1024 * 1024); // 100MB
13//!
14//! // Add content with metrics
15//! let metrics = CacheContentMetrics {
16//!     content_type: ContentType::VideoChunk,
17//!     size_bytes: 256 * 1024,
18//!     access_frequency: 10,
19//!     priority: 5,
20//! };
21//!
22//! cache.insert("video:chunk1".to_string(), vec![0u8; 256 * 1024], metrics);
23//!
24//! // Cache automatically adjusts size based on content characteristics
25//! println!("Current cache size: {} bytes", cache.current_size());
26//! # }
27//! ```
28
29use std::collections::{HashMap, VecDeque};
30use std::time::{Instant, SystemTime, UNIX_EPOCH};
31
/// Maximum number of historical access records to keep.
///
/// When the access history already holds this many records, the oldest one is
/// dropped before a new record is appended.
const MAX_ACCESS_HISTORY: usize = 1000;
34
/// Classification of cached content, used to weight caching decisions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ContentType {
    /// Small metadata entries.
    Metadata,
    /// Image chunks.
    ImageChunk,
    /// Video chunks.
    VideoChunk,
    /// Audio chunks.
    AudioChunk,
    /// Document chunks.
    DocumentChunk,
    /// Generic data.
    Generic,
}

impl ContentType {
    /// Base priority weight for this content type.
    ///
    /// The baseline is `1.0`; metadata carries the largest weight because it is
    /// small and important.
    #[must_use]
    #[inline]
    pub const fn priority_weight(&self) -> f64 {
        match *self {
            Self::VideoChunk | Self::Generic => 1.0,
            Self::AudioChunk => 1.1,
            Self::ImageChunk => 1.2,
            Self::DocumentChunk => 1.3,
            Self::Metadata => 2.0,
        }
    }

    /// Ideal cache retention multiplier for this content type.
    ///
    /// The baseline is `1.0`; metadata has the largest multiplier so that it is
    /// kept longest.
    #[must_use]
    #[inline]
    pub const fn retention_multiplier(&self) -> f64 {
        match *self {
            Self::VideoChunk | Self::Generic => 1.0,
            Self::AudioChunk => 1.2,
            Self::ImageChunk => 1.5,
            Self::DocumentChunk => 1.8,
            Self::Metadata => 3.0,
        }
    }
}
81
/// Metrics for content-aware caching decisions.
///
/// Supplied by the caller on insert and stored alongside the cached bytes.
#[derive(Debug, Clone)]
pub struct CacheContentMetrics {
    /// Type of content.
    pub content_type: ContentType,
    /// Size in bytes.
    ///
    /// Used for the size penalty in an entry's value score. Capacity accounting
    /// uses the length of the stored data instead, so the two should agree.
    pub size_bytes: usize,
    /// Access frequency (accesses per time unit).
    ///
    /// NOTE(review): not read by any code visible in this file.
    pub access_frequency: u32,
    /// Manual priority (0-10, higher is more important).
    pub priority: u8,
}
94
/// Cached entry with metadata.
#[derive(Debug)]
struct CacheEntry {
    /// The cached payload.
    data: Vec<u8>,
    /// Caller-supplied metrics describing the payload.
    metrics: CacheContentMetrics,
    /// Number of times the entry has been accessed since insertion.
    access_count: u64,
    /// When the entry was most recently accessed (creation time until then).
    last_access: Instant,
    /// When the entry was created.
    inserted_at: Instant,
    /// Accesses per second since insertion, refreshed on every access.
    hit_rate: f64,
}
105
106impl CacheEntry {
107    fn new(data: Vec<u8>, metrics: CacheContentMetrics) -> Self {
108        Self {
109            data,
110            metrics,
111            access_count: 0,
112            last_access: Instant::now(),
113            inserted_at: Instant::now(),
114            hit_rate: 0.0,
115        }
116    }
117
118    fn access(&mut self) {
119        self.access_count += 1;
120        self.last_access = Instant::now();
121
122        // Update hit rate (exponential moving average)
123        let time_since_insert = self.inserted_at.elapsed().as_secs_f64().max(1.0);
124        self.hit_rate = self.access_count as f64 / time_since_insert;
125    }
126
127    /// Calculate the value score for this entry (higher = more valuable).
128    fn value_score(&self) -> f64 {
129        let type_weight = self.metrics.content_type.priority_weight();
130        let priority_weight = (self.metrics.priority as f64 / 10.0) * 2.0;
131        let recency_weight = {
132            let seconds_since_access = self.last_access.elapsed().as_secs_f64();
133            1.0 / (1.0 + seconds_since_access / 3600.0) // Decay over hours
134        };
135        let hit_rate_weight = self.hit_rate.min(10.0) / 10.0;
136        let size_penalty = 1.0 / (1.0 + (self.metrics.size_bytes as f64 / 1024.0 / 1024.0));
137
138        (type_weight + priority_weight + recency_weight + hit_rate_weight) * size_penalty
139    }
140}
141
/// Access history record for adaptive sizing.
///
/// One record is appended for every cache lookup, hit or miss.
#[derive(Debug, Clone)]
// The fields are written when a lookup is recorded, but no code visible in this
// file reads them back, hence the lint suppression.
#[allow(dead_code)]
struct AccessRecord {
    /// Unix time of the lookup, in seconds.
    timestamp: u64,
    /// Whether the lookup found an entry.
    hit: bool,
    /// Content type of the entry that was hit; misses are recorded as `Generic`.
    content_type: ContentType,
}
150
/// Content-aware cache with intelligent sizing.
///
/// Entries are keyed by string and bounded by a total byte budget; when the
/// budget is exceeded, the entry with the lowest value score is evicted.
pub struct ContentAwareCache {
    /// Cached entries by key.
    entries: HashMap<String, CacheEntry>,
    /// Byte budget for the sum of all stored payloads.
    max_size_bytes: usize,
    /// Running total of the stored payload lengths.
    current_size_bytes: usize,
    /// Most recent lookups, oldest first, capped at `MAX_ACCESS_HISTORY`.
    access_history: VecDeque<AccessRecord>,
    /// Number of lookups performed (hits and misses).
    total_accesses: u64,
    /// Number of lookups that found an entry.
    total_hits: u64,
    /// Stored payload bytes broken down by content type.
    size_per_type: HashMap<ContentType, usize>,
}
161
162impl ContentAwareCache {
163    /// Create a new content-aware cache with a maximum size.
164    #[must_use]
165    pub fn new(max_size_bytes: usize) -> Self {
166        Self {
167            entries: HashMap::new(),
168            max_size_bytes,
169            current_size_bytes: 0,
170            access_history: VecDeque::with_capacity(MAX_ACCESS_HISTORY),
171            total_accesses: 0,
172            total_hits: 0,
173            size_per_type: HashMap::new(),
174        }
175    }
176
177    /// Insert content into the cache.
178    pub fn insert(&mut self, key: String, data: Vec<u8>, metrics: CacheContentMetrics) {
179        let size = data.len();
180
181        // Remove old entry if exists
182        if let Some(old_entry) = self.entries.remove(&key) {
183            self.current_size_bytes -= old_entry.data.len();
184            *self
185                .size_per_type
186                .entry(old_entry.metrics.content_type)
187                .or_insert(0) -= old_entry.data.len();
188        }
189
190        // Evict entries if necessary
191        while self.current_size_bytes + size > self.max_size_bytes && !self.entries.is_empty() {
192            self.evict_lowest_value();
193        }
194
195        // Insert new entry
196        if self.current_size_bytes + size <= self.max_size_bytes {
197            self.current_size_bytes += size;
198            *self.size_per_type.entry(metrics.content_type).or_insert(0) += size;
199            self.entries.insert(key, CacheEntry::new(data, metrics));
200        }
201    }
202
203    /// Get content from the cache (returns a clone).
204    #[must_use]
205    pub fn get(&mut self, key: &str) -> Option<Vec<u8>> {
206        self.total_accesses += 1;
207
208        let content_type = if let Some(entry) = self.entries.get(key) {
209            entry.metrics.content_type
210        } else {
211            ContentType::Generic
212        };
213
214        if let Some(entry) = self.entries.get_mut(key) {
215            entry.access();
216            self.total_hits += 1;
217            let data = entry.data.clone();
218            self.record_access(true, content_type);
219            Some(data)
220        } else {
221            self.record_access(false, content_type);
222            None
223        }
224    }
225
226    /// Remove content from the cache.
227    #[must_use]
228    pub fn remove(&mut self, key: &str) -> Option<Vec<u8>> {
229        if let Some(entry) = self.entries.remove(key) {
230            self.current_size_bytes -= entry.data.len();
231            *self
232                .size_per_type
233                .entry(entry.metrics.content_type)
234                .or_insert(0) -= entry.data.len();
235            Some(entry.data)
236        } else {
237            None
238        }
239    }
240
241    /// Clear all entries from the cache.
242    pub fn clear(&mut self) {
243        self.entries.clear();
244        self.current_size_bytes = 0;
245        self.size_per_type.clear();
246    }
247
248    /// Evict the entry with the lowest value score.
249    fn evict_lowest_value(&mut self) {
250        let mut lowest_key: Option<String> = None;
251        let mut lowest_score = f64::MAX;
252
253        for (key, entry) in &self.entries {
254            let score = entry.value_score();
255            if score < lowest_score {
256                lowest_score = score;
257                lowest_key = Some(key.clone());
258            }
259        }
260
261        if let Some(key) = lowest_key {
262            let _ = self.remove(&key);
263        }
264    }
265
266    /// Record an access for adaptive sizing.
267    fn record_access(&mut self, hit: bool, content_type: ContentType) {
268        if self.access_history.len() >= MAX_ACCESS_HISTORY {
269            self.access_history.pop_front();
270        }
271
272        self.access_history.push_back(AccessRecord {
273            timestamp: current_timestamp(),
274            hit,
275            content_type,
276        });
277    }
278
    /// Get the current cache size in bytes.
    ///
    /// This is the running total of the payload lengths of all stored entries.
    #[must_use]
    #[inline]
    pub const fn current_size(&self) -> usize {
        self.current_size_bytes
    }
285
    /// Get the maximum cache size in bytes.
    ///
    /// This is the byte budget given at construction or set by the most recent
    /// call to `adjust_size`.
    #[must_use]
    #[inline]
    pub const fn max_size(&self) -> usize {
        self.max_size_bytes
    }
292
293    /// Get the cache hit rate.
294    #[must_use]
295    #[inline]
296    pub fn hit_rate(&self) -> f64 {
297        if self.total_accesses == 0 {
298            0.0
299        } else {
300            self.total_hits as f64 / self.total_accesses as f64
301        }
302    }
303
    /// Get the number of entries in the cache.
    ///
    /// Counts distinct keys currently stored, regardless of payload size.
    #[must_use]
    #[inline]
    pub fn entry_count(&self) -> usize {
        self.entries.len()
    }
310
311    /// Get cache usage percentage.
312    #[must_use]
313    #[inline]
314    pub fn usage_percentage(&self) -> f64 {
315        (self.current_size_bytes as f64 / self.max_size_bytes as f64) * 100.0
316    }
317
318    /// Get size allocated to each content type.
319    #[must_use]
320    #[inline]
321    pub fn size_by_type(&self, content_type: ContentType) -> usize {
322        *self.size_per_type.get(&content_type).unwrap_or(&0)
323    }
324
325    /// Adjust cache size dynamically based on performance.
326    pub fn adjust_size(&mut self, new_max_size: usize) {
327        self.max_size_bytes = new_max_size;
328
329        // Evict entries if new size is smaller
330        while self.current_size_bytes > self.max_size_bytes && !self.entries.is_empty() {
331            self.evict_lowest_value();
332        }
333    }
334
335    /// Get recommended cache size based on access patterns.
336    #[must_use]
337    pub fn recommended_size(&self) -> usize {
338        if self.access_history.is_empty() {
339            return self.max_size_bytes;
340        }
341
342        let hit_rate = self.hit_rate();
343
344        // If hit rate is high, current size is good
345        // If hit rate is low, recommend increase
346        let multiplier = if hit_rate > 0.8 {
347            1.0 // Good hit rate
348        } else if hit_rate > 0.6 {
349            1.2 // Could be better
350        } else if hit_rate > 0.4 {
351            1.5 // Needs more space
352        } else {
353            2.0 // Very low hit rate
354        };
355
356        let recommended = (self.current_size_bytes as f64 * multiplier) as usize;
357        recommended.min(self.max_size_bytes * 2) // Cap at 2x current max
358    }
359
360    /// Get cache statistics.
361    #[must_use]
362    pub fn stats(&self) -> ContentCacheStats {
363        ContentCacheStats {
364            total_accesses: self.total_accesses,
365            total_hits: self.total_hits,
366            hit_rate: self.hit_rate(),
367            current_size_bytes: self.current_size_bytes,
368            max_size_bytes: self.max_size_bytes,
369            entry_count: self.entries.len(),
370            usage_percentage: self.usage_percentage(),
371        }
372    }
373}
374
/// Cache statistics.
///
/// A point-in-time snapshot; it does not update as the cache changes.
#[derive(Debug, Clone)]
pub struct ContentCacheStats {
    /// Total cache accesses (hits and misses).
    pub total_accesses: u64,
    /// Total cache hits.
    pub total_hits: u64,
    /// Current hit rate (hits divided by accesses, `0.0` with no accesses).
    pub hit_rate: f64,
    /// Current cache size in bytes.
    pub current_size_bytes: usize,
    /// Maximum cache size in bytes.
    pub max_size_bytes: usize,
    /// Number of entries.
    pub entry_count: usize,
    /// Cache usage percentage (current size relative to maximum size).
    pub usage_percentage: f64,
}
393
/// Seconds elapsed since the Unix epoch according to the system clock.
///
/// Yields `0` if the system clock reports a time before the epoch.
#[inline]
fn current_timestamp() -> u64 {
    let now = SystemTime::now();
    now.duration_since(UNIX_EPOCH)
        .map_or(0, |since_epoch| since_epoch.as_secs())
}
402
#[cfg(test)]
mod tests {
    use super::*;

    /// Assemble the metrics record attached to a test entry.
    fn sample_metrics(
        content_type: ContentType,
        size_bytes: usize,
        access_frequency: u32,
        priority: u8,
    ) -> CacheContentMetrics {
        CacheContentMetrics {
            content_type,
            size_bytes,
            access_frequency,
            priority,
        }
    }

    /// A stored entry is counted in the size total and can be read back in full.
    #[test]
    fn test_basic_insert_and_get() {
        let mut cache = ContentAwareCache::new(1024);
        let metrics = sample_metrics(ContentType::Metadata, 100, 5, 8);

        cache.insert(String::from("key1"), vec![1u8; 100], metrics);
        assert_eq!(cache.current_size(), 100);

        let fetched = cache.get("key1");
        assert!(fetched.is_some());
        assert_eq!(fetched.unwrap().len(), 100);
    }

    /// When the cache is full, the lowest-value entry makes room for a new one.
    #[test]
    fn test_cache_eviction() {
        let mut cache = ContentAwareCache::new(200);

        let high = sample_metrics(ContentType::Metadata, 100, 10, 9);
        let low = sample_metrics(ContentType::Generic, 100, 1, 1);
        cache.insert(String::from("high_value"), vec![1u8; 100], high);
        cache.insert(String::from("low_value"), vec![2u8; 100], low);

        // Cache is full, insert another entry
        let medium = sample_metrics(ContentType::VideoChunk, 100, 5, 5);
        cache.insert(String::from("medium_value"), vec![3u8; 100], medium);

        // Low value entry should be evicted
        assert!(cache.get("low_value").is_none());
        assert!(cache.get("high_value").is_some());
    }

    /// The hit rate is hits divided by total lookups.
    #[test]
    fn test_hit_rate_calculation() {
        let mut cache = ContentAwareCache::new(1024);
        let metrics = sample_metrics(ContentType::Metadata, 100, 5, 5);
        cache.insert(String::from("key1"), vec![1u8; 100], metrics);

        // 3 hits, 2 misses
        for _ in 0..3 {
            let _ = cache.get("key1");
        }
        let _ = cache.get("key2");
        let _ = cache.get("key3");

        let deviation = (cache.hit_rate() - 0.6).abs();
        assert!(deviation < 0.01);
    }

    /// Metadata outranks generic content.
    #[test]
    fn test_content_type_priority() {
        let metadata = ContentType::Metadata.priority_weight();
        let generic = ContentType::Generic.priority_weight();

        assert!(metadata > generic);
    }

    /// Clearing removes all entries and resets the size total.
    #[test]
    fn test_cache_clear() {
        let mut cache = ContentAwareCache::new(1024);
        let metrics = sample_metrics(ContentType::Metadata, 100, 5, 5);

        cache.insert(String::from("key1"), vec![1u8; 100], metrics.clone());
        cache.insert(String::from("key2"), vec![2u8; 100], metrics);
        assert_eq!(cache.entry_count(), 2);

        cache.clear();

        assert_eq!(cache.entry_count(), 0);
        assert_eq!(cache.current_size(), 0);
    }

    /// Shrinking the budget evicts entries until the contents fit.
    #[test]
    fn test_dynamic_resize() {
        let mut cache = ContentAwareCache::new(300);
        let metrics = sample_metrics(ContentType::Metadata, 100, 5, 5);

        for (key, fill) in [("key1", 1u8), ("key2", 2u8), ("key3", 3u8)] {
            cache.insert(String::from(key), vec![fill; 100], metrics.clone());
        }
        assert_eq!(cache.entry_count(), 3);

        // Shrink cache
        cache.adjust_size(150);

        // Should evict some entries
        assert!(cache.entry_count() < 3);
        assert!(cache.current_size() <= 150);
    }

    /// Stored bytes are tracked separately for each content type.
    #[test]
    fn test_size_by_type() {
        let mut cache = ContentAwareCache::new(1024);

        let meta = sample_metrics(ContentType::Metadata, 100, 5, 5);
        let video = sample_metrics(ContentType::VideoChunk, 200, 5, 5);
        cache.insert(String::from("meta1"), vec![1u8; 100], meta);
        cache.insert(String::from("video1"), vec![2u8; 200], video);

        assert_eq!(cache.size_by_type(ContentType::Metadata), 100);
        assert_eq!(cache.size_by_type(ContentType::VideoChunk), 200);
    }

    /// Usage is reported as a percentage of the byte budget.
    #[test]
    fn test_usage_percentage() {
        let mut cache = ContentAwareCache::new(1000);
        let metrics = sample_metrics(ContentType::Metadata, 250, 5, 5);

        cache.insert(String::from("key1"), vec![1u8; 250], metrics);

        let deviation = (cache.usage_percentage() - 25.0).abs();
        assert!(deviation < 0.1);
    }

    /// Removing an entry returns it and releases its bytes.
    #[test]
    fn test_remove() {
        let mut cache = ContentAwareCache::new(1024);
        let metrics = sample_metrics(ContentType::Metadata, 100, 5, 5);

        cache.insert(String::from("key1"), vec![1u8; 100], metrics);
        assert_eq!(cache.current_size(), 100);

        let removed = cache.remove("key1");
        assert!(removed.is_some());
        assert_eq!(cache.current_size(), 0);
    }

    /// The statistics snapshot reflects lookups, hits and entry count.
    #[test]
    fn test_stats() {
        let mut cache = ContentAwareCache::new(1024);
        let metrics = sample_metrics(ContentType::Metadata, 100, 5, 5);

        cache.insert(String::from("key1"), vec![1u8; 100], metrics);
        let _ = cache.get("key1");
        let _ = cache.get("key2");

        let snapshot = cache.stats();
        assert_eq!(snapshot.total_accesses, 2);
        assert_eq!(snapshot.total_hits, 1);
        assert_eq!(snapshot.entry_count, 1);
    }
}