scirs2_sparse/adaptive_memory_compression/
cache.rs

1//! Block cache management for adaptive memory compression
2//!
3//! This module handles caching of frequently accessed compressed blocks
4//! with LRU eviction and access pattern tracking.
5
6use super::access_tracking::AccessType;
7use std::collections::{HashMap, VecDeque};
8
9/// Block identifier
10#[derive(Debug, Clone, Hash, PartialEq, Eq)]
11pub struct BlockId {
12    pub matrixid: u64,
13    pub block_row: usize,
14    pub block_col: usize,
15}
16
17impl std::fmt::Display for BlockId {
18    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
19        write!(f, "{}_{}-{}", self.matrixid, self.block_row, self.block_col)
20    }
21}
22
23impl BlockId {
24    /// Create a new block identifier
25    pub fn new(matrix_id: u64, block_row: usize, block_col: usize) -> Self {
26        Self {
27            matrixid: matrix_id,
28            block_row,
29            block_col,
30        }
31    }
32
33    /// Convert BlockId to u64 for serialization (using a hash-like approach)
34    pub fn to_u64(&self) -> u64 {
35        // Simple hash combining the fields
36        self.matrixid
37            .wrapping_mul(1000000)
38            .wrapping_add((self.block_row as u64) * 1000)
39            .wrapping_add(self.block_col as u64)
40    }
41
42    /// Create BlockId from u64 (for deserialization)
43    pub fn from_u64(value: u64) -> Self {
44        // This is a simplified reverse operation - in practice you'd want a proper bijection
45        let matrixid = value / 1000000;
46        let remainder = value % 1000000;
47        let block_row = (remainder / 1000) as usize;
48        let block_col = (remainder % 1000) as usize;
49        Self {
50            matrixid,
51            block_row,
52            block_col,
53        }
54    }
55
56    /// Get a unique string representation
57    pub fn as_string(&self) -> String {
58        format!("{}_{}-{}", self.matrixid, self.block_row, self.block_col)
59    }
60
61    /// Parse BlockId from string representation
62    pub fn from_string(s: &str) -> Result<Self, String> {
63        let parts: Vec<&str> = s.split('_').collect();
64        if parts.len() != 2 {
65            return Err("Invalid BlockId string format".to_string());
66        }
67
68        let matrix_id = parts[0].parse::<u64>().map_err(|_| "Invalid matrix ID")?;
69
70        let coords: Vec<&str> = parts[1].split('-').collect();
71        if coords.len() != 2 {
72            return Err("Invalid coordinate format".to_string());
73        }
74
75        let block_row = coords[0]
76            .parse::<usize>()
77            .map_err(|_| "Invalid block row")?;
78        let block_col = coords[1]
79            .parse::<usize>()
80            .map_err(|_| "Invalid block column")?;
81
82        Ok(Self {
83            matrixid: matrix_id,
84            block_row,
85            block_col,
86        })
87    }
88}
89
90/// Cached block information
91#[derive(Debug, Clone)]
92#[allow(dead_code)]
93pub(crate) struct CachedBlock {
94    pub data: Vec<u8>,
95    pub compressed: bool,
96    pub access_count: usize,
97    pub last_access: u64,
98    pub compression_level: u8,
99    pub original_size: usize,
100    pub compressed_size: usize,
101}
102
103impl CachedBlock {
104    /// Create a new cached block
105    pub fn new(data: Vec<u8>, compressed: bool, compression_level: u8) -> Self {
106        let size = data.len();
107        Self {
108            data,
109            compressed,
110            access_count: 1,
111            last_access: Self::current_timestamp(),
112            compression_level,
113            original_size: if compressed { 0 } else { size },
114            compressed_size: if compressed { size } else { 0 },
115        }
116    }
117
118    /// Update access information
119    pub fn update_access(&mut self) {
120        self.access_count += 1;
121        self.last_access = Self::current_timestamp();
122    }
123
124    /// Get current timestamp
125    fn current_timestamp() -> u64 {
126        std::time::SystemTime::now()
127            .duration_since(std::time::UNIX_EPOCH)
128            .unwrap_or_default()
129            .as_secs()
130    }
131
132    /// Get compression ratio
133    pub fn compression_ratio(&self) -> f64 {
134        if self.original_size == 0 {
135            1.0
136        } else {
137            self.compressed_size as f64 / self.original_size as f64
138        }
139    }
140
141    /// Get access frequency (accesses per second)
142    pub fn access_frequency(&self) -> f64 {
143        let current_time = Self::current_timestamp();
144        let time_diff = current_time.saturating_sub(self.last_access).max(1);
145        self.access_count as f64 / time_diff as f64
146    }
147
148    /// Get data size in bytes
149    pub fn size(&self) -> usize {
150        self.data.len()
151    }
152
153    /// Check if block is recently accessed
154    pub fn is_recently_accessed(&self, threshold_seconds: u64) -> bool {
155        let current_time = Self::current_timestamp();
156        current_time.saturating_sub(self.last_access) < threshold_seconds
157    }
158}
159
160/// Block cache for frequently accessed data
161#[derive(Debug)]
162#[allow(dead_code)]
163pub(crate) struct BlockCache {
164    pub cache: HashMap<BlockId, CachedBlock>,
165    pub access_order: VecDeque<BlockId>,
166    _maxsize: usize,
167    pub current_size: usize,
168    max_cache_size: usize,
169    hit_count: usize,
170    miss_count: usize,
171}
172
173impl BlockCache {
174    /// Create a new block cache
175    pub fn new(max_size: usize) -> Self {
176        Self {
177            cache: HashMap::new(),
178            access_order: VecDeque::new(),
179            _maxsize: max_size,
180            current_size: 0,
181            max_cache_size: max_size,
182            hit_count: 0,
183            miss_count: 0,
184        }
185    }
186
187    /// Insert a block into the cache
188    pub fn insert(&mut self, block_id: BlockId, block: CachedBlock) {
189        let block_size = block.size();
190
191        // Remove existing block if present
192        if let Some(existing) = self.cache.remove(&block_id) {
193            self.current_size = self.current_size.saturating_sub(existing.size());
194            // Remove from access order
195            if let Some(pos) = self.access_order.iter().position(|id| id == &block_id) {
196                self.access_order.remove(pos);
197            }
198        }
199
200        // Evict blocks if necessary
201        while self.current_size + block_size > self.max_cache_size && !self.access_order.is_empty()
202        {
203            self.evict_lru();
204        }
205
206        // Insert new block
207        if block_size <= self.max_cache_size {
208            self.cache.insert(block_id.clone(), block);
209            self.access_order.push_back(block_id);
210            self.current_size += block_size;
211        }
212    }
213
214    /// Get a block from the cache
215    pub fn get(&mut self, block_id: &BlockId) -> Option<&CachedBlock> {
216        if let Some(block) = self.cache.get(block_id) {
217            self.hit_count += 1;
218            // Move to back of access order (most recently used)
219            if let Some(pos) = self.access_order.iter().position(|id| id == block_id) {
220                self.access_order.remove(pos);
221                self.access_order.push_back(block_id.clone());
222            }
223            Some(block)
224        } else {
225            self.miss_count += 1;
226            None
227        }
228    }
229
230    /// Get a mutable reference to a block
231    pub fn get_mut(&mut self, block_id: &BlockId) -> Option<&mut CachedBlock> {
232        if self.cache.contains_key(block_id) {
233            self.hit_count += 1;
234            // Move to back of access order (most recently used)
235            if let Some(pos) = self.access_order.iter().position(|id| id == block_id) {
236                self.access_order.remove(pos);
237                self.access_order.push_back(block_id.clone());
238            }
239            self.cache.get_mut(block_id)
240        } else {
241            self.miss_count += 1;
242            None
243        }
244    }
245
246    /// Remove a block from the cache
247    pub fn remove(&mut self, block_id: &BlockId) -> Option<CachedBlock> {
248        if let Some(block) = self.cache.remove(block_id) {
249            self.current_size = self.current_size.saturating_sub(block.size());
250            // Remove from access order
251            if let Some(pos) = self.access_order.iter().position(|id| id == block_id) {
252                self.access_order.remove(pos);
253            }
254            Some(block)
255        } else {
256            None
257        }
258    }
259
260    /// Check if a block exists in the cache
261    pub fn contains(&self, block_id: &BlockId) -> bool {
262        self.cache.contains_key(block_id)
263    }
264
265    /// Evict the least recently used block
266    fn evict_lru(&mut self) {
267        if let Some(lru_id) = self.access_order.pop_front() {
268            if let Some(block) = self.cache.remove(&lru_id) {
269                self.current_size = self.current_size.saturating_sub(block.size());
270            }
271        }
272    }
273
274    /// Clear all blocks from the cache
275    pub fn clear(&mut self) {
276        self.cache.clear();
277        self.access_order.clear();
278        self.current_size = 0;
279        self.hit_count = 0;
280        self.miss_count = 0;
281    }
282
283    /// Get cache statistics
284    pub fn get_stats(&self) -> CacheStats {
285        let total_accesses = self.hit_count + self.miss_count;
286        let hit_rate = if total_accesses > 0 {
287            self.hit_count as f64 / total_accesses as f64
288        } else {
289            0.0
290        };
291
292        CacheStats {
293            total_blocks: self.cache.len(),
294            current_size_bytes: self.current_size,
295            max_size_bytes: self.max_cache_size,
296            hit_count: self.hit_count,
297            miss_count: self.miss_count,
298            hit_rate,
299            utilization: self.current_size as f64 / self.max_cache_size as f64,
300        }
301    }
302
303    /// Get blocks sorted by access frequency
304    pub fn get_most_accessed_blocks(&self, limit: usize) -> Vec<(BlockId, usize)> {
305        let mut blocks: Vec<_> = self
306            .cache
307            .iter()
308            .map(|(id, block)| (id.clone(), block.access_count))
309            .collect();
310
311        blocks.sort_by(|a, b| b.1.cmp(&a.1));
312        blocks.truncate(limit);
313        blocks
314    }
315
316    /// Get total number of blocks in cache
317    pub fn len(&self) -> usize {
318        self.cache.len()
319    }
320
321    /// Check if cache is empty
322    pub fn is_empty(&self) -> bool {
323        self.cache.is_empty()
324    }
325
326    /// Get current memory usage ratio
327    pub fn memory_usage_ratio(&self) -> f64 {
328        self.current_size as f64 / self.max_cache_size as f64
329    }
330
331    /// Force eviction of old blocks based on age
332    pub fn evict_old_blocks(&mut self, max_age_seconds: u64) {
333        let current_time = std::time::SystemTime::now()
334            .duration_since(std::time::UNIX_EPOCH)
335            .unwrap_or_default()
336            .as_secs();
337
338        let old_blocks: Vec<BlockId> = self
339            .cache
340            .iter()
341            .filter(|(_, block)| current_time.saturating_sub(block.last_access) > max_age_seconds)
342            .map(|(id, _)| id.clone())
343            .collect();
344
345        for block_id in old_blocks {
346            self.remove(&block_id);
347        }
348    }
349
350    /// Prefetch blocks based on spatial locality
351    pub fn suggest_prefetch_candidates(
352        &self,
353        current_block: &BlockId,
354        lookahead: usize,
355    ) -> Vec<BlockId> {
356        let mut candidates = Vec::new();
357
358        // Suggest adjacent blocks
359        for row_offset in 0..=lookahead {
360            for col_offset in 0..=lookahead {
361                if row_offset == 0 && col_offset == 0 {
362                    continue; // Skip current block
363                }
364
365                let candidate = BlockId {
366                    matrixid: current_block.matrixid,
367                    block_row: current_block.block_row.saturating_add(row_offset),
368                    block_col: current_block.block_col.saturating_add(col_offset),
369                };
370
371                if !self.contains(&candidate) {
372                    candidates.push(candidate);
373                }
374            }
375        }
376
377        candidates
378    }
379}
380
381/// Cache statistics
382#[derive(Debug, Clone)]
383pub struct CacheStats {
384    pub total_blocks: usize,
385    pub current_size_bytes: usize,
386    pub max_size_bytes: usize,
387    pub hit_count: usize,
388    pub miss_count: usize,
389    pub hit_rate: f64,
390    pub utilization: f64,
391}
392
393impl CacheStats {
394    /// Get a human-readable summary
395    pub fn summary(&self) -> String {
396        format!(
397            "Cache: {} blocks, {:.1}% utilized, {:.1}% hit rate ({}/{} accesses)",
398            self.total_blocks,
399            self.utilization * 100.0,
400            self.hit_rate * 100.0,
401            self.hit_count,
402            self.hit_count + self.miss_count
403        )
404    }
405}