scirs2_sparse/adaptive_memory_compression/
access_tracking.rs

1//! Access pattern tracking for adaptive memory compression
2//!
3//! This module tracks how blocks are accessed to optimize compression
4//! strategies and caching decisions based on temporal and spatial locality.
5
6use super::cache::BlockId;
7use std::collections::{HashMap, VecDeque};
8
9/// Type of access
10#[derive(Debug, Clone, Copy)]
11#[allow(dead_code)]
12pub enum AccessType {
13    Read,
14    Write,
15    ReadWrite,
16}
17
18/// Access pattern tracking
19#[derive(Debug)]
20#[allow(dead_code)]
21pub(crate) struct AccessTracker {
22    pub access_patterns: HashMap<BlockId, AccessPattern>,
23    pub temporal_patterns: VecDeque<AccessEvent>,
24    pub spatial_patterns: HashMap<usize, Vec<usize>>, // row -> accessed columns
25    pub sequential_threshold: usize,
26    max_temporal_history: usize,
27    max_spatial_history: usize,
28}
29
30/// Access pattern for a block
31#[derive(Debug, Clone)]
32#[allow(dead_code)]
33pub(crate) struct AccessPattern {
34    pub access_count: usize,
35    pub last_access: u64,
36    pub access_frequency: f64,
37    pub sequential_accesses: usize,
38    pub random_accesses: usize,
39    pub temporal_locality: f64,
40    pub spatial_locality: f64,
41    first_access: u64,
42    access_intervals: VecDeque<u64>,
43}
44
45/// Access event for pattern analysis
46#[derive(Debug, Clone)]
47#[allow(dead_code)]
48pub(crate) struct AccessEvent {
49    pub blockid: BlockId,
50    pub timestamp: u64,
51    pub access_type: AccessType,
52    duration: Option<u64>,
53    bytes_accessed: usize,
54}
55
56impl AccessTracker {
57    /// Create a new access tracker
58    pub fn new() -> Self {
59        Self {
60            access_patterns: HashMap::new(),
61            temporal_patterns: VecDeque::new(),
62            spatial_patterns: HashMap::new(),
63            sequential_threshold: 3,
64            max_temporal_history: 10000,
65            max_spatial_history: 1000,
66        }
67    }
68
69    /// Record an access event
70    pub fn record_access(
71        &mut self,
72        block_id: BlockId,
73        access_type: AccessType,
74        bytes_accessed: usize,
75    ) {
76        let timestamp = Self::current_timestamp();
77
78        // Record temporal event
79        let event = AccessEvent {
80            blockid: block_id.clone(),
81            timestamp,
82            access_type,
83            duration: None,
84            bytes_accessed,
85        };
86
87        self.temporal_patterns.push_back(event);
88
89        // Trim temporal history
90        while self.temporal_patterns.len() > self.max_temporal_history {
91            self.temporal_patterns.pop_front();
92        }
93
94        // Update spatial patterns
95        self.spatial_patterns
96            .entry(block_id.block_row)
97            .or_default()
98            .push(block_id.block_col);
99
100        // Trim spatial history
101        for column_list in self.spatial_patterns.values_mut() {
102            if column_list.len() > self.max_spatial_history {
103                column_list.drain(0..column_list.len() - self.max_spatial_history);
104            }
105        }
106
107        // Update or create access pattern
108        let pattern = self
109            .access_patterns
110            .entry(block_id.clone())
111            .or_insert_with(|| AccessPattern::new(timestamp));
112
113        pattern.update_access(timestamp, bytes_accessed);
114        pattern.update_locality_metrics(&self.temporal_patterns, &self.spatial_patterns, &block_id);
115        pattern.classify_access_type(
116            &self.temporal_patterns,
117            &block_id,
118            self.sequential_threshold,
119        );
120    }
121
122    /// Get access pattern for a block
123    pub fn get_pattern(&self, block_id: &BlockId) -> Option<&AccessPattern> {
124        self.access_patterns.get(block_id)
125    }
126
127    /// Predict next likely accessed blocks
128    pub fn predict_next_accesses(&self, current_block: &BlockId, lookahead: usize) -> Vec<BlockId> {
129        let mut predictions = Vec::new();
130
131        // Spatial prediction based on recent access patterns
132        if let Some(recent_columns) = self.spatial_patterns.get(&current_block.block_row) {
133            let recent_cols: Vec<usize> = recent_columns
134                .iter()
135                .rev()
136                .take(lookahead)
137                .copied()
138                .collect();
139
140            for &col in &recent_cols {
141                if col != current_block.block_col {
142                    predictions.push(BlockId {
143                        matrixid: current_block.matrixid,
144                        block_row: current_block.block_row,
145                        block_col: col,
146                    });
147                }
148            }
149        }
150
151        // Temporal prediction based on sequential patterns
152        if let Some(pattern) = self.get_pattern(current_block) {
153            if pattern.is_sequential() {
154                // Predict next sequential blocks
155                for i in 1..=lookahead {
156                    predictions.push(BlockId {
157                        matrixid: current_block.matrixid,
158                        block_row: current_block.block_row,
159                        block_col: current_block.block_col + i,
160                    });
161                }
162            }
163        }
164
165        predictions.truncate(lookahead);
166        predictions
167    }
168
169    /// Get blocks with high access frequency
170    pub fn get_hot_blocks(&self, limit: usize) -> Vec<(BlockId, f64)> {
171        let mut blocks: Vec<_> = self
172            .access_patterns
173            .iter()
174            .map(|(id, pattern)| (id.clone(), pattern.access_frequency))
175            .collect();
176
177        blocks.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
178        blocks.truncate(limit);
179        blocks
180    }
181
182    /// Get blocks with low access frequency (candidates for eviction)
183    pub fn get_cold_blocks(&self, limit: usize) -> Vec<(BlockId, f64)> {
184        let mut blocks: Vec<_> = self
185            .access_patterns
186            .iter()
187            .map(|(id, pattern)| (id.clone(), pattern.access_frequency))
188            .collect();
189
190        blocks.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
191        blocks.truncate(limit);
192        blocks
193    }
194
195    /// Analyze overall access patterns
196    pub fn analyze_patterns(&self) -> AccessAnalysis {
197        let total_blocks = self.access_patterns.len();
198        let total_accesses: usize = self.access_patterns.values().map(|p| p.access_count).sum();
199
200        let sequential_blocks = self
201            .access_patterns
202            .values()
203            .filter(|p| p.is_sequential())
204            .count();
205
206        let random_blocks = total_blocks - sequential_blocks;
207
208        let avg_temporal_locality = if total_blocks > 0 {
209            self.access_patterns
210                .values()
211                .map(|p| p.temporal_locality)
212                .sum::<f64>()
213                / total_blocks as f64
214        } else {
215            0.0
216        };
217
218        let avg_spatial_locality = if total_blocks > 0 {
219            self.access_patterns
220                .values()
221                .map(|p| p.spatial_locality)
222                .sum::<f64>()
223                / total_blocks as f64
224        } else {
225            0.0
226        };
227
228        AccessAnalysis {
229            total_blocks,
230            total_accesses,
231            sequential_blocks,
232            random_blocks,
233            avg_temporal_locality,
234            avg_spatial_locality,
235            hot_blocks_threshold: self.calculate_hot_blocks_threshold(),
236            cold_blocks_threshold: self.calculate_cold_blocks_threshold(),
237        }
238    }
239
240    /// Calculate threshold for hot blocks
241    fn calculate_hot_blocks_threshold(&self) -> f64 {
242        if self.access_patterns.is_empty() {
243            return 0.0;
244        }
245
246        let frequencies: Vec<f64> = self
247            .access_patterns
248            .values()
249            .map(|p| p.access_frequency)
250            .collect();
251
252        // Use 90th percentile as hot threshold
253        let mut sorted_freq = frequencies;
254        sorted_freq.sort_by(|a, b| a.partial_cmp(b).unwrap());
255        let index = (sorted_freq.len() as f64 * 0.9) as usize;
256        sorted_freq.get(index).copied().unwrap_or(0.0)
257    }
258
259    /// Calculate threshold for cold blocks
260    fn calculate_cold_blocks_threshold(&self) -> f64 {
261        if self.access_patterns.is_empty() {
262            return 0.0;
263        }
264
265        let frequencies: Vec<f64> = self
266            .access_patterns
267            .values()
268            .map(|p| p.access_frequency)
269            .collect();
270
271        // Use 10th percentile as cold threshold
272        let mut sorted_freq = frequencies;
273        sorted_freq.sort_by(|a, b| a.partial_cmp(b).unwrap());
274        let index = (sorted_freq.len() as f64 * 0.1) as usize;
275        sorted_freq.get(index).copied().unwrap_or(0.0)
276    }
277
278    /// Get current timestamp
279    fn current_timestamp() -> u64 {
280        std::time::SystemTime::now()
281            .duration_since(std::time::UNIX_EPOCH)
282            .unwrap_or_default()
283            .as_secs()
284    }
285
286    /// Clear old access patterns
287    pub fn cleanup_old_patterns(&mut self, max_age_seconds: u64) {
288        let current_time = Self::current_timestamp();
289        let cutoff_time = current_time.saturating_sub(max_age_seconds);
290
291        // Remove old access patterns
292        self.access_patterns
293            .retain(|_, pattern| pattern.last_access >= cutoff_time);
294
295        // Remove old temporal events
296        self.temporal_patterns
297            .retain(|event| event.timestamp >= cutoff_time);
298    }
299
300    /// Get access statistics
301    pub fn get_statistics(&self) -> AccessStatistics {
302        let total_events = self.temporal_patterns.len();
303        let read_events = self
304            .temporal_patterns
305            .iter()
306            .filter(|e| matches!(e.access_type, AccessType::Read | AccessType::ReadWrite))
307            .count();
308        let write_events = self
309            .temporal_patterns
310            .iter()
311            .filter(|e| matches!(e.access_type, AccessType::Write | AccessType::ReadWrite))
312            .count();
313
314        AccessStatistics {
315            total_tracked_blocks: self.access_patterns.len(),
316            total_access_events: total_events,
317            read_events,
318            write_events,
319            spatial_localities: self.spatial_patterns.len(),
320            avg_accesses_per_block: if self.access_patterns.is_empty() {
321                0.0
322            } else {
323                self.access_patterns
324                    .values()
325                    .map(|p| p.access_count as f64)
326                    .sum::<f64>()
327                    / self.access_patterns.len() as f64
328            },
329        }
330    }
331}
332
333impl AccessPattern {
334    /// Create a new access pattern
335    fn new(timestamp: u64) -> Self {
336        Self {
337            access_count: 0,
338            last_access: timestamp,
339            access_frequency: 0.0,
340            sequential_accesses: 0,
341            random_accesses: 0,
342            temporal_locality: 0.0,
343            spatial_locality: 0.0,
344            first_access: timestamp,
345            access_intervals: VecDeque::new(),
346        }
347    }
348
349    /// Update access information
350    fn update_access(&mut self, timestamp: u64, _bytes_accessed: usize) {
351        if self.access_count > 0 {
352            let interval = timestamp.saturating_sub(self.last_access);
353            self.access_intervals.push_back(interval);
354
355            // Keep only recent intervals for frequency calculation
356            while self.access_intervals.len() > 100 {
357                self.access_intervals.pop_front();
358            }
359        }
360
361        self.access_count += 1;
362        self.last_access = timestamp;
363
364        // Update access frequency
365        self.update_frequency(timestamp);
366    }
367
368    /// Update access frequency
369    fn update_frequency(&mut self, current_timestamp: u64) {
370        let time_span = current_timestamp.saturating_sub(self.first_access).max(1);
371        self.access_frequency = self.access_count as f64 / time_span as f64;
372    }
373
374    /// Update locality metrics
375    fn update_locality_metrics(
376        &mut self,
377        temporal_patterns: &VecDeque<AccessEvent>,
378        spatial_patterns: &HashMap<usize, Vec<usize>>,
379        block_id: &BlockId,
380    ) {
381        // Calculate temporal locality based on recent accesses
382        let recent_accesses: Vec<_> = temporal_patterns
383            .iter()
384            .rev()
385            .take(10)
386            .filter(|e| e.blockid == *block_id)
387            .collect();
388
389        if recent_accesses.len() > 1 {
390            let intervals: Vec<u64> = recent_accesses
391                .windows(2)
392                .map(|window| window[0].timestamp.saturating_sub(window[1].timestamp))
393                .collect();
394
395            let avg_interval = intervals.iter().sum::<u64>() as f64 / intervals.len() as f64;
396            self.temporal_locality = 1.0 / (1.0 + avg_interval / 100.0); // Normalize to 0-1
397        }
398
399        // Calculate spatial locality based on nearby block accesses
400        if let Some(row_accesses) = spatial_patterns.get(&block_id.block_row) {
401            let nearby_accesses = row_accesses
402                .iter()
403                .filter(|&&col| {
404                    let distance = (col as i32 - block_id.block_col as i32).abs();
405                    distance <= 5 // Within 5 blocks is considered spatial locality
406                })
407                .count();
408
409            self.spatial_locality = nearby_accesses as f64 / row_accesses.len().max(1) as f64;
410        }
411    }
412
413    /// Classify access type based on pattern
414    fn classify_access_type(
415        &mut self,
416        temporal_patterns: &VecDeque<AccessEvent>,
417        block_id: &BlockId,
418        sequential_threshold: usize,
419    ) {
420        // Look for sequential access patterns
421        let recent_same_row: Vec<_> = temporal_patterns
422            .iter()
423            .rev()
424            .take(sequential_threshold * 2)
425            .filter(|e| e.blockid.block_row == block_id.block_row)
426            .collect();
427
428        if recent_same_row.len() >= sequential_threshold {
429            let columns: Vec<usize> = recent_same_row
430                .iter()
431                .map(|e| e.blockid.block_col)
432                .collect();
433
434            let is_sequential = columns
435                .windows(2)
436                .all(|window| window[1] == window[0] + 1 || window[1] == window[0]);
437
438            if is_sequential {
439                self.sequential_accesses += 1;
440            } else {
441                self.random_accesses += 1;
442            }
443        }
444    }
445
446    /// Check if this pattern indicates sequential access
447    pub fn is_sequential(&self) -> bool {
448        if self.sequential_accesses + self.random_accesses == 0 {
449            return false;
450        }
451        let sequential_ratio = self.sequential_accesses as f64
452            / (self.sequential_accesses + self.random_accesses) as f64;
453        sequential_ratio > 0.7
454    }
455
456    /// Get access rate (accesses per second)
457    pub fn access_rate(&self) -> f64 {
458        self.access_frequency
459    }
460
461    /// Get predictability score
462    pub fn predictability_score(&self) -> f64 {
463        (self.temporal_locality + self.spatial_locality) / 2.0
464    }
465}
466
467impl AccessEvent {
468    /// Create a new access event
469    pub fn new(block_id: BlockId, access_type: AccessType, bytes_accessed: usize) -> Self {
470        Self {
471            blockid: block_id,
472            timestamp: std::time::SystemTime::now()
473                .duration_since(std::time::UNIX_EPOCH)
474                .unwrap_or_default()
475                .as_secs(),
476            access_type,
477            duration: None,
478            bytes_accessed,
479        }
480    }
481
482    /// Set access duration
483    pub fn with_duration(mut self, duration: u64) -> Self {
484        self.duration = Some(duration);
485        self
486    }
487
488    /// Get access bandwidth (bytes per second)
489    pub fn bandwidth(&self) -> Option<f64> {
490        self.duration.map(|d| {
491            if d > 0 {
492                self.bytes_accessed as f64 / d as f64
493            } else {
494                0.0
495            }
496        })
497    }
498}
499
500/// Analysis results for access patterns
501#[derive(Debug, Clone)]
502pub struct AccessAnalysis {
503    pub total_blocks: usize,
504    pub total_accesses: usize,
505    pub sequential_blocks: usize,
506    pub random_blocks: usize,
507    pub avg_temporal_locality: f64,
508    pub avg_spatial_locality: f64,
509    pub hot_blocks_threshold: f64,
510    pub cold_blocks_threshold: f64,
511}
512
513/// Statistics for access tracking
514#[derive(Debug, Clone)]
515pub struct AccessStatistics {
516    pub total_tracked_blocks: usize,
517    pub total_access_events: usize,
518    pub read_events: usize,
519    pub write_events: usize,
520    pub spatial_localities: usize,
521    pub avg_accesses_per_block: f64,
522}
523
524impl Default for AccessTracker {
525    fn default() -> Self {
526        Self::new()
527    }
528}