Skip to main content

rust_guardian/cache/
mod.rs

1//! File hash caching for incremental checks
2//!
3//! Architecture: Infrastructure Layer - Cache provides performance optimization without affecting domain logic
4//! - FileCache acts as a repository for file metadata and analysis results
5//! - Hash-based validation ensures cache coherence with minimal overhead
6//! - Domain objects remain pure while infrastructure handles caching concerns
7
8use crate::domain::violations::{GuardianError, GuardianResult};
9use serde::{Deserialize, Serialize};
10use sha2::{Digest, Sha256};
11use std::collections::HashMap;
12use std::fs::{self, File};
13use std::io::prelude::*;
14use std::path::{Path, PathBuf};
15use std::time::{SystemTime, UNIX_EPOCH};
16
17/// Cache for storing file analysis results and metadata
18#[derive(Debug)]
19pub struct FileCache {
20    /// Path to the cache file
21    cache_path: PathBuf,
22    /// In-memory cache data
23    data: CacheData,
24    /// Whether the cache has been modified
25    dirty: bool,
26}
27
28/// Serializable cache data structure
29#[derive(Debug, Clone, Serialize, Deserialize, Default)]
30struct CacheData {
31    /// Cache format version for migration support
32    version: u32,
33    /// Configuration fingerprint when cache was created
34    config_fingerprint: Option<String>,
35    /// Cached file entries
36    files: HashMap<PathBuf, FileEntry>,
37    /// Cache metadata
38    metadata: CacheMetadata,
39}
40
41/// Metadata about the cache itself
42#[derive(Debug, Clone, Serialize, Deserialize)]
43struct CacheMetadata {
44    /// When the cache was created
45    created_at: u64,
46    /// When the cache was last updated
47    updated_at: u64,
48    /// Number of cache hits since creation
49    hits: u64,
50    /// Number of cache misses since creation
51    misses: u64,
52}
53
54/// Cached information about a single file
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct FileEntry {
57    /// SHA-256 hash of file content
58    pub content_hash: String,
59    /// File size in bytes
60    pub size: u64,
61    /// Last modified timestamp
62    pub modified_at: u64,
63    /// Number of violations found in this file
64    pub violation_count: usize,
65    /// When this file was last analyzed
66    pub analyzed_at: u64,
67    /// Configuration fingerprint when analysis was done
68    pub config_fingerprint: String,
69}
70
71impl FileCache {
72    /// Create a new file cache with the given cache file path
73    pub fn new<P: AsRef<Path>>(cache_path: P) -> Self {
74        Self {
75            cache_path: cache_path.as_ref().to_path_buf(),
76            data: CacheData::default(),
77            dirty: false,
78        }
79    }
80
81    /// Load cache from disk, creating it if it doesn't exist
82    pub fn load(&mut self) -> GuardianResult<()> {
83        if self.cache_path.exists() {
84            let content = fs::read_to_string(&self.cache_path)
85                .map_err(|e| GuardianError::cache(format!("Failed to read cache file: {e}")))?;
86
87            self.data = serde_json::from_str(&content)
88                .map_err(|e| GuardianError::cache(format!("Failed to parse cache file: {e}")))?;
89
90            // Migrate cache format if needed
91            self.migrate_if_needed()?;
92        } else {
93            // Create new cache
94            self.data = CacheData {
95                version: 1,
96                config_fingerprint: None,
97                files: HashMap::new(),
98                metadata: CacheMetadata {
99                    created_at: current_timestamp(),
100                    updated_at: current_timestamp(),
101                    hits: 0,
102                    misses: 0,
103                },
104            };
105            self.dirty = true;
106        }
107
108        // Self-validation after load
109        self.verify_integrity_on_operation()?;
110        Ok(())
111    }
112
113    /// Save cache to disk if it has been modified
114    pub fn save(&mut self) -> GuardianResult<()> {
115        if !self.dirty {
116            return Ok(());
117        }
118
119        // Self-validation before save
120        self.verify_integrity_on_operation()?;
121
122        // Update metadata
123        self.data.metadata.updated_at = current_timestamp();
124
125        // Ensure cache directory exists
126        if let Some(parent) = self.cache_path.parent() {
127            fs::create_dir_all(parent).map_err(|e| {
128                GuardianError::cache(format!("Failed to create cache directory: {e}"))
129            })?;
130        }
131
132        // Serialize and write cache
133        let content = serde_json::to_string_pretty(&self.data)
134            .map_err(|e| GuardianError::cache(format!("Failed to serialize cache: {e}")))?;
135
136        fs::write(&self.cache_path, content)
137            .map_err(|e| GuardianError::cache(format!("Failed to write cache file: {e}")))?;
138
139        self.dirty = false;
140        Ok(())
141    }
142
143    /// Check if a file needs to be re-analyzed
144    pub fn needs_analysis<P: AsRef<Path>>(
145        &mut self,
146        file_path: P,
147        config_fingerprint: &str,
148    ) -> GuardianResult<bool> {
149        let file_path = file_path.as_ref();
150
151        // Get current file metadata
152        let metadata = fs::metadata(file_path).map_err(|e| {
153            GuardianError::cache(format!(
154                "Failed to get file metadata for {}: {}",
155                file_path.display(),
156                e
157            ))
158        })?;
159
160        let current_size = metadata.len();
161        let current_modified = metadata
162            .modified()
163            .map_err(|e| GuardianError::cache(format!("Failed to get modification time: {e}")))?
164            .duration_since(UNIX_EPOCH)
165            .map_err(|e| {
166                GuardianError::cache(format!("Invalid system time before Unix epoch: {e}"))
167            })?
168            .as_secs();
169
170        // Check if we have a cache entry
171        if let Some(entry) = self.data.files.get(file_path) {
172            // Check if file has been modified
173            if entry.size != current_size || entry.modified_at != current_modified {
174                self.data.metadata.misses += 1;
175                self.dirty = true;
176                return Ok(true);
177            }
178
179            // Check if configuration has changed
180            if entry.config_fingerprint != config_fingerprint {
181                self.data.metadata.misses += 1;
182                self.dirty = true;
183                return Ok(true);
184            }
185
186            // Verify content hash to be absolutely sure
187            let current_hash = self.calculate_file_hash(file_path)?;
188            if entry.content_hash != current_hash {
189                self.data.metadata.misses += 1;
190                self.dirty = true;
191                return Ok(true);
192            }
193
194            // Cache hit!
195            self.data.metadata.hits += 1;
196            self.dirty = true;
197            Ok(false)
198        } else {
199            // No cache entry - needs analysis
200            self.data.metadata.misses += 1;
201            self.dirty = true;
202            Ok(true)
203        }
204    }
205
206    /// Update cache entry for a file after analysis
207    pub fn update_entry<P: AsRef<Path>>(
208        &mut self,
209        file_path: P,
210        violation_count: usize,
211        config_fingerprint: &str,
212    ) -> GuardianResult<()> {
213        let file_path = file_path.as_ref();
214
215        // Get current file metadata
216        let metadata = fs::metadata(file_path)
217            .map_err(|e| GuardianError::cache(format!("Failed to get file metadata: {e}")))?;
218
219        let content_hash = self.calculate_file_hash(file_path)?;
220
221        let entry = FileEntry {
222            content_hash,
223            size: metadata.len(),
224            modified_at: metadata
225                .modified()
226                .map_err(|e| GuardianError::cache(format!("Failed to get modification time: {e}")))?
227                .duration_since(UNIX_EPOCH)
228                .map_err(|e| {
229                    GuardianError::cache(format!("Invalid system time before Unix epoch: {e}"))
230                })?
231                .as_secs(),
232            violation_count,
233            analyzed_at: current_timestamp(),
234            config_fingerprint: config_fingerprint.to_string(),
235        };
236
237        self.data.files.insert(file_path.to_path_buf(), entry);
238        self.dirty = true;
239
240        Ok(())
241    }
242
243    /// Get cache statistics
244    pub fn statistics(&self) -> CacheStatistics {
245        CacheStatistics {
246            total_files: self.data.files.len(),
247            cache_hits: self.data.metadata.hits,
248            cache_misses: self.data.metadata.misses,
249            hit_rate: if self.data.metadata.hits + self.data.metadata.misses > 0 {
250                (self.data.metadata.hits as f64)
251                    / ((self.data.metadata.hits + self.data.metadata.misses) as f64)
252            } else {
253                0.0
254            },
255            created_at: self.data.metadata.created_at,
256            updated_at: self.data.metadata.updated_at,
257        }
258    }
259
260    /// Clear the entire cache
261    pub fn clear(&mut self) -> GuardianResult<()> {
262        self.data.files.clear();
263        self.data.metadata.hits = 0;
264        self.data.metadata.misses = 0;
265        self.data.metadata.updated_at = current_timestamp();
266        self.dirty = true;
267
268        // Remove cache file if it exists
269        if self.cache_path.exists() {
270            fs::remove_file(&self.cache_path)
271                .map_err(|e| GuardianError::cache(format!("Failed to remove cache file: {e}")))?;
272        }
273
274        Ok(())
275    }
276
277    /// Remove cache entries for files that no longer exist
278    pub fn cleanup(&mut self) -> GuardianResult<usize> {
279        let mut removed = 0;
280        let mut to_remove = Vec::new();
281
282        for file_path in self.data.files.keys() {
283            if !file_path.exists() {
284                to_remove.push(file_path.clone());
285            }
286        }
287
288        for file_path in to_remove {
289            self.data.files.remove(&file_path);
290            removed += 1;
291        }
292
293        if removed > 0 {
294            self.dirty = true;
295        }
296
297        Ok(removed)
298    }
299
300    /// Update configuration fingerprint
301    pub fn set_config_fingerprint(&mut self, fingerprint: String) {
302        if self.data.config_fingerprint.as_ref() != Some(&fingerprint) {
303            self.data.config_fingerprint = Some(fingerprint);
304            self.dirty = true;
305        }
306    }
307
308    /// Calculate SHA-256 hash of file content
309    fn calculate_file_hash<P: AsRef<Path>>(&self, file_path: P) -> GuardianResult<String> {
310        let mut file = File::open(&file_path)
311            .map_err(|e| GuardianError::cache(format!("Failed to open file for hashing: {e}")))?;
312
313        let mut hasher = Sha256::new();
314        let mut buffer = [0; 8192];
315
316        loop {
317            let bytes_read = file.read(&mut buffer).map_err(|e| {
318                GuardianError::cache(format!("Failed to read file for hashing: {e}"))
319            })?;
320
321            if bytes_read == 0 {
322                break;
323            }
324
325            hasher.update(&buffer[..bytes_read]);
326        }
327
328        Ok(format!("{:x}", hasher.finalize()))
329    }
330
331    /// Migrate cache format if needed
332    fn migrate_if_needed(&mut self) -> GuardianResult<()> {
333        const CURRENT_VERSION: u32 = 1;
334
335        if self.data.version < CURRENT_VERSION {
336            tracing::info!(
337                "Migrating cache from version {} to {}",
338                self.data.version,
339                CURRENT_VERSION
340            );
341
342            match self.data.version {
343                0 => {
344                    // Migration from version 0 to 1
345                    // Add any migration logic here
346                    self.data.version = 1;
347                    self.dirty = true;
348                }
349                _ => {
350                    return Err(GuardianError::cache(format!(
351                        "Unsupported cache version: {}. Please delete the cache file.",
352                        self.data.version
353                    )));
354                }
355            }
356        }
357
358        Ok(())
359    }
360}
361
362impl Default for CacheMetadata {
363    fn default() -> Self {
364        let now = current_timestamp();
365        Self {
366            created_at: now,
367            updated_at: now,
368            hits: 0,
369            misses: 0,
370        }
371    }
372}
373
/// Snapshot of how well the cache is performing.
#[derive(Debug, Clone)]
pub struct CacheStatistics {
    /// Number of files with an entry in the cache
    pub total_files: usize,
    /// Number of recorded cache hits
    pub cache_hits: u64,
    /// Number of recorded cache misses
    pub cache_misses: u64,
    /// Fraction of lookups that were hits, as a ratio (not a percentage)
    pub hit_rate: f64,
    /// Cache creation time
    pub created_at: u64,
    /// Time of the last cache update
    pub updated_at: u64,
}

impl CacheStatistics {
    /// Render a one-line, human-readable summary of the statistics.
    ///
    /// The hit rate is shown as a percentage with one decimal place.
    pub fn format_display(&self) -> String {
        let hit_percentage = self.hit_rate * 100.0;

        format!(
            "Cache: {} files, {:.1}% hit rate ({} hits, {} misses)",
            self.total_files, hit_percentage, self.cache_hits, self.cache_misses
        )
    }
}
397
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// # Panics
///
/// Panics if the system clock reports a time earlier than the Unix epoch.
fn current_timestamp() -> u64 {
    let since_epoch = UNIX_EPOCH.elapsed().expect(
        "System time should be after Unix epoch - this indicates a serious system clock issue",
    );
    since_epoch.as_secs()
}
407
408impl FileCache {
409    /// Self-validation - Infrastructure validates its own coherence
410    ///
411    /// Architecture Principle: Self-validating infrastructure - Components ensure their own correctness
412    /// - Cache validates its invariants during normal operation
413    /// - File hash verification maintains cache coherence
414    /// - Metadata consistency checked on each operation
415    /// - No external test dependencies - system is self-aware
416    pub fn validate_cache_coherence(&self) -> GuardianResult<()> {
417        // Validate cache metadata invariants
418        if self.data.metadata.hits + self.data.metadata.misses > 0 {
419            let calculated_hit_rate = (self.data.metadata.hits as f64)
420                / ((self.data.metadata.hits + self.data.metadata.misses) as f64);
421
422            if !(0.0..=1.0).contains(&calculated_hit_rate) {
423                return Err(GuardianError::cache(
424                    "Cache hit rate coherence violation - cache integrity compromised".to_string(),
425                ));
426            }
427        }
428
429        // Validate temporal coherence - created_at should be <= updated_at
430        if self.data.metadata.created_at > self.data.metadata.updated_at {
431            return Err(GuardianError::cache(
432                "Temporal coherence violation - cache timeline is inconsistent".to_string(),
433            ));
434        }
435
436        // Validate file entry coherence for existing files
437        for (file_path, entry) in &self.data.files {
438            if file_path.exists() {
439                // Verify file metadata coherence if file still exists
440                if let Ok(metadata) = std::fs::metadata(file_path) {
441                    if entry.size != metadata.len() {
442                        tracing::warn!(
443                            "File size mismatch detected for {}: cached {} vs actual {}",
444                            file_path.display(),
445                            entry.size,
446                            metadata.len()
447                        );
448                    }
449                }
450            }
451        }
452
453        tracing::debug!(
454            "Cache coherence validated: {} files, {:.1}% hit rate",
455            self.data.files.len(),
456            if self.data.metadata.hits + self.data.metadata.misses > 0 {
457                (self.data.metadata.hits as f64)
458                    / ((self.data.metadata.hits + self.data.metadata.misses) as f64)
459                    * 100.0
460            } else {
461                0.0
462            }
463        );
464
465        Ok(())
466    }
467
468    /// Integrity verification during normal operations
469    ///
470    /// Architecture Principle: Continuous self-monitoring - Infrastructure monitors its own health
471    fn verify_integrity_on_operation(&self) -> GuardianResult<()> {
472        // Quick integrity checks that run during normal operations
473        if self.data.version == 0 {
474            return Err(GuardianError::cache(
475                "Cache version coherence violation - invalid version state".to_string(),
476            ));
477        }
478
479        // Ensure statistics are coherent
480        if self.data.metadata.hits > u64::MAX / 2 || self.data.metadata.misses > u64::MAX / 2 {
481            tracing::warn!("Cache statistics approaching overflow - cache may need reset");
482        }
483
484        Ok(())
485    }
486}