use crate::domain::violations::{GuardianError, GuardianResult};

use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};

use std::collections::HashMap;
use std::fs::{self, File};
use std::io::prelude::*;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
16
/// Persistent, JSON-backed cache of per-file analysis results.
///
/// Tracks which files have already been analyzed — and under which
/// configuration — so unchanged files can be skipped on later runs.
#[derive(Debug)]
pub struct FileCache {
    // Location of the JSON cache file on disk.
    cache_path: PathBuf,
    // In-memory cache contents, loaded from / saved to `cache_path`.
    data: CacheData,
    // True when `data` has unsaved changes; `save` is a no-op otherwise.
    dirty: bool,
}
27
/// On-disk cache payload, serialized as pretty-printed JSON.
//
// NOTE(review): the derived `Default` yields `version: 0`, which
// `verify_integrity_on_operation` treats as invalid; `FileCache::new` relies
// on `load` running (and setting/migrating to version 1) before any
// integrity-checked operation — confirm callers always `load` first.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
struct CacheData {
    // Cache schema version, consumed by `migrate_if_needed`.
    version: u32,
    // Fingerprint of the configuration the cache was built with, if set.
    config_fingerprint: Option<String>,
    // Per-file analysis entries keyed by file path.
    files: HashMap<PathBuf, FileEntry>,
    // Bookkeeping: creation/update timestamps and hit/miss counters.
    metadata: CacheMetadata,
}
40
/// Cache-level bookkeeping persisted alongside the file entries.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CacheMetadata {
    // Unix timestamp (seconds) when the cache was first created.
    created_at: u64,
    // Unix timestamp (seconds) of the most recent save.
    updated_at: u64,
    // Number of `needs_analysis` lookups answered from the cache.
    hits: u64,
    // Number of lookups that required (re)analysis.
    misses: u64,
}
53
/// Cached analysis result for a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Hex-encoded SHA-256 of the file contents at analysis time.
    pub content_hash: String,
    /// File size in bytes at analysis time.
    pub size: u64,
    /// File modification time (seconds since the Unix epoch) at analysis time.
    pub modified_at: u64,
    /// Number of violations found by the last analysis.
    pub violation_count: usize,
    /// Unix timestamp (seconds) when the file was last analyzed.
    pub analyzed_at: u64,
    /// Fingerprint of the configuration used for the last analysis.
    pub config_fingerprint: String,
}
70
71impl FileCache {
72 pub fn new<P: AsRef<Path>>(cache_path: P) -> Self {
74 Self {
75 cache_path: cache_path.as_ref().to_path_buf(),
76 data: CacheData::default(),
77 dirty: false,
78 }
79 }
80
81 pub fn load(&mut self) -> GuardianResult<()> {
83 if self.cache_path.exists() {
84 let content = fs::read_to_string(&self.cache_path)
85 .map_err(|e| GuardianError::cache(format!("Failed to read cache file: {e}")))?;
86
87 self.data = serde_json::from_str(&content)
88 .map_err(|e| GuardianError::cache(format!("Failed to parse cache file: {e}")))?;
89
90 self.migrate_if_needed()?;
92 } else {
93 self.data = CacheData {
95 version: 1,
96 config_fingerprint: None,
97 files: HashMap::new(),
98 metadata: CacheMetadata {
99 created_at: current_timestamp(),
100 updated_at: current_timestamp(),
101 hits: 0,
102 misses: 0,
103 },
104 };
105 self.dirty = true;
106 }
107
108 self.verify_integrity_on_operation()?;
110 Ok(())
111 }
112
113 pub fn save(&mut self) -> GuardianResult<()> {
115 if !self.dirty {
116 return Ok(());
117 }
118
119 self.verify_integrity_on_operation()?;
121
122 self.data.metadata.updated_at = current_timestamp();
124
125 if let Some(parent) = self.cache_path.parent() {
127 fs::create_dir_all(parent).map_err(|e| {
128 GuardianError::cache(format!("Failed to create cache directory: {e}"))
129 })?;
130 }
131
132 let content = serde_json::to_string_pretty(&self.data)
134 .map_err(|e| GuardianError::cache(format!("Failed to serialize cache: {e}")))?;
135
136 fs::write(&self.cache_path, content)
137 .map_err(|e| GuardianError::cache(format!("Failed to write cache file: {e}")))?;
138
139 self.dirty = false;
140 Ok(())
141 }
142
143 pub fn needs_analysis<P: AsRef<Path>>(
145 &mut self,
146 file_path: P,
147 config_fingerprint: &str,
148 ) -> GuardianResult<bool> {
149 let file_path = file_path.as_ref();
150
151 let metadata = fs::metadata(file_path).map_err(|e| {
153 GuardianError::cache(format!(
154 "Failed to get file metadata for {}: {}",
155 file_path.display(),
156 e
157 ))
158 })?;
159
160 let current_size = metadata.len();
161 let current_modified = metadata
162 .modified()
163 .map_err(|e| GuardianError::cache(format!("Failed to get modification time: {e}")))?
164 .duration_since(UNIX_EPOCH)
165 .map_err(|e| {
166 GuardianError::cache(format!("Invalid system time before Unix epoch: {e}"))
167 })?
168 .as_secs();
169
170 if let Some(entry) = self.data.files.get(file_path) {
172 if entry.size != current_size || entry.modified_at != current_modified {
174 self.data.metadata.misses += 1;
175 self.dirty = true;
176 return Ok(true);
177 }
178
179 if entry.config_fingerprint != config_fingerprint {
181 self.data.metadata.misses += 1;
182 self.dirty = true;
183 return Ok(true);
184 }
185
186 let current_hash = self.calculate_file_hash(file_path)?;
188 if entry.content_hash != current_hash {
189 self.data.metadata.misses += 1;
190 self.dirty = true;
191 return Ok(true);
192 }
193
194 self.data.metadata.hits += 1;
196 self.dirty = true;
197 Ok(false)
198 } else {
199 self.data.metadata.misses += 1;
201 self.dirty = true;
202 Ok(true)
203 }
204 }
205
206 pub fn update_entry<P: AsRef<Path>>(
208 &mut self,
209 file_path: P,
210 violation_count: usize,
211 config_fingerprint: &str,
212 ) -> GuardianResult<()> {
213 let file_path = file_path.as_ref();
214
215 let metadata = fs::metadata(file_path)
217 .map_err(|e| GuardianError::cache(format!("Failed to get file metadata: {e}")))?;
218
219 let content_hash = self.calculate_file_hash(file_path)?;
220
221 let entry = FileEntry {
222 content_hash,
223 size: metadata.len(),
224 modified_at: metadata
225 .modified()
226 .map_err(|e| GuardianError::cache(format!("Failed to get modification time: {e}")))?
227 .duration_since(UNIX_EPOCH)
228 .map_err(|e| {
229 GuardianError::cache(format!("Invalid system time before Unix epoch: {e}"))
230 })?
231 .as_secs(),
232 violation_count,
233 analyzed_at: current_timestamp(),
234 config_fingerprint: config_fingerprint.to_string(),
235 };
236
237 self.data.files.insert(file_path.to_path_buf(), entry);
238 self.dirty = true;
239
240 Ok(())
241 }
242
243 pub fn statistics(&self) -> CacheStatistics {
245 CacheStatistics {
246 total_files: self.data.files.len(),
247 cache_hits: self.data.metadata.hits,
248 cache_misses: self.data.metadata.misses,
249 hit_rate: if self.data.metadata.hits + self.data.metadata.misses > 0 {
250 (self.data.metadata.hits as f64)
251 / ((self.data.metadata.hits + self.data.metadata.misses) as f64)
252 } else {
253 0.0
254 },
255 created_at: self.data.metadata.created_at,
256 updated_at: self.data.metadata.updated_at,
257 }
258 }
259
260 pub fn clear(&mut self) -> GuardianResult<()> {
262 self.data.files.clear();
263 self.data.metadata.hits = 0;
264 self.data.metadata.misses = 0;
265 self.data.metadata.updated_at = current_timestamp();
266 self.dirty = true;
267
268 if self.cache_path.exists() {
270 fs::remove_file(&self.cache_path)
271 .map_err(|e| GuardianError::cache(format!("Failed to remove cache file: {e}")))?;
272 }
273
274 Ok(())
275 }
276
277 pub fn cleanup(&mut self) -> GuardianResult<usize> {
279 let mut removed = 0;
280 let mut to_remove = Vec::new();
281
282 for file_path in self.data.files.keys() {
283 if !file_path.exists() {
284 to_remove.push(file_path.clone());
285 }
286 }
287
288 for file_path in to_remove {
289 self.data.files.remove(&file_path);
290 removed += 1;
291 }
292
293 if removed > 0 {
294 self.dirty = true;
295 }
296
297 Ok(removed)
298 }
299
300 pub fn set_config_fingerprint(&mut self, fingerprint: String) {
302 if self.data.config_fingerprint.as_ref() != Some(&fingerprint) {
303 self.data.config_fingerprint = Some(fingerprint);
304 self.dirty = true;
305 }
306 }
307
308 fn calculate_file_hash<P: AsRef<Path>>(&self, file_path: P) -> GuardianResult<String> {
310 let mut file = File::open(&file_path)
311 .map_err(|e| GuardianError::cache(format!("Failed to open file for hashing: {e}")))?;
312
313 let mut hasher = Sha256::new();
314 let mut buffer = [0; 8192];
315
316 loop {
317 let bytes_read = file.read(&mut buffer).map_err(|e| {
318 GuardianError::cache(format!("Failed to read file for hashing: {e}"))
319 })?;
320
321 if bytes_read == 0 {
322 break;
323 }
324
325 hasher.update(&buffer[..bytes_read]);
326 }
327
328 Ok(format!("{:x}", hasher.finalize()))
329 }
330
331 fn migrate_if_needed(&mut self) -> GuardianResult<()> {
333 const CURRENT_VERSION: u32 = 1;
334
335 if self.data.version < CURRENT_VERSION {
336 tracing::info!(
337 "Migrating cache from version {} to {}",
338 self.data.version,
339 CURRENT_VERSION
340 );
341
342 match self.data.version {
343 0 => {
344 self.data.version = 1;
347 self.dirty = true;
348 }
349 _ => {
350 return Err(GuardianError::cache(format!(
351 "Unsupported cache version: {}. Please delete the cache file.",
352 self.data.version
353 )));
354 }
355 }
356 }
357
358 Ok(())
359 }
360}
361
362impl Default for CacheMetadata {
363 fn default() -> Self {
364 let now = current_timestamp();
365 Self {
366 created_at: now,
367 updated_at: now,
368 hits: 0,
369 misses: 0,
370 }
371 }
372}
373
/// Read-only snapshot of cache statistics, produced by `FileCache::statistics`.
#[derive(Debug, Clone)]
pub struct CacheStatistics {
    /// Number of files currently tracked in the cache.
    pub total_files: usize,
    /// Total cache hits recorded.
    pub cache_hits: u64,
    /// Total cache misses recorded.
    pub cache_misses: u64,
    /// `hits / (hits + misses)`, or `0.0` when no lookups were recorded.
    pub hit_rate: f64,
    /// Unix timestamp (seconds) when the cache was created.
    pub created_at: u64,
    /// Unix timestamp (seconds) of the last cache update.
    pub updated_at: u64,
}
384
385impl CacheStatistics {
386 pub fn format_display(&self) -> String {
388 format!(
389 "Cache: {} files, {:.1}% hit rate ({} hits, {} misses)",
390 self.total_files,
391 self.hit_rate * 100.0,
392 self.cache_hits,
393 self.cache_misses
394 )
395 }
396}
397
/// Seconds since the Unix epoch for the current wall-clock time.
///
/// # Panics
/// Panics if the system clock reports a time before the Unix epoch.
fn current_timestamp() -> u64 {
    let elapsed = SystemTime::now().duration_since(UNIX_EPOCH).expect(
        "System time should be after Unix epoch - this indicates a serious system clock issue",
    );
    elapsed.as_secs()
}
407
408impl FileCache {
409 pub fn validate_cache_coherence(&self) -> GuardianResult<()> {
417 if self.data.metadata.hits + self.data.metadata.misses > 0 {
419 let calculated_hit_rate = (self.data.metadata.hits as f64)
420 / ((self.data.metadata.hits + self.data.metadata.misses) as f64);
421
422 if !(0.0..=1.0).contains(&calculated_hit_rate) {
423 return Err(GuardianError::cache(
424 "Cache hit rate coherence violation - cache integrity compromised".to_string(),
425 ));
426 }
427 }
428
429 if self.data.metadata.created_at > self.data.metadata.updated_at {
431 return Err(GuardianError::cache(
432 "Temporal coherence violation - cache timeline is inconsistent".to_string(),
433 ));
434 }
435
436 for (file_path, entry) in &self.data.files {
438 if file_path.exists() {
439 if let Ok(metadata) = std::fs::metadata(file_path) {
441 if entry.size != metadata.len() {
442 tracing::warn!(
443 "File size mismatch detected for {}: cached {} vs actual {}",
444 file_path.display(),
445 entry.size,
446 metadata.len()
447 );
448 }
449 }
450 }
451 }
452
453 tracing::debug!(
454 "Cache coherence validated: {} files, {:.1}% hit rate",
455 self.data.files.len(),
456 if self.data.metadata.hits + self.data.metadata.misses > 0 {
457 (self.data.metadata.hits as f64)
458 / ((self.data.metadata.hits + self.data.metadata.misses) as f64)
459 * 100.0
460 } else {
461 0.0
462 }
463 );
464
465 Ok(())
466 }
467
468 fn verify_integrity_on_operation(&self) -> GuardianResult<()> {
472 if self.data.version == 0 {
474 return Err(GuardianError::cache(
475 "Cache version coherence violation - invalid version state".to_string(),
476 ));
477 }
478
479 if self.data.metadata.hits > u64::MAX / 2 || self.data.metadata.misses > u64::MAX / 2 {
481 tracing::warn!("Cache statistics approaching overflow - cache may need reset");
482 }
483
484 Ok(())
485 }
486}