1use anyhow::{Context, Result};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use std::fs;
5use std::path::{Path, PathBuf};
6use std::time::{SystemTime, UNIX_EPOCH};
7
8#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct DirtyInfo {
11 pub timestamp: u64,
13 pub expected_files: Option<usize>,
15}
16
17impl DirtyInfo {
18 pub fn new() -> Self {
20 let timestamp = SystemTime::now()
21 .duration_since(UNIX_EPOCH)
22 .map(|d| d.as_secs())
23 .unwrap_or(0);
24 Self {
25 timestamp,
26 expected_files: None,
27 }
28 }
29
30 pub fn with_expected_files(expected_files: usize) -> Self {
32 let mut info = Self::new();
33 info.expected_files = Some(expected_files);
34 info
35 }
36
37 pub fn is_stale(&self, max_age_secs: u64) -> bool {
39 let now = SystemTime::now()
40 .duration_since(UNIX_EPOCH)
41 .map(|d| d.as_secs())
42 .unwrap_or(0);
43 now.saturating_sub(self.timestamp) >= max_age_secs
44 }
45
46 pub fn age_secs(&self) -> u64 {
48 let now = SystemTime::now()
49 .duration_since(UNIX_EPOCH)
50 .map(|d| d.as_secs())
51 .unwrap_or(0);
52 now.saturating_sub(self.timestamp)
53 }
54}
55
56impl Default for DirtyInfo {
57 fn default() -> Self {
58 Self::new()
59 }
60}
61
62#[derive(Debug, Clone, Serialize, Deserialize, Default)]
64pub struct HashCache {
65 pub roots: HashMap<String, HashMap<String, String>>,
67 #[serde(default)]
70 pub dirty_roots: HashMap<String, DirtyInfo>,
71}
72
73#[derive(Debug, Deserialize)]
75struct LegacyHashCache {
76 roots: HashMap<String, HashMap<String, String>>,
77 #[serde(default)]
78 dirty_roots: std::collections::HashSet<String>,
79}
80
81impl HashCache {
82 pub fn load(cache_path: &Path) -> Result<Self> {
85 if !cache_path.exists() {
86 tracing::debug!("Cache file not found, starting with empty cache");
87 return Ok(Self::default());
88 }
89
90 let content = fs::read_to_string(cache_path).context("Failed to read cache file")?;
91
92 if let Ok(cache) = serde_json::from_str::<HashCache>(&content) {
94 tracing::info!("Loaded cache with {} indexed roots", cache.roots.len());
95 return Ok(cache);
96 }
97
98 if let Ok(legacy) = serde_json::from_str::<LegacyHashCache>(&content) {
100 tracing::info!(
101 "Migrating cache from legacy format ({} roots, {} dirty roots)",
102 legacy.roots.len(),
103 legacy.dirty_roots.len()
104 );
105
106 let dirty_roots: HashMap<String, DirtyInfo> = legacy
108 .dirty_roots
109 .into_iter()
110 .map(|root| (root, DirtyInfo::new()))
111 .collect();
112
113 let cache = HashCache {
114 roots: legacy.roots,
115 dirty_roots,
116 };
117
118 if let Err(e) = cache.save(cache_path) {
120 tracing::warn!("Failed to save migrated cache: {}", e);
121 } else {
122 tracing::info!("Successfully migrated cache to new format");
123 }
124
125 return Ok(cache);
126 }
127
128 anyhow::bail!("Failed to parse cache file as either new or legacy format")
130 }
131
132 pub fn save(&self, cache_path: &Path) -> Result<()> {
134 if let Some(parent) = cache_path.parent() {
136 fs::create_dir_all(parent).context("Failed to create cache directory")?;
137 }
138
139 let content = serde_json::to_string_pretty(self).context("Failed to serialize cache")?;
140
141 fs::write(cache_path, content).context("Failed to write cache file")?;
142
143 tracing::debug!("Saved cache to {:?}", cache_path);
144 Ok(())
145 }
146
147 pub fn get_root(&self, root: &str) -> Option<&HashMap<String, String>> {
149 self.roots.get(root)
150 }
151
152 pub fn update_root(&mut self, root: String, hashes: HashMap<String, String>) {
154 self.roots.insert(root, hashes);
155 }
156
157 pub fn remove_root(&mut self, root: &str) {
159 self.roots.remove(root);
160 self.dirty_roots.remove(root);
161 }
162
163 pub fn mark_dirty(&mut self, root: &str) {
166 self.dirty_roots.insert(root.to_string(), DirtyInfo::new());
167 }
168
169 pub fn mark_dirty_with_info(&mut self, root: &str, expected_files: usize) {
171 self.dirty_roots.insert(
172 root.to_string(),
173 DirtyInfo::with_expected_files(expected_files),
174 );
175 }
176
177 pub fn clear_dirty(&mut self, root: &str) {
180 self.dirty_roots.remove(root);
181 }
182
183 pub fn is_dirty(&self, root: &str) -> bool {
185 self.dirty_roots.contains_key(root)
186 }
187
188 pub fn get_dirty_info(&self, root: &str) -> Option<&DirtyInfo> {
190 self.dirty_roots.get(root)
191 }
192
193 pub fn get_dirty_roots(&self) -> &HashMap<String, DirtyInfo> {
195 &self.dirty_roots
196 }
197
198 pub fn has_dirty_roots(&self) -> bool {
200 !self.dirty_roots.is_empty()
201 }
202
203 pub fn is_dirty_stale(&self, root: &str, max_age_secs: u64) -> bool {
206 self.dirty_roots
207 .get(root)
208 .is_some_and(|info| info.is_stale(max_age_secs))
209 }
210
211 pub fn dirty_age_secs(&self, root: &str) -> Option<u64> {
213 self.dirty_roots.get(root).map(|info| info.age_secs())
214 }
215
216 pub fn clear_stale_dirty_flags(&mut self, max_age_secs: u64) -> usize {
219 let stale_roots: Vec<String> = self
220 .dirty_roots
221 .iter()
222 .filter(|(_, info)| info.is_stale(max_age_secs))
223 .map(|(root, _)| root.clone())
224 .collect();
225
226 let count = stale_roots.len();
227 for root in stale_roots {
228 tracing::info!(
229 "Clearing stale dirty flag for '{}' (age: {} seconds)",
230 root,
231 self.dirty_roots
232 .get(&root)
233 .map(|i| i.age_secs())
234 .unwrap_or(0)
235 );
236 self.dirty_roots.remove(&root);
237 }
238 count
239 }
240
241 pub fn default_path() -> PathBuf {
243 crate::paths::PlatformPaths::default_hash_cache_path()
244 }
245}
246
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::NamedTempFile;

    /// Root key shared by most tests.
    const ROOT: &str = "/test/path";

    /// Builds an owned table from borrowed `(file, hash)` pairs.
    fn hashes_from(entries: &[(&str, &str)]) -> HashMap<String, String> {
        entries
            .iter()
            .map(|&(file, hash)| (file.to_owned(), hash.to_owned()))
            .collect()
    }

    /// Cache whose only root is `ROOT`, holding a single `file1.rs` entry.
    fn cache_with_single_root() -> HashCache {
        let mut cache = HashCache::default();
        cache.update_root(ROOT.to_string(), hashes_from(&[("file1.rs", "hash1")]));
        cache
    }

    // ----- roots: serialization and persistence -----

    #[test]
    fn test_cache_serialization() {
        let mut cache = HashCache::default();
        cache.update_root(
            ROOT.to_string(),
            hashes_from(&[("file1.rs", "hash1"), ("file2.rs", "hash2")]),
        );

        let json = serde_json::to_string(&cache).unwrap();
        let deserialized: HashCache = serde_json::from_str(&json).unwrap();

        assert_eq!(cache.roots.len(), deserialized.roots.len());
        assert_eq!(cache.roots.get(ROOT), deserialized.roots.get(ROOT));
    }

    #[test]
    fn test_cache_save_load() {
        let temp_file = NamedTempFile::new().unwrap();
        let cache_path = temp_file.path();

        let cache = cache_with_single_root();
        cache.save(cache_path).unwrap();

        let loaded = HashCache::load(cache_path).unwrap();
        assert_eq!(cache.roots.len(), loaded.roots.len());
        assert_eq!(cache.roots.get(ROOT), loaded.roots.get(ROOT));
    }

    #[test]
    fn test_cache_operations() {
        let mut cache = cache_with_single_root();

        assert!(cache.get_root(ROOT).is_some());
        assert!(cache.get_root("/nonexistent").is_none());

        cache.remove_root(ROOT);
        assert!(cache.get_root(ROOT).is_none());
    }

    #[test]
    fn test_load_nonexistent_cache() {
        let result = HashCache::load(Path::new("/nonexistent/path/cache.json"));
        assert!(result.is_ok());

        let loaded = result.unwrap();
        assert_eq!(loaded.roots.len(), 0);
    }

    #[test]
    fn test_load_corrupted_cache() {
        let temp_file = NamedTempFile::new().unwrap();
        let cache_path = temp_file.path();

        fs::write(cache_path, "{ invalid json }").unwrap();

        assert!(HashCache::load(cache_path).is_err());
    }

    #[test]
    fn test_save_creates_parent_directory() {
        let temp_dir = tempfile::tempdir().unwrap();
        let cache_path = temp_dir.path().join("subdir").join("cache.json");

        HashCache::default().save(&cache_path).unwrap();

        assert!(cache_path.exists());
    }

    #[test]
    fn test_default_path() {
        let path = HashCache::default_path();
        let rendered = path.to_string_lossy();
        assert!(rendered.contains("project-rag"));
        assert!(rendered.contains("hash_cache.json"));
    }

    // ----- roots: in-memory operations -----

    #[test]
    fn test_update_root_replaces_existing() {
        let mut cache = HashCache::default();

        cache.update_root(ROOT.to_string(), hashes_from(&[("file1.rs", "hash1")]));
        cache.update_root(ROOT.to_string(), hashes_from(&[("file2.rs", "hash2")]));

        let root_hashes = cache.get_root(ROOT).unwrap();
        assert_eq!(root_hashes.len(), 1);
        assert!(root_hashes.contains_key("file2.rs"));
        assert!(!root_hashes.contains_key("file1.rs"));
    }

    #[test]
    fn test_multiple_roots() {
        let mut cache = HashCache::default();

        cache.update_root("/path1".to_string(), hashes_from(&[("file1.rs", "hash1")]));
        cache.update_root("/path2".to_string(), hashes_from(&[("file2.rs", "hash2")]));

        assert_eq!(cache.roots.len(), 2);
        assert!(cache.get_root("/path1").is_some());
        assert!(cache.get_root("/path2").is_some());
    }

    #[test]
    fn test_empty_cache_operations() {
        let cache = HashCache::default();
        assert!(cache.get_root("/any/path").is_none());
        assert_eq!(cache.roots.len(), 0);
    }

    #[test]
    fn test_remove_root_nonexistent() {
        let mut cache = HashCache::default();
        cache.remove_root("/nonexistent");
        assert_eq!(cache.roots.len(), 0);
    }

    // ----- dirty flags -----

    #[test]
    fn test_dirty_flag_operations() {
        let mut cache = HashCache::default();

        // Nothing is dirty in a fresh cache.
        assert!(!cache.is_dirty(ROOT));
        assert!(!cache.has_dirty_roots());
        assert!(cache.get_dirty_roots().is_empty());

        cache.mark_dirty(ROOT);
        assert!(cache.is_dirty(ROOT));
        assert!(cache.has_dirty_roots());
        assert!(cache.get_dirty_roots().contains_key(ROOT));

        let info = cache.get_dirty_info(ROOT).unwrap();
        assert!(info.timestamp > 0);
        assert!(info.expected_files.is_none());

        cache.clear_dirty(ROOT);
        assert!(!cache.is_dirty(ROOT));
        assert!(!cache.has_dirty_roots());
    }

    #[test]
    fn test_dirty_flag_with_expected_files() {
        let mut cache = HashCache::default();

        cache.mark_dirty_with_info(ROOT, 100);
        assert!(cache.is_dirty(ROOT));

        let info = cache.get_dirty_info(ROOT).unwrap();
        assert_eq!(info.expected_files, Some(100));
    }

    #[test]
    fn test_dirty_flag_staleness() {
        let mut cache = HashCache::default();

        cache.mark_dirty(ROOT);

        // A flag that was just set is far younger than an hour.
        assert!(!cache.is_dirty_stale(ROOT, 3600));

        let age = cache.dirty_age_secs(ROOT).unwrap();
        assert!(age < 5);

        // A maximum age of zero makes every flag stale.
        assert!(cache.is_dirty_stale(ROOT, 0));
    }

    #[test]
    fn test_clear_stale_dirty_flags() {
        let mut cache = HashCache::default();

        for root in ["/path1", "/path2"] {
            cache.mark_dirty(root);
        }

        let cleared = cache.clear_stale_dirty_flags(0);
        assert_eq!(cleared, 2);
        assert!(!cache.has_dirty_roots());
    }

    #[test]
    fn test_dirty_flag_persistence() {
        let temp_file = NamedTempFile::new().unwrap();
        let cache_path = temp_file.path();

        let mut cache = HashCache::default();
        cache.mark_dirty(ROOT);
        cache.save(cache_path).unwrap();

        let loaded = HashCache::load(cache_path).unwrap();
        assert!(loaded.is_dirty(ROOT));
        assert!(loaded.has_dirty_roots());
    }

    #[test]
    fn test_remove_root_clears_dirty() {
        let mut cache = cache_with_single_root();
        cache.mark_dirty(ROOT);

        assert!(cache.is_dirty(ROOT));
        assert!(cache.get_root(ROOT).is_some());

        cache.remove_root(ROOT);
        assert!(!cache.is_dirty(ROOT));
        assert!(cache.get_root(ROOT).is_none());
    }

    #[test]
    fn test_multiple_dirty_roots() {
        let mut cache = HashCache::default();

        for root in ["/path1", "/path2", "/path3"] {
            cache.mark_dirty(root);
        }

        assert!(cache.is_dirty("/path1"));
        assert!(cache.is_dirty("/path2"));
        assert!(cache.is_dirty("/path3"));
        assert_eq!(cache.get_dirty_roots().len(), 3);

        cache.clear_dirty("/path2");
        assert!(cache.is_dirty("/path1"));
        assert!(!cache.is_dirty("/path2"));
        assert!(cache.is_dirty("/path3"));
        assert_eq!(cache.get_dirty_roots().len(), 2);
    }

    #[test]
    fn test_dirty_flag_idempotent() {
        let mut cache = HashCache::default();

        for _ in 0..3 {
            cache.mark_dirty(ROOT);
        }
        assert_eq!(cache.get_dirty_roots().len(), 1);

        for _ in 0..2 {
            cache.clear_dirty(ROOT);
        }
        assert!(!cache.is_dirty(ROOT));
    }

    // ----- older serialized layouts -----

    #[test]
    fn test_dirty_flag_with_old_cache_format() {
        let temp_file = NamedTempFile::new().unwrap();
        let cache_path = temp_file.path();

        // No `dirty_roots` field at all.
        let old_format = r#"{"roots":{"/test/path":{"file1.rs":"hash1"}}}"#;
        fs::write(cache_path, old_format).unwrap();

        let loaded = HashCache::load(cache_path).unwrap();
        assert!(loaded.get_root(ROOT).is_some());
        assert!(!loaded.has_dirty_roots());
        assert!(!loaded.is_dirty(ROOT));
    }

    #[test]
    fn test_dirty_flag_migration_from_hashset() {
        let temp_file = NamedTempFile::new().unwrap();
        let cache_path = temp_file.path();

        // `dirty_roots` stored as a plain list of root names.
        let old_format =
            r#"{"roots":{"/test/path":{"file1.rs":"hash1"}},"dirty_roots":["/test/path"]}"#;
        fs::write(cache_path, old_format).unwrap();

        let loaded = HashCache::load(cache_path).unwrap();

        assert!(loaded.get_root(ROOT).is_some());
        assert!(loaded.is_dirty(ROOT));
        assert!(loaded.has_dirty_roots());

        let info = loaded.get_dirty_info(ROOT).unwrap();
        assert!(info.timestamp > 0);

        // The first load rewrote the file, so a second load must still see
        // the flag.
        let reloaded = HashCache::load(cache_path).unwrap();
        assert!(reloaded.is_dirty(ROOT));
    }

    // ----- DirtyInfo -----

    #[test]
    fn test_dirty_info_default() {
        let info = DirtyInfo::default();
        assert!(info.timestamp > 0);
        assert!(info.expected_files.is_none());
    }

    #[test]
    fn test_dirty_info_with_expected_files() {
        let info = DirtyInfo::with_expected_files(50);
        assert!(info.timestamp > 0);
        assert_eq!(info.expected_files, Some(50));
    }
}