code_digest/core/
cache.rs

1//! File caching functionality for eliminating redundant I/O
2//!
3//! This module provides a thread-safe cache for file contents using `Arc<str>`
4//! for cheap cloning across threads.
5
6use anyhow::Result;
7use dashmap::DashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10
11/// Thread-safe file content cache
12pub struct FileCache {
13    cache: DashMap<PathBuf, Arc<str>>,
14}
15
16impl FileCache {
17    /// Create a new empty cache
18    pub fn new() -> Self {
19        FileCache { cache: DashMap::new() }
20    }
21
22    /// Get file content from cache or load from disk
23    pub fn get_or_load(&self, path: &Path) -> Result<Arc<str>> {
24        // Canonicalize path to avoid cache misses from different representations
25        let canonical_path = path.canonicalize()?;
26
27        // Check if already cached
28        if let Some(content) = self.cache.get(&canonical_path) {
29            return Ok(content.clone());
30        }
31
32        // Load from disk
33        let content = std::fs::read_to_string(&canonical_path)?;
34        let arc_content: Arc<str> = Arc::from(content.as_str());
35
36        // Store in cache
37        self.cache.insert(canonical_path, arc_content.clone());
38
39        Ok(arc_content)
40    }
41
42    /// Get cache statistics
43    pub fn stats(&self) -> CacheStats {
44        CacheStats { entries: self.cache.len() }
45    }
46}
47
48impl Default for FileCache {
49    fn default() -> Self {
50        Self::new()
51    }
52}
53
/// Point-in-time statistics about a file cache.
///
/// Comparable and defaultable so that snapshots can be checked directly in
/// assertions (`assert_eq!(before, after)`).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CacheStats {
    /// Number of files held in the cache when the snapshot was taken.
    pub entries: usize,
}
59
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `contents` to a file called `name` inside `dir` and returns the
    /// file's path.
    fn write_fixture(dir: &TempDir, name: &str, contents: &str) -> PathBuf {
        let path = dir.path().join(name);
        fs::write(&path, contents).unwrap();
        path
    }

    #[test]
    fn test_cache_hit_returns_same_content() {
        let temp_dir = TempDir::new().unwrap();
        let content = "Hello, cache!";
        let file_path = write_fixture(&temp_dir, "test.txt", content);

        let cache = FileCache::new();

        // First access - cache miss
        let first = cache.get_or_load(&file_path).unwrap();
        assert_eq!(&*first, content);

        // Second access - cache hit
        let second = cache.get_or_load(&file_path).unwrap();
        assert_eq!(&*second, content);

        // A hit must hand back the very same allocation
        assert!(Arc::ptr_eq(&first, &second));
    }

    #[test]
    fn test_cache_miss_loads_from_disk() {
        let temp_dir = TempDir::new().unwrap();
        let content = "Content from disk";
        let file_path = write_fixture(&temp_dir, "test.txt", content);

        let cache = FileCache::new();
        let loaded = cache.get_or_load(&file_path).unwrap();

        assert_eq!(&*loaded, content);
        assert_eq!(cache.stats().entries, 1);
    }

    #[test]
    fn test_non_existent_file_returns_error() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("does_not_exist.txt");

        let cache = FileCache::new();
        let result = cache.get_or_load(&file_path);

        assert!(result.is_err());
        // A failed load must not leave an entry behind
        assert_eq!(cache.stats().entries, 0);
    }

    #[test]
    fn test_canonicalized_paths() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = write_fixture(&temp_dir, "test.txt", "content");

        // Build a second spelling of the same file that is guaranteed to
        // resolve: `<tmp>/nested/../test.txt`. A `..` component is used
        // because `Path` equality does not collapse it, so the two paths are
        // distinct keys unless the cache canonicalizes them.
        let nested_dir = temp_dir.path().join("nested");
        fs::create_dir(&nested_dir).unwrap();
        let indirect_path = nested_dir.join("..").join("test.txt");
        assert_ne!(indirect_path, file_path);

        let cache = FileCache::new();

        let direct = cache.get_or_load(&file_path).unwrap();
        let via_parent = cache.get_or_load(&indirect_path).unwrap();

        // Both spellings must resolve to one shared entry
        assert_eq!(cache.stats().entries, 1);
        assert_eq!(&*via_parent, "content");
        assert!(Arc::ptr_eq(&direct, &via_parent));
    }

    #[test]
    fn test_concurrent_access() {
        use std::thread;

        let temp_dir = TempDir::new().unwrap();
        let file_path = write_fixture(&temp_dir, "concurrent.txt", "concurrent content");

        let cache = Arc::new(FileCache::new());

        // Spawn multiple threads accessing the same file
        let handles: Vec<_> = (0..10)
            .map(|_| {
                let cache = Arc::clone(&cache);
                let path = file_path.clone();
                thread::spawn(move || {
                    let content = cache.get_or_load(&path).unwrap();
                    assert_eq!(&*content, "concurrent content");
                })
            })
            .collect();

        // Wait for all threads; a panicked worker fails the test here
        for handle in handles {
            handle.join().unwrap();
        }

        // Should only have one cache entry
        assert_eq!(cache.stats().entries, 1);
    }
}