Skip to main content

ai_agent/utils/
file_state_cache.rs

1//! File state cache utilities
2//! Translated from /data/home/swei/claudecode/openclaudecode/src/utils/fileStateCache.ts
3
4use lru::LruCache;
5use std::path::Path;
6
7/// File state representing cached file content
8#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
9pub struct FileState {
10    pub content: String,
11    pub timestamp: u64,
12    #[serde(skip_serializing_if = "Option::is_none")]
13    pub offset: Option<u32>,
14    #[serde(skip_serializing_if = "Option::is_none")]
15    pub limit: Option<u32>,
16    /// True when this entry was populated by auto-injection (e.g. AI.md) and
17    /// the injected content did not match disk (stripped HTML comments, stripped
18    /// frontmatter, truncated MEMORY.md). The model has only seen a partial view;
19    /// Edit/Write must require an explicit Read first. `content` here holds the
20    /// RAW disk bytes (for getChangedFiles diffing), not what the model saw.
21    #[serde(skip_serializing_if = "Option::is_none")]
22    pub is_partial_view: Option<bool>,
23}
24
/// Entry-count capacity used by default for read-file-state caches.
pub const READ_FILE_STATE_CACHE_SIZE: usize = 100;

/// Byte budget used by default for file state caches: 25 MiB.
/// Intended to keep large file contents from growing memory without bound.
pub const DEFAULT_MAX_CACHE_SIZE_BYTES: usize = 25 * (1 << 20);
31
32/// A file state cache that normalizes all path keys before access.
33/// This ensures consistent cache hits regardless of whether callers pass
34/// relative vs absolute paths with redundant segments (e.g. /foo/../bar)
35/// or mixed path separators on Windows (/ vs \).
36pub struct FileStateCache {
37    cache: LruCache<String, FileState>,
38    max_size_bytes: usize,
39}
40
41impl FileStateCache {
42    /// Create a new FileStateCache with the given max entries and max size in bytes
43    pub fn new(max_entries: usize, max_size_bytes: usize) -> Self {
44        Self {
45            cache: LruCache::new(
46                std::num::NonZeroUsize::new(max_entries)
47                    .unwrap_or(std::num::NonZeroUsize::new(1).unwrap()),
48            ),
49            max_size_bytes,
50        }
51    }
52
53    /// Get a value from the cache
54    pub fn get(&mut self, key: &str) -> Option<FileState> {
55        let normalized = normalize_path(key);
56        self.cache.get(&normalized).cloned()
57    }
58
59    /// Set a value in the cache
60    pub fn set(&mut self, key: String, value: FileState) -> &mut Self {
61        let normalized = normalize_path(&key);
62        self.cache.push(normalized, value);
63        self
64    }
65
66    /// Check if the cache contains a key
67    pub fn contains(&mut self, key: &str) -> bool {
68        let normalized = normalize_path(key);
69        self.cache.contains(&normalized)
70    }
71
72    /// Delete a key from the cache
73    pub fn remove(&mut self, key: &str) -> Option<FileState> {
74        let normalized = normalize_path(key);
75        self.cache.pop(&normalized)
76    }
77
78    /// Clear all entries from the cache
79    pub fn clear(&mut self) {
80        self.cache.clear();
81    }
82
83    /// Get the current number of entries in the cache
84    pub fn len(&self) -> usize {
85        self.cache.len()
86    }
87
88    /// Check if the cache is empty
89    pub fn is_empty(&self) -> bool {
90        self.cache.is_empty()
91    }
92
93    /// Get the maximum number of entries
94    pub fn max_entries(&self) -> Option<usize> {
95        self.cache.cap().get().try_into().ok()
96    }
97
98    /// Get the maximum size in bytes
99    pub fn max_size(&self) -> usize {
100        self.max_size_bytes
101    }
102
103    /// Get an iterator over the cache entries
104    pub fn iter(&mut self) -> impl Iterator<Item = (&String, &FileState)> {
105        self.cache.iter()
106    }
107
108    /// Get an iterator over the cache keys
109    pub fn keys(&mut self) -> impl Iterator<Item = &String> {
110        self.cache.iter().map(|(k, _)| k)
111    }
112
113    /// Get an iterator over the cache entries as (key, value) pairs
114    pub fn entries(&mut self) -> impl Iterator<Item = (&String, &FileState)> {
115        self.cache.iter()
116    }
117}
118
/// Lexically normalize a file path so equivalent spellings share one cache key.
///
/// * The root (and, on Windows, the drive/UNC prefix) is kept, so an absolute
///   path never collides with a relative path that has the same segments.
/// * `.` segments and repeated separators are dropped.
/// * `..` cancels the preceding normal segment; at the root it is discarded,
///   and at the start of a relative path it is kept.
/// * Separators are rewritten to the platform's main separator.
///
/// The file system is never consulted, so symlinks are not resolved. When
/// nothing remains after normalization (e.g. `""` or `"."`), the input is
/// returned unchanged.
fn normalize_path(path: &str) -> String {
    use std::path::{Component, PathBuf};

    let mut normalized = PathBuf::new();
    for component in Path::new(path).components() {
        match component {
            Component::Prefix(_) | Component::RootDir => {
                normalized.push(component.as_os_str());
            }
            Component::CurDir => {}
            Component::ParentDir => match normalized.components().next_back() {
                // `foo/..` collapses to nothing.
                Some(Component::Normal(_)) => {
                    normalized.pop();
                }
                // `/..` is still `/`.
                Some(Component::RootDir) => {}
                // Leading `..` of a relative path cannot be resolved lexically.
                _ => normalized.push(".."),
            },
            Component::Normal(segment) => normalized.push(segment),
        }
    }

    if normalized.as_os_str().is_empty() {
        path.to_string()
    } else {
        normalized.to_string_lossy().into_owned()
    }
}
138
139/// Factory function to create a size-limited FileStateCache.
140/// Uses LRU cache's built-in size-based eviction to prevent memory bloat.
141/// Note: Images are not cached (see FileReadTool) so size limit is mainly
142/// for large text files, notebooks, and other editable content.
143pub fn create_file_state_cache_with_size_limit(
144    max_entries: usize,
145    max_size_bytes: usize,
146) -> FileStateCache {
147    FileStateCache::new(max_entries, max_size_bytes)
148}
149
150/// Helper function to convert cache to object (used by compact.rs)
151pub fn cache_to_object(cache: &mut FileStateCache) -> std::collections::HashMap<String, FileState> {
152    cache.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
153}
154
155/// Helper function to get all keys from cache (used by several components)
156pub fn cache_keys(cache: &mut FileStateCache) -> Vec<String> {
157    cache.keys().cloned().collect()
158}
159
160/// Helper function to clone a FileStateCache
161/// Preserves size limit configuration from the source cache
162pub fn clone_file_state_cache(cache: &FileStateCache) -> FileStateCache {
163    let max_entries = cache.max_entries().unwrap_or(READ_FILE_STATE_CACHE_SIZE);
164    let max_size = cache.max_size();
165    FileStateCache::new(max_entries, max_size)
166}
167
168/// Merge two file state caches, with more recent entries (by timestamp) overriding older ones
169pub fn merge_file_state_caches(
170    first: &mut FileStateCache,
171    second: &mut FileStateCache,
172) -> FileStateCache {
173    let max_entries = first.max_entries().unwrap_or(READ_FILE_STATE_CACHE_SIZE);
174    let max_size = first.max_size();
175    let mut merged = FileStateCache::new(max_entries, max_size);
176
177    for (file_path, file_state) in first.entries() {
178        merged.set(file_path.clone(), file_state.clone());
179    }
180
181    for (file_path, file_state) in second.entries() {
182        if let Some(existing) = merged.get(file_path) {
183            // Only override if the new entry is more recent
184            if file_state.timestamp > existing.timestamp {
185                merged.set(file_path.clone(), file_state.clone());
186            }
187        } else {
188            merged.set(file_path.clone(), file_state.clone());
189        }
190    }
191    merged
192}
193
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `FileState` with the given content and timestamp; all optional
    /// fields stay `None`.
    fn sample_state(content: &str, timestamp: u64) -> FileState {
        FileState {
            content: content.to_string(),
            timestamp,
            ..Default::default()
        }
    }

    /// A stored value can be read back under the same key.
    #[test]
    fn test_file_state_cache_basic() {
        let mut cache = FileStateCache::new(10, 1000);

        cache.set("test.txt".to_string(), sample_state("hello", 1000));

        let retrieved = cache.get("test.txt");
        assert_eq!(retrieved.map(|state| state.content), Some("hello".to_string()));
        assert_eq!(cache.len(), 1);
    }

    /// Different spellings of the same relative path resolve to one entry.
    #[test]
    fn test_file_state_cache_normalize_path() {
        let mut cache = FileStateCache::new(10, 1000);

        // Stored with a leading `./` and a doubled separator.
        cache.set("./dir//test.txt".to_string(), sample_state("hello", 1000));

        assert!(cache.contains("dir/test.txt"));
        assert!(cache.contains("dir/./test.txt"));
        assert_eq!(
            cache.get("./dir/test.txt").map(|state| state.content),
            Some("hello".to_string())
        );

        // Re-setting through another spelling overwrites instead of duplicating.
        cache.set("dir/test.txt".to_string(), sample_state("world", 2000));
        assert_eq!(cache.len(), 1);
        assert_eq!(
            cache.get("dir/test.txt").map(|state| state.content),
            Some("world".to_string())
        );
    }

    /// Removing an entry returns it and leaves the cache empty.
    #[test]
    fn test_file_state_cache_remove() {
        let mut cache = FileStateCache::new(10, 1000);
        cache.set("test.txt".to_string(), sample_state("hello", 1000));

        let removed = cache.remove("./test.txt");
        assert_eq!(removed.map(|state| state.timestamp), Some(1000));
        assert!(cache.is_empty());
        assert!(!cache.contains("test.txt"));
    }

    #[test]
    fn test_read_file_state_cache_size() {
        assert_eq!(READ_FILE_STATE_CACHE_SIZE, 100);
    }

    #[test]
    fn test_default_max_cache_size() {
        assert_eq!(DEFAULT_MAX_CACHE_SIZE_BYTES, 25 * 1024 * 1024);
    }
}