Skip to main content

context_builder/
cache.rs

//! Cache management for context-builder.
//!
//! This module handles caching of project states to enable the auto-diff feature.
//! It uses a hash of the project path and configuration to avoid cache collisions
//! between different projects or configurations.
6
7use fs2::FileExt;
8
9use std::fs;
10use std::fs::File;
11
12use std::io::{Read, Write};
13use std::path::{Path, PathBuf};
14
15use crate::config::Config;
16use crate::state::ProjectState;
17
/// Manages cache operations with file locking to prevent corruption.
///
/// Each manager addresses a single cache file whose name is derived from a
/// hash of the project path and a hash of the configuration.
#[derive(Debug, Clone)]
pub struct CacheManager {
    /// Directory that holds the cache files.
    cache_dir: PathBuf,
    /// Hex-encoded hash of the normalized project path.
    project_hash: String,
    /// Hex-encoded hash of the configuration.
    config_hash: String,
}
24
25impl CacheManager {
26    /// Create a new cache manager for the given project path and configuration
27    pub fn new(project_path: &Path, config: &Config) -> Self {
28        // Normalize the project path first for consistency
29        let normalized_project_path = Self::normalize_project_path(project_path);
30
31        let project_hash = Self::hash_path(&normalized_project_path);
32        let config_hash = Self::hash_config(config);
33
34        // Ensure cache directory exists relative to normalized project root
35        let cache_dir = normalized_project_path
36            .join(".context-builder")
37            .join("cache");
38        if !cache_dir.exists() {
39            let _ = fs::create_dir_all(&cache_dir);
40        }
41
42        let cache_manager = Self {
43            cache_dir,
44            project_hash,
45            config_hash,
46        };
47
48        // Migrate old cache format if present
49        cache_manager.migrate_old_cache();
50
51        cache_manager
52    }
53
54    /// Normalize project path for consistent hashing and cache directory creation
55    fn normalize_project_path(path: &Path) -> PathBuf {
56        // Always resolve to absolute path first
57        let absolute_path = if path.is_absolute() {
58            path.to_path_buf()
59        } else {
60            match std::env::current_dir() {
61                Ok(cwd) => cwd.join(path),
62                Err(_) => path.to_path_buf(),
63            }
64        };
65
66        // Try to canonicalize for consistency, but normalize the result
67        if let Ok(canonical) = absolute_path.canonicalize() {
68            Self::normalize_path_format(&canonical)
69        } else {
70            absolute_path
71        }
72    }
73
74    /// Generate a hash from the normalized project path
75    fn hash_path(path: &Path) -> String {
76        let path_str = path.to_string_lossy();
77        let hash = xxhash_rust::xxh3::xxh3_64(path_str.as_bytes());
78        format!("{:x}", hash)
79    }
80
81    /// Normalize path format to handle Windows UNC prefixes
82    fn normalize_path_format(path: &Path) -> PathBuf {
83        let path_str = path.to_string_lossy();
84
85        // Remove Windows UNC prefix if present
86        if cfg!(windows) && path_str.starts_with("\\\\?\\") {
87            PathBuf::from(&path_str[4..])
88        } else {
89            path.to_path_buf()
90        }
91    }
92
93    /// Generate a hash from the configuration
94    fn hash_config(config: &Config) -> String {
95        // Build a stable string representation of config for hashing.
96        // IMPORTANT: Must stay in sync with state.rs::compute_config_hash
97        let mut config_str = String::new();
98        if let Some(ref filters) = config.filter {
99            config_str.push_str(&filters.join(","));
100        }
101        config_str.push('|');
102        if let Some(ref ignores) = config.ignore {
103            config_str.push_str(&ignores.join(","));
104        }
105        config_str.push('|');
106        config_str.push_str(&format!(
107            "{:?}|{:?}|{:?}",
108            config.line_numbers, config.auto_diff, config.diff_context_lines
109        ));
110        let hash = xxhash_rust::xxh3::xxh3_64(config_str.as_bytes());
111        format!("{:x}", hash)
112    }
113
114    /// Get the cache file path for this specific project and configuration
115    fn get_cache_path(&self) -> PathBuf {
116        self.cache_dir.join(format!(
117            "state_{}_{}.json",
118            self.project_hash, self.config_hash
119        ))
120    }
121
122    /// Public helper primarily for debugging/tests to inspect the resolved cache path
123    pub fn debug_cache_file_path(&self) -> PathBuf {
124        self.get_cache_path()
125    }
126
127    /// Migrate old markdown-based cache files to new JSON format
128    fn migrate_old_cache(&self) {
129        let old_cache_patterns = ["last_canonical.md", "last_output.md", "current_output.md"];
130
131        for pattern in &old_cache_patterns {
132            let old_cache_path = self.cache_dir.join(pattern);
133            if old_cache_path.exists() {
134                eprintln!("Migrating old cache format: removing {}", pattern);
135                let _ = fs::remove_file(&old_cache_path);
136            }
137        }
138
139        // Also remove any files that look like timestamped outputs from old versions
140        if let Ok(entries) = fs::read_dir(&self.cache_dir) {
141            for entry in entries.flatten() {
142                let file_name = entry.file_name();
143                let name = file_name.to_string_lossy();
144                if name.ends_with(".md") && (name.contains("_20") || name.starts_with("output_")) {
145                    eprintln!("Migrating old cache format: removing {}", name);
146                    let _ = fs::remove_file(entry.path());
147                }
148            }
149        }
150    }
151
152    /// Read the cached project state with file locking
153    pub fn read_cache(&self) -> Result<Option<ProjectState>, Box<dyn std::error::Error>> {
154        let cache_path = self.get_cache_path();
155
156        if !cache_path.exists() {
157            return Ok(None);
158        }
159
160        let file = File::open(&cache_path)?;
161        // Acquire shared lock to prevent reading while writing
162        file.lock_shared()?;
163
164        let mut contents = String::new();
165        let mut file = std::io::BufReader::new(file);
166        file.read_to_string(&mut contents)?;
167
168        // Release lock
169        file.get_ref().unlock()?;
170
171        let state: ProjectState = serde_json::from_str(&contents)?;
172        Ok(Some(state))
173    }
174
175    /// Write the project state to cache with file locking
176    pub fn write_cache(&self, state: &ProjectState) -> Result<(), Box<dyn std::error::Error>> {
177        let cache_path = self.get_cache_path();
178
179        let file = std::fs::OpenOptions::new()
180            .write(true)
181            .create(true)
182            .truncate(false)
183            .open(&cache_path)?;
184        // Acquire exclusive lock BEFORE truncating to prevent TOCTOU races
185        file.lock_exclusive()?;
186        file.set_len(0)?;
187
188        let json = serde_json::to_string_pretty(state)?;
189        let mut file = std::io::BufWriter::new(file);
190        file.write_all(json.as_bytes())?;
191        file.flush()?;
192
193        // Release lock
194        file.get_ref().unlock()?;
195
196        Ok(())
197    }
198}
199
200#[cfg(test)]
201mod tests {
202    use super::*;
203    use std::path::Path;
204    use tempfile::tempdir;
205
206    #[test]
207    fn test_hash_path() {
208        let path1 = Path::new("/project1");
209        let path2 = Path::new("/project2");
210
211        let hash1 = CacheManager::hash_path(path1);
212        let hash2 = CacheManager::hash_path(path2);
213
214        assert_ne!(
215            hash1, hash2,
216            "Different paths should produce different hashes"
217        );
218    }
219
220    #[test]
221    fn test_hash_config() {
222        let config1 = Config {
223            filter: Some(vec!["rs".to_string()]),
224            ignore: Some(vec!["target".to_string()]),
225            line_numbers: Some(true),
226            ..Default::default()
227        };
228
229        let config2 = Config {
230            filter: Some(vec!["md".to_string()]),
231            ignore: Some(vec!["target".to_string()]),
232            line_numbers: Some(true),
233            ..Default::default()
234        };
235
236        let hash1 = CacheManager::hash_config(&config1);
237        let hash2 = CacheManager::hash_config(&config2);
238
239        assert_ne!(
240            hash1, hash2,
241            "Different configs should produce different hashes"
242        );
243    }
244
245    #[test]
246    fn test_cache_operations() {
247        let dir = tempdir().unwrap();
248        let project_path = dir.path().join("test_project");
249        let _ = fs::create_dir(&project_path);
250
251        let config = Config::default();
252        let cache_manager = CacheManager::new(&project_path, &config);
253
254        use crate::state::ProjectMetadata;
255
256        let state = ProjectState {
257            timestamp: "2023-01-01T00:00:00Z".to_string(),
258            config_hash: "test_config_hash".to_string(),
259            files: std::collections::BTreeMap::new(),
260            metadata: ProjectMetadata {
261                project_name: "test".to_string(),
262                file_count: 0,
263                filters: vec![],
264                ignores: vec![],
265                line_numbers: false,
266            },
267        };
268
269        // Write cache
270        assert!(cache_manager.write_cache(&state).is_ok());
271
272        // Read cache
273        let cached_state = cache_manager.read_cache().unwrap();
274        assert!(cached_state.is_some());
275        assert_eq!(cached_state.unwrap().timestamp, state.timestamp);
276    }
277
278    #[test]
279    fn test_old_cache_migration() {
280        let dir = tempdir().unwrap();
281        let project_path = dir.path().join("test_project");
282        let _ = fs::create_dir(&project_path);
283
284        // Create cache directory with old cache files
285        let cache_dir = project_path.join(".context-builder").join("cache");
286        let _ = fs::create_dir_all(&cache_dir);
287
288        let old_files = [
289            "last_canonical.md",
290            "last_output.md",
291            "current_output.md",
292            "output_20230101120000.md",
293        ];
294
295        // Create old cache files
296        for file in &old_files {
297            let old_path = cache_dir.join(file);
298            let _ = fs::write(&old_path, "old cache content");
299            assert!(
300                old_path.exists(),
301                "Old cache file should exist before migration"
302            );
303        }
304
305        // Create cache manager (this should trigger migration)
306        let config = Config::default();
307        let _cache_manager = CacheManager::new(&project_path, &config);
308
309        // Verify old files are removed
310        for file in &old_files {
311            let old_path = cache_dir.join(file);
312            assert!(
313                !old_path.exists(),
314                "Old cache file {} should be removed after migration",
315                file
316            );
317        }
318    }
319
320    #[test]
321    fn test_cache_consistency_across_path_representations() {
322        let dir = tempdir().unwrap();
323        let project_path = dir.path().join("test_project");
324        let _ = fs::create_dir(&project_path);
325
326        let config = Config::default();
327
328        // Test different path representations that should resolve to the same cache
329        let mut paths_to_test = vec![
330            project_path.clone(),
331            project_path.canonicalize().unwrap_or(project_path.clone()),
332        ];
333
334        // If we can create a relative path, test that too
335        if let Ok(current_dir) = std::env::current_dir()
336            && let Ok(relative) = project_path.strip_prefix(&current_dir)
337        {
338            paths_to_test.push(relative.to_path_buf());
339        }
340
341        let mut cache_paths = Vec::new();
342        for path in &paths_to_test {
343            let cache_manager = CacheManager::new(path, &config);
344            cache_paths.push(cache_manager.get_cache_path());
345        }
346
347        // All cache paths should be identical
348        for (i, path1) in cache_paths.iter().enumerate() {
349            for (j, path2) in cache_paths.iter().enumerate() {
350                if i != j {
351                    assert_eq!(
352                        path1, path2,
353                        "Cache paths should be identical for different representations of the same project path"
354                    );
355                }
356            }
357        }
358    }
359
360    #[test]
361    fn test_normalize_path_format() {
362        // Test Windows UNC path normalization
363        if cfg!(windows) {
364            let unc_path = Path::new("\\\\?\\C:\\test\\path");
365            let normalized = CacheManager::normalize_path_format(unc_path);
366            assert_eq!(normalized, PathBuf::from("C:\\test\\path"));
367        }
368
369        // Test normal path (should remain unchanged)
370        let normal_path = Path::new("/normal/path");
371        let normalized = CacheManager::normalize_path_format(normal_path);
372        assert_eq!(normalized, normal_path);
373    }
374
375    #[test]
376    fn test_cache_read_nonexistent_file() {
377        let dir = tempdir().unwrap();
378        let project_path = dir.path().join("nonexistent_project");
379
380        let config = Config::default();
381        let cache_manager = CacheManager::new(&project_path, &config);
382
383        let result = cache_manager.read_cache().unwrap();
384        assert!(result.is_none());
385    }
386
387    #[test]
388    fn test_cache_read_corrupted_file() {
389        let dir = tempdir().unwrap();
390        let project_path = dir.path().join("test_project");
391        let _ = fs::create_dir(&project_path);
392
393        let config = Config::default();
394        let cache_manager = CacheManager::new(&project_path, &config);
395        let cache_path = cache_manager.get_cache_path();
396
397        // Create a corrupted cache file
398        let _ = fs::create_dir_all(cache_path.parent().unwrap());
399        let _ = fs::write(&cache_path, "invalid json content {{{");
400
401        let result = cache_manager.read_cache();
402        assert!(result.is_err());
403    }
404
405    #[test]
406    fn test_cache_write_read_roundtrip() {
407        let dir = tempdir().unwrap();
408        let project_path = dir.path().join("test_project");
409        let _ = fs::create_dir(&project_path);
410
411        let config = Config {
412            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
413            ignore: Some(vec!["target".to_string(), ".git".to_string()]),
414            line_numbers: Some(true),
415            ..Default::default()
416        };
417
418        let cache_manager = CacheManager::new(&project_path, &config);
419
420        use crate::state::ProjectMetadata;
421        use std::collections::BTreeMap;
422
423        let mut files = BTreeMap::new();
424        files.insert(
425            PathBuf::from("test.rs"),
426            crate::state::FileState {
427                content: "fn main() {}".to_string(),
428                size: 12,
429                modified: std::time::SystemTime::UNIX_EPOCH,
430                content_hash: "test_hash".to_string(),
431            },
432        );
433
434        let original_state = ProjectState {
435            timestamp: "2023-01-01T12:00:00Z".to_string(),
436            config_hash: "test_config_hash".to_string(),
437            files,
438            metadata: ProjectMetadata {
439                project_name: "test_project".to_string(),
440                file_count: 1,
441                filters: vec!["rs".to_string(), "toml".to_string()],
442                ignores: vec!["target".to_string(), ".git".to_string()],
443                line_numbers: true,
444            },
445        };
446
447        // Write and read back
448        cache_manager.write_cache(&original_state).unwrap();
449        let cached_state = cache_manager.read_cache().unwrap().unwrap();
450
451        assert_eq!(cached_state.timestamp, original_state.timestamp);
452        assert_eq!(cached_state.config_hash, original_state.config_hash);
453        assert_eq!(cached_state.files.len(), original_state.files.len());
454        assert_eq!(
455            cached_state.metadata.project_name,
456            original_state.metadata.project_name
457        );
458        assert_eq!(
459            cached_state.metadata.file_count,
460            original_state.metadata.file_count
461        );
462        assert_eq!(
463            cached_state.metadata.filters,
464            original_state.metadata.filters
465        );
466        assert_eq!(
467            cached_state.metadata.ignores,
468            original_state.metadata.ignores
469        );
470        assert_eq!(
471            cached_state.metadata.line_numbers,
472            original_state.metadata.line_numbers
473        );
474    }
475
476    #[test]
477    fn test_different_configs_different_cache_files() {
478        let dir = tempdir().unwrap();
479        let project_path = dir.path().join("test_project");
480        let _ = fs::create_dir(&project_path);
481
482        let config1 = Config {
483            filter: Some(vec!["rs".to_string()]),
484            ..Default::default()
485        };
486
487        let config2 = Config {
488            filter: Some(vec!["py".to_string()]),
489            ..Default::default()
490        };
491
492        let cache_manager1 = CacheManager::new(&project_path, &config1);
493        let cache_manager2 = CacheManager::new(&project_path, &config2);
494
495        let cache_path1 = cache_manager1.get_cache_path();
496        let cache_path2 = cache_manager2.get_cache_path();
497
498        assert_ne!(
499            cache_path1, cache_path2,
500            "Different configs should have different cache files"
501        );
502    }
503
504    #[test]
505    fn test_normalize_project_path_absolute() {
506        let temp_dir = tempdir().unwrap();
507        let project_path = temp_dir.path().join("test_project");
508        let _ = fs::create_dir(&project_path);
509
510        let normalized = CacheManager::normalize_project_path(&project_path);
511        assert!(normalized.is_absolute());
512    }
513
514    #[test]
515    fn test_normalize_project_path_relative() {
516        let temp_dir = tempdir().unwrap();
517        let original_dir = std::env::current_dir().unwrap();
518
519        // Change to temp directory
520        std::env::set_current_dir(&temp_dir).unwrap();
521
522        // Create a project directory
523        let project_name = "relative_project";
524        let _ = fs::create_dir(project_name);
525
526        let relative_path = Path::new(project_name);
527        let normalized = CacheManager::normalize_project_path(relative_path);
528
529        // Restore original directory
530        std::env::set_current_dir(original_dir).unwrap();
531
532        assert!(normalized.is_absolute());
533        assert!(normalized.to_string_lossy().contains(project_name));
534    }
535
536    #[test]
537    fn test_hash_config_same_values() {
538        let config1 = Config {
539            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
540            ignore: Some(vec!["target".to_string()]),
541            line_numbers: Some(false),
542            ..Default::default()
543        };
544
545        let config2 = Config {
546            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
547            ignore: Some(vec!["target".to_string()]),
548            line_numbers: Some(false),
549            ..Default::default()
550        };
551
552        let hash1 = CacheManager::hash_config(&config1);
553        let hash2 = CacheManager::hash_config(&config2);
554
555        assert_eq!(
556            hash1, hash2,
557            "Identical configs should produce identical hashes"
558        );
559    }
560
561    #[test]
562    fn test_migrate_old_cache_preserves_new_files() {
563        let dir = tempdir().unwrap();
564        let project_path = dir.path().join("test_project");
565        let _ = fs::create_dir(&project_path);
566
567        let cache_dir = project_path.join(".context-builder").join("cache");
568        let _ = fs::create_dir_all(&cache_dir);
569
570        // Create both old and new cache files
571        let _ = fs::write(cache_dir.join("last_canonical.md"), "old content");
572        let _ = fs::write(cache_dir.join("state_abc123_def456.json"), "new content");
573
574        let config = Config::default();
575        let _cache_manager = CacheManager::new(&project_path, &config);
576
577        // Old file should be removed
578        assert!(!cache_dir.join("last_canonical.md").exists());
579
580        // New file should be preserved
581        assert!(cache_dir.join("state_abc123_def456.json").exists());
582    }
583}