Skip to main content

context_builder/
cache.rs

1//! Cache management for context-builder.
2//!
3//! This module handles caching of project states to enable the auto-diff feature.
4//! It uses a hash of the project path and configuration to avoid cache collisions
5//! between different projects or configurations.
6
7use fs2::FileExt;
8
9use std::fs;
10use std::fs::File;
11
12use std::io::{Read, Write};
13use std::path::{Path, PathBuf};
14
15use crate::config::Config;
16use crate::state::ProjectState;
17
/// Manages cache operations with file locking to prevent corruption.
///
/// A manager is bound to one project directory and one configuration: both are
/// hashed, and the two hashes together select the cache file that is read and
/// written inside the cache directory.
#[derive(Debug, Clone)]
pub struct CacheManager {
    /// Directory that holds the cache files (`<project>/.context-builder/cache`).
    cache_dir: PathBuf,
    /// Hex-encoded hash of the normalized project path.
    project_hash: String,
    /// Hex-encoded hash of the configuration values that affect the cached state.
    config_hash: String,
}
24
25impl CacheManager {
26    /// Create a new cache manager for the given project path and configuration
27    pub fn new(project_path: &Path, config: &Config) -> Self {
28        // Normalize the project path first for consistency
29        let normalized_project_path = Self::normalize_project_path(project_path);
30
31        let project_hash = Self::hash_path(&normalized_project_path);
32        let config_hash = Self::hash_config(config);
33
34        // Ensure cache directory exists relative to normalized project root
35        let cache_dir = normalized_project_path
36            .join(".context-builder")
37            .join("cache");
38        if !cache_dir.exists() {
39            let _ = fs::create_dir_all(&cache_dir);
40        }
41
42        let cache_manager = Self {
43            cache_dir,
44            project_hash,
45            config_hash,
46        };
47
48        // Migrate old cache format if present
49        cache_manager.migrate_old_cache();
50
51        cache_manager
52    }
53
54    /// Normalize project path for consistent hashing and cache directory creation
55    fn normalize_project_path(path: &Path) -> PathBuf {
56        // Always resolve to absolute path first
57        let absolute_path = if path.is_absolute() {
58            path.to_path_buf()
59        } else {
60            match std::env::current_dir() {
61                Ok(cwd) => cwd.join(path),
62                Err(_) => path.to_path_buf(),
63            }
64        };
65
66        // Try to canonicalize for consistency, but normalize the result
67        if let Ok(canonical) = absolute_path.canonicalize() {
68            Self::normalize_path_format(&canonical)
69        } else {
70            absolute_path
71        }
72    }
73
74    /// Generate a hash from the normalized project path
75    fn hash_path(path: &Path) -> String {
76        let path_str = path.to_string_lossy();
77        let hash = xxhash_rust::xxh3::xxh3_64(path_str.as_bytes());
78        format!("{:x}", hash)
79    }
80
81    /// Normalize path format to handle Windows UNC prefixes
82    fn normalize_path_format(path: &Path) -> PathBuf {
83        let path_str = path.to_string_lossy();
84
85        // Remove Windows UNC prefix if present
86        if cfg!(windows) && path_str.starts_with("\\\\?\\") {
87            PathBuf::from(&path_str[4..])
88        } else {
89            path.to_path_buf()
90        }
91    }
92
93    /// Generate a hash from the configuration
94    fn hash_config(config: &Config) -> String {
95        // Build a stable string representation of config for hashing.
96        // IMPORTANT: Must stay in sync with state.rs::compute_config_hash
97        let mut config_str = String::new();
98        if let Some(ref filters) = config.filter {
99            config_str.push_str(&filters.join(","));
100        }
101        config_str.push('|');
102        if let Some(ref ignores) = config.ignore {
103            config_str.push_str(&ignores.join(","));
104        }
105        config_str.push('|');
106        config_str.push_str(&format!(
107            "{:?}|{:?}|{:?}|{:?}|{:?}|{:?}|{:?}",
108            config.line_numbers,
109            config.auto_diff,
110            config.diff_context_lines,
111            config.signatures,
112            config.structure,
113            config.truncate,
114            config.visibility,
115        ));
116        let hash = xxhash_rust::xxh3::xxh3_64(config_str.as_bytes());
117        format!("{:x}", hash)
118    }
119
120    /// Get the cache file path for this specific project and configuration
121    fn get_cache_path(&self) -> PathBuf {
122        self.cache_dir.join(format!(
123            "state_{}_{}.json",
124            self.project_hash, self.config_hash
125        ))
126    }
127
128    /// Public helper primarily for debugging/tests to inspect the resolved cache path
129    pub fn debug_cache_file_path(&self) -> PathBuf {
130        self.get_cache_path()
131    }
132
133    /// Migrate old markdown-based cache files to new JSON format
134    fn migrate_old_cache(&self) {
135        let old_cache_patterns = ["last_canonical.md", "last_output.md", "current_output.md"];
136
137        for pattern in &old_cache_patterns {
138            let old_cache_path = self.cache_dir.join(pattern);
139            if old_cache_path.exists() {
140                eprintln!("Migrating old cache format: removing {}", pattern);
141                let _ = fs::remove_file(&old_cache_path);
142            }
143        }
144
145        // Also remove any files that look like timestamped outputs from old versions
146        if let Ok(entries) = fs::read_dir(&self.cache_dir) {
147            for entry in entries.flatten() {
148                let file_name = entry.file_name();
149                let name = file_name.to_string_lossy();
150                if name.ends_with(".md") && (name.contains("_20") || name.starts_with("output_")) {
151                    eprintln!("Migrating old cache format: removing {}", name);
152                    let _ = fs::remove_file(entry.path());
153                }
154            }
155        }
156    }
157
158    /// Read the cached project state with file locking
159    pub fn read_cache(&self) -> Result<Option<ProjectState>, Box<dyn std::error::Error>> {
160        let cache_path = self.get_cache_path();
161
162        if !cache_path.exists() {
163            return Ok(None);
164        }
165
166        let file = File::open(&cache_path)?;
167        // Acquire shared lock to prevent reading while writing
168        file.lock_shared()?;
169
170        let mut contents = String::new();
171        let mut file = std::io::BufReader::new(file);
172        file.read_to_string(&mut contents)?;
173
174        // Release lock
175        file.get_ref().unlock()?;
176
177        let state: ProjectState = serde_json::from_str(&contents)?;
178        Ok(Some(state))
179    }
180
181    /// Write the project state to cache with file locking
182    pub fn write_cache(&self, state: &ProjectState) -> Result<(), Box<dyn std::error::Error>> {
183        let cache_path = self.get_cache_path();
184
185        let file = std::fs::OpenOptions::new()
186            .write(true)
187            .create(true)
188            .truncate(false)
189            .open(&cache_path)?;
190        // Acquire exclusive lock BEFORE truncating to prevent TOCTOU races
191        file.lock_exclusive()?;
192        file.set_len(0)?;
193
194        let json = serde_json::to_string_pretty(state)?;
195        let mut file = std::io::BufWriter::new(file);
196        file.write_all(json.as_bytes())?;
197        file.flush()?;
198
199        // Release lock
200        file.get_ref().unlock()?;
201
202        Ok(())
203    }
204}
205
206#[cfg(test)]
207mod tests {
208    use super::*;
209    use std::path::Path;
210    use tempfile::tempdir;
211
212    #[test]
213    fn test_hash_path() {
214        let path1 = Path::new("/project1");
215        let path2 = Path::new("/project2");
216
217        let hash1 = CacheManager::hash_path(path1);
218        let hash2 = CacheManager::hash_path(path2);
219
220        assert_ne!(
221            hash1, hash2,
222            "Different paths should produce different hashes"
223        );
224    }
225
226    #[test]
227    fn test_hash_config() {
228        let config1 = Config {
229            filter: Some(vec!["rs".to_string()]),
230            ignore: Some(vec!["target".to_string()]),
231            line_numbers: Some(true),
232            ..Default::default()
233        };
234
235        let config2 = Config {
236            filter: Some(vec!["md".to_string()]),
237            ignore: Some(vec!["target".to_string()]),
238            line_numbers: Some(true),
239            ..Default::default()
240        };
241
242        let hash1 = CacheManager::hash_config(&config1);
243        let hash2 = CacheManager::hash_config(&config2);
244
245        assert_ne!(
246            hash1, hash2,
247            "Different configs should produce different hashes"
248        );
249    }
250
251    #[test]
252    fn test_cache_operations() {
253        let dir = tempdir().unwrap();
254        let project_path = dir.path().join("test_project");
255        let _ = fs::create_dir(&project_path);
256
257        let config = Config::default();
258        let cache_manager = CacheManager::new(&project_path, &config);
259
260        use crate::state::ProjectMetadata;
261
262        let state = ProjectState {
263            timestamp: "2023-01-01T00:00:00Z".to_string(),
264            config_hash: "test_config_hash".to_string(),
265            files: std::collections::BTreeMap::new(),
266            metadata: ProjectMetadata {
267                project_name: "test".to_string(),
268                file_count: 0,
269                filters: vec![],
270                ignores: vec![],
271                line_numbers: false,
272            },
273        };
274
275        // Write cache
276        assert!(cache_manager.write_cache(&state).is_ok());
277
278        // Read cache
279        let cached_state = cache_manager.read_cache().unwrap();
280        assert!(cached_state.is_some());
281        assert_eq!(cached_state.unwrap().timestamp, state.timestamp);
282    }
283
284    #[test]
285    fn test_old_cache_migration() {
286        let dir = tempdir().unwrap();
287        let project_path = dir.path().join("test_project");
288        let _ = fs::create_dir(&project_path);
289
290        // Create cache directory with old cache files
291        let cache_dir = project_path.join(".context-builder").join("cache");
292        let _ = fs::create_dir_all(&cache_dir);
293
294        let old_files = [
295            "last_canonical.md",
296            "last_output.md",
297            "current_output.md",
298            "output_20230101120000.md",
299        ];
300
301        // Create old cache files
302        for file in &old_files {
303            let old_path = cache_dir.join(file);
304            let _ = fs::write(&old_path, "old cache content");
305            assert!(
306                old_path.exists(),
307                "Old cache file should exist before migration"
308            );
309        }
310
311        // Create cache manager (this should trigger migration)
312        let config = Config::default();
313        let _cache_manager = CacheManager::new(&project_path, &config);
314
315        // Verify old files are removed
316        for file in &old_files {
317            let old_path = cache_dir.join(file);
318            assert!(
319                !old_path.exists(),
320                "Old cache file {} should be removed after migration",
321                file
322            );
323        }
324    }
325
326    #[test]
327    fn test_cache_consistency_across_path_representations() {
328        let dir = tempdir().unwrap();
329        let project_path = dir.path().join("test_project");
330        let _ = fs::create_dir(&project_path);
331
332        let config = Config::default();
333
334        // Test different path representations that should resolve to the same cache
335        let mut paths_to_test = vec![
336            project_path.clone(),
337            project_path.canonicalize().unwrap_or(project_path.clone()),
338        ];
339
340        // If we can create a relative path, test that too
341        if let Ok(current_dir) = std::env::current_dir()
342            && let Ok(relative) = project_path.strip_prefix(&current_dir)
343        {
344            paths_to_test.push(relative.to_path_buf());
345        }
346
347        let mut cache_paths = Vec::new();
348        for path in &paths_to_test {
349            let cache_manager = CacheManager::new(path, &config);
350            cache_paths.push(cache_manager.get_cache_path());
351        }
352
353        // All cache paths should be identical
354        for (i, path1) in cache_paths.iter().enumerate() {
355            for (j, path2) in cache_paths.iter().enumerate() {
356                if i != j {
357                    assert_eq!(
358                        path1, path2,
359                        "Cache paths should be identical for different representations of the same project path"
360                    );
361                }
362            }
363        }
364    }
365
366    #[test]
367    fn test_normalize_path_format() {
368        // Test Windows UNC path normalization
369        if cfg!(windows) {
370            let unc_path = Path::new("\\\\?\\C:\\test\\path");
371            let normalized = CacheManager::normalize_path_format(unc_path);
372            assert_eq!(normalized, PathBuf::from("C:\\test\\path"));
373        }
374
375        // Test normal path (should remain unchanged)
376        let normal_path = Path::new("/normal/path");
377        let normalized = CacheManager::normalize_path_format(normal_path);
378        assert_eq!(normalized, normal_path);
379    }
380
381    #[test]
382    fn test_cache_read_nonexistent_file() {
383        let dir = tempdir().unwrap();
384        let project_path = dir.path().join("nonexistent_project");
385
386        let config = Config::default();
387        let cache_manager = CacheManager::new(&project_path, &config);
388
389        let result = cache_manager.read_cache().unwrap();
390        assert!(result.is_none());
391    }
392
393    #[test]
394    fn test_cache_read_corrupted_file() {
395        let dir = tempdir().unwrap();
396        let project_path = dir.path().join("test_project");
397        let _ = fs::create_dir(&project_path);
398
399        let config = Config::default();
400        let cache_manager = CacheManager::new(&project_path, &config);
401        let cache_path = cache_manager.get_cache_path();
402
403        // Create a corrupted cache file
404        let _ = fs::create_dir_all(cache_path.parent().unwrap());
405        let _ = fs::write(&cache_path, "invalid json content {{{");
406
407        let result = cache_manager.read_cache();
408        assert!(result.is_err());
409    }
410
411    #[test]
412    fn test_cache_write_read_roundtrip() {
413        let dir = tempdir().unwrap();
414        let project_path = dir.path().join("test_project");
415        let _ = fs::create_dir(&project_path);
416
417        let config = Config {
418            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
419            ignore: Some(vec!["target".to_string(), ".git".to_string()]),
420            line_numbers: Some(true),
421            ..Default::default()
422        };
423
424        let cache_manager = CacheManager::new(&project_path, &config);
425
426        use crate::state::ProjectMetadata;
427        use std::collections::BTreeMap;
428
429        let mut files = BTreeMap::new();
430        files.insert(
431            PathBuf::from("test.rs"),
432            crate::state::FileState {
433                content: "fn main() {}".to_string(),
434                size: 12,
435                modified: std::time::SystemTime::UNIX_EPOCH,
436                content_hash: "test_hash".to_string(),
437            },
438        );
439
440        let original_state = ProjectState {
441            timestamp: "2023-01-01T12:00:00Z".to_string(),
442            config_hash: "test_config_hash".to_string(),
443            files,
444            metadata: ProjectMetadata {
445                project_name: "test_project".to_string(),
446                file_count: 1,
447                filters: vec!["rs".to_string(), "toml".to_string()],
448                ignores: vec!["target".to_string(), ".git".to_string()],
449                line_numbers: true,
450            },
451        };
452
453        // Write and read back
454        cache_manager.write_cache(&original_state).unwrap();
455        let cached_state = cache_manager.read_cache().unwrap().unwrap();
456
457        assert_eq!(cached_state.timestamp, original_state.timestamp);
458        assert_eq!(cached_state.config_hash, original_state.config_hash);
459        assert_eq!(cached_state.files.len(), original_state.files.len());
460        assert_eq!(
461            cached_state.metadata.project_name,
462            original_state.metadata.project_name
463        );
464        assert_eq!(
465            cached_state.metadata.file_count,
466            original_state.metadata.file_count
467        );
468        assert_eq!(
469            cached_state.metadata.filters,
470            original_state.metadata.filters
471        );
472        assert_eq!(
473            cached_state.metadata.ignores,
474            original_state.metadata.ignores
475        );
476        assert_eq!(
477            cached_state.metadata.line_numbers,
478            original_state.metadata.line_numbers
479        );
480    }
481
482    #[test]
483    fn test_different_configs_different_cache_files() {
484        let dir = tempdir().unwrap();
485        let project_path = dir.path().join("test_project");
486        let _ = fs::create_dir(&project_path);
487
488        let config1 = Config {
489            filter: Some(vec!["rs".to_string()]),
490            ..Default::default()
491        };
492
493        let config2 = Config {
494            filter: Some(vec!["py".to_string()]),
495            ..Default::default()
496        };
497
498        let cache_manager1 = CacheManager::new(&project_path, &config1);
499        let cache_manager2 = CacheManager::new(&project_path, &config2);
500
501        let cache_path1 = cache_manager1.get_cache_path();
502        let cache_path2 = cache_manager2.get_cache_path();
503
504        assert_ne!(
505            cache_path1, cache_path2,
506            "Different configs should have different cache files"
507        );
508    }
509
510    #[test]
511    fn test_normalize_project_path_absolute() {
512        let temp_dir = tempdir().unwrap();
513        let project_path = temp_dir.path().join("test_project");
514        let _ = fs::create_dir(&project_path);
515
516        let normalized = CacheManager::normalize_project_path(&project_path);
517        assert!(normalized.is_absolute());
518    }
519
520    #[test]
521    fn test_normalize_project_path_relative() {
522        let temp_dir = tempdir().unwrap();
523        let original_dir = std::env::current_dir().unwrap();
524
525        // Change to temp directory
526        std::env::set_current_dir(&temp_dir).unwrap();
527
528        // Create a project directory
529        let project_name = "relative_project";
530        let _ = fs::create_dir(project_name);
531
532        let relative_path = Path::new(project_name);
533        let normalized = CacheManager::normalize_project_path(relative_path);
534
535        // Restore original directory
536        std::env::set_current_dir(original_dir).unwrap();
537
538        assert!(normalized.is_absolute());
539        assert!(normalized.to_string_lossy().contains(project_name));
540    }
541
542    #[test]
543    fn test_hash_config_same_values() {
544        let config1 = Config {
545            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
546            ignore: Some(vec!["target".to_string()]),
547            line_numbers: Some(false),
548            ..Default::default()
549        };
550
551        let config2 = Config {
552            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
553            ignore: Some(vec!["target".to_string()]),
554            line_numbers: Some(false),
555            ..Default::default()
556        };
557
558        let hash1 = CacheManager::hash_config(&config1);
559        let hash2 = CacheManager::hash_config(&config2);
560
561        assert_eq!(
562            hash1, hash2,
563            "Identical configs should produce identical hashes"
564        );
565    }
566
567    #[test]
568    fn test_migrate_old_cache_preserves_new_files() {
569        let dir = tempdir().unwrap();
570        let project_path = dir.path().join("test_project");
571        let _ = fs::create_dir(&project_path);
572
573        let cache_dir = project_path.join(".context-builder").join("cache");
574        let _ = fs::create_dir_all(&cache_dir);
575
576        // Create both old and new cache files
577        let _ = fs::write(cache_dir.join("last_canonical.md"), "old content");
578        let _ = fs::write(cache_dir.join("state_abc123_def456.json"), "new content");
579
580        let config = Config::default();
581        let _cache_manager = CacheManager::new(&project_path, &config);
582
583        // Old file should be removed
584        assert!(!cache_dir.join("last_canonical.md").exists());
585
586        // New file should be preserved
587        assert!(cache_dir.join("state_abc123_def456.json").exists());
588    }
589}