Skip to main content

context_builder/
cache.rs

//! Cache management for context-builder.
//!
//! This module handles caching of project states to enable the auto-diff feature.
//! It uses a hash of the project path and configuration to avoid cache collisions
//! between different projects or configurations.
6
7use fs2::FileExt;
8
9use std::fs;
10use std::fs::File;
11
12use std::io::{Read, Write};
13use std::path::{Path, PathBuf};
14
15use crate::config::Config;
16use crate::state::ProjectState;
17
/// Manages cache operations with file locking to prevent corruption.
///
/// A manager is bound to one project directory and one configuration; the two
/// hashes stored here are combined into the name of the cache file it uses.
#[derive(Debug, Clone)]
pub struct CacheManager {
    /// Directory that holds the cache files
    /// (`<project root>/.context-builder/cache`).
    cache_dir: PathBuf,
    /// Hex-encoded hash of the normalized project path.
    project_hash: String,
    /// Hex-encoded hash of the configuration (see `hash_config`).
    config_hash: String,
}
24
25impl CacheManager {
26    /// Create a new cache manager for the given project path and configuration
27    pub fn new(project_path: &Path, config: &Config) -> Self {
28        // Normalize the project path first for consistency
29        let normalized_project_path = Self::normalize_project_path(project_path);
30
31        let project_hash = Self::hash_path(&normalized_project_path);
32        let config_hash = Self::hash_config(config);
33
34        // Ensure cache directory exists relative to normalized project root
35        let cache_dir = normalized_project_path
36            .join(".context-builder")
37            .join("cache");
38        if !cache_dir.exists() {
39            let _ = fs::create_dir_all(&cache_dir);
40        }
41
42        let cache_manager = Self {
43            cache_dir,
44            project_hash,
45            config_hash,
46        };
47
48        // Migrate old cache format if present
49        cache_manager.migrate_old_cache();
50
51        cache_manager
52    }
53
54    /// Normalize project path for consistent hashing and cache directory creation
55    fn normalize_project_path(path: &Path) -> PathBuf {
56        // Always resolve to absolute path first
57        let absolute_path = if path.is_absolute() {
58            path.to_path_buf()
59        } else {
60            match std::env::current_dir() {
61                Ok(cwd) => cwd.join(path),
62                Err(_) => path.to_path_buf(),
63            }
64        };
65
66        // Try to canonicalize for consistency, but normalize the result
67        if let Ok(canonical) = absolute_path.canonicalize() {
68            Self::normalize_path_format(&canonical)
69        } else {
70            absolute_path
71        }
72    }
73
74    /// Generate a hash from the normalized project path
75    fn hash_path(path: &Path) -> String {
76        let path_str = path.to_string_lossy();
77        let hash = xxhash_rust::xxh3::xxh3_64(path_str.as_bytes());
78        format!("{:x}", hash)
79    }
80
81    /// Normalize path format to handle Windows UNC prefixes
82    fn normalize_path_format(path: &Path) -> PathBuf {
83        let path_str = path.to_string_lossy();
84
85        // Remove Windows UNC prefix if present
86        if cfg!(windows) && path_str.starts_with("\\\\?\\") {
87            PathBuf::from(&path_str[4..])
88        } else {
89            path.to_path_buf()
90        }
91    }
92
93    /// Generate a hash from the configuration
94    fn hash_config(config: &Config) -> String {
95        // Build a stable string representation of config for hashing.
96        // IMPORTANT: Must stay in sync with state.rs::compute_config_hash
97        let mut config_str = String::new();
98        if let Some(ref filters) = config.filter {
99            config_str.push_str(&filters.join(","));
100        }
101        config_str.push('|');
102        if let Some(ref ignores) = config.ignore {
103            config_str.push_str(&ignores.join(","));
104        }
105        config_str.push('|');
106        config_str.push_str(&format!(
107            "{:?}|{:?}|{:?}|{:?}|{:?}|{:?}|{:?}",
108            config.line_numbers,
109            config.auto_diff,
110            config.diff_context_lines,
111            config.signatures,
112            config.structure,
113            config.truncate,
114            config.visibility,
115        ));
116        let hash = xxhash_rust::xxh3::xxh3_64(config_str.as_bytes());
117        format!("{:x}", hash)
118    }
119
120    /// Get the cache file path for this specific project and configuration
121    fn get_cache_path(&self) -> PathBuf {
122        self.cache_dir.join(format!(
123            "state_{}_{}.json",
124            self.project_hash, self.config_hash
125        ))
126    }
127
128    /// Public helper primarily for debugging/tests to inspect the resolved cache path
129    pub fn debug_cache_file_path(&self) -> PathBuf {
130        self.get_cache_path()
131    }
132
133    /// Migrate old markdown-based cache files to new JSON format
134    fn migrate_old_cache(&self) {
135        let old_cache_patterns = ["last_canonical.md", "last_output.md", "current_output.md"];
136
137        for pattern in &old_cache_patterns {
138            let old_cache_path = self.cache_dir.join(pattern);
139            if old_cache_path.exists() {
140                eprintln!("Migrating old cache format: removing {}", pattern);
141                let _ = fs::remove_file(&old_cache_path);
142            }
143        }
144
145        // Also remove any files that look like timestamped outputs from old versions
146        if let Ok(entries) = fs::read_dir(&self.cache_dir) {
147            for entry in entries.flatten() {
148                let file_name = entry.file_name();
149                let name = file_name.to_string_lossy();
150                if name.ends_with(".md") && (name.contains("_20") || name.starts_with("output_")) {
151                    eprintln!("Migrating old cache format: removing {}", name);
152                    let _ = fs::remove_file(entry.path());
153                }
154            }
155        }
156    }
157
158    /// Read the cached project state with file locking
159    pub fn read_cache(&self) -> Result<Option<ProjectState>, Box<dyn std::error::Error>> {
160        let cache_path = self.get_cache_path();
161
162        if !cache_path.exists() {
163            return Ok(None);
164        }
165
166        let file = File::open(&cache_path)?;
167        // Acquire shared lock to prevent reading while writing
168        file.lock_shared()?;
169
170        let mut contents = String::new();
171        let mut file = std::io::BufReader::new(file);
172        file.read_to_string(&mut contents)?;
173
174        // Release lock
175        file.get_ref().unlock()?;
176
177        let state: ProjectState = serde_json::from_str(&contents)?;
178        Ok(Some(state))
179    }
180
181    /// Write the project state to cache with file locking
182    pub fn write_cache(&self, state: &ProjectState) -> Result<(), Box<dyn std::error::Error>> {
183        let cache_path = self.get_cache_path();
184
185        let file = std::fs::OpenOptions::new()
186            .write(true)
187            .create(true)
188            .truncate(false)
189            .open(&cache_path)?;
190        // Acquire exclusive lock BEFORE truncating to prevent TOCTOU races
191        file.lock_exclusive()?;
192        file.set_len(0)?;
193
194        let json = serde_json::to_string_pretty(state)?;
195        let mut file = std::io::BufWriter::new(file);
196        file.write_all(json.as_bytes())?;
197        file.flush()?;
198
199        // Release lock
200        file.get_ref().unlock()?;
201
202        Ok(())
203    }
204}
205
#[cfg(test)]
mod tests {
    use super::*;
    use serial_test::serial;
    use std::collections::BTreeMap;
    use std::path::Path;
    use tempfile::tempdir;

    use crate::state::{FileState, ProjectMetadata};

    /// Create (best effort) a `test_project` directory below `root` and return its path.
    fn make_project_dir(root: &Path) -> PathBuf {
        let project = root.join("test_project");
        let _ = fs::create_dir(&project);
        project
    }

    /// Create (best effort) the cache directory of `project` and return its path.
    fn make_cache_dir(project: &Path) -> PathBuf {
        let cache_dir = project.join(".context-builder").join("cache");
        let _ = fs::create_dir_all(&cache_dir);
        cache_dir
    }

    /// Distinct paths must not hash to the same value.
    #[test]
    fn test_hash_path() {
        let first = CacheManager::hash_path(Path::new("/project1"));
        let second = CacheManager::hash_path(Path::new("/project2"));

        assert_ne!(
            first, second,
            "Different paths should produce different hashes"
        );
    }

    /// Configs that differ only in their filter list must hash differently.
    #[test]
    fn test_hash_config() {
        let config_for = |extension: &str| Config {
            filter: Some(vec![extension.to_string()]),
            ignore: Some(vec!["target".to_string()]),
            line_numbers: Some(true),
            ..Default::default()
        };

        let rust_hash = CacheManager::hash_config(&config_for("rs"));
        let markdown_hash = CacheManager::hash_config(&config_for("md"));

        assert_ne!(
            rust_hash, markdown_hash,
            "Different configs should produce different hashes"
        );
    }

    /// A written state can be read back.
    #[test]
    fn test_cache_operations() {
        let dir = tempdir().unwrap();
        let project_path = make_project_dir(dir.path());
        let cache_manager = CacheManager::new(&project_path, &Config::default());

        let state = ProjectState {
            timestamp: "2023-01-01T00:00:00Z".to_string(),
            config_hash: "test_config_hash".to_string(),
            files: BTreeMap::new(),
            metadata: ProjectMetadata {
                project_name: "test".to_string(),
                file_count: 0,
                filters: vec![],
                ignores: vec![],
                line_numbers: false,
            },
        };

        // Write cache
        assert!(cache_manager.write_cache(&state).is_ok());

        // Read cache
        let cached_state = cache_manager.read_cache().unwrap();
        assert!(cached_state.is_some());
        assert_eq!(cached_state.unwrap().timestamp, state.timestamp);
    }

    /// Constructing a manager removes cache files from the old format.
    #[test]
    fn test_old_cache_migration() {
        let dir = tempdir().unwrap();
        let project_path = make_project_dir(dir.path());
        let cache_dir = make_cache_dir(&project_path);

        let old_files = [
            "last_canonical.md",
            "last_output.md",
            "current_output.md",
            "output_20230101120000.md",
        ];

        // Seed the cache directory with old-format files.
        for file in &old_files {
            let old_path = cache_dir.join(file);
            let _ = fs::write(&old_path, "old cache content");
            assert!(
                old_path.exists(),
                "Old cache file should exist before migration"
            );
        }

        // Creating the manager triggers the migration.
        let _cache_manager = CacheManager::new(&project_path, &Config::default());

        for file in &old_files {
            assert!(
                !cache_dir.join(file).exists(),
                "Old cache file {} should be removed after migration",
                file
            );
        }
    }

    /// Different spellings of the same project path resolve to one cache file.
    #[test]
    fn test_cache_consistency_across_path_representations() {
        let dir = tempdir().unwrap();
        let project_path = make_project_dir(dir.path());
        let config = Config::default();

        let mut candidates = vec![
            project_path.clone(),
            project_path
                .canonicalize()
                .unwrap_or_else(|_| project_path.clone()),
        ];

        // Add a relative spelling when the project lives below the current directory.
        if let Ok(current_dir) = std::env::current_dir() {
            if let Ok(relative) = project_path.strip_prefix(&current_dir) {
                candidates.push(relative.to_path_buf());
            }
        }

        let cache_paths: Vec<PathBuf> = candidates
            .iter()
            .map(|candidate| CacheManager::new(candidate, &config).get_cache_path())
            .collect();

        // Every pair of cache paths must be identical.
        for (index, left) in cache_paths.iter().enumerate() {
            for right in &cache_paths[index + 1..] {
                assert_eq!(
                    left, right,
                    "Cache paths should be identical for different representations of the same project path"
                );
            }
        }
    }

    /// Verbatim prefixes are stripped on Windows; ordinary paths are untouched.
    #[test]
    fn test_normalize_path_format() {
        if cfg!(windows) {
            let verbatim = Path::new("\\\\?\\C:\\test\\path");
            assert_eq!(
                CacheManager::normalize_path_format(verbatim),
                PathBuf::from("C:\\test\\path")
            );
        }

        let plain = Path::new("/normal/path");
        assert_eq!(CacheManager::normalize_path_format(plain), plain);
    }

    /// Reading without a cache file yields `None` rather than an error.
    #[test]
    fn test_cache_read_nonexistent_file() {
        let dir = tempdir().unwrap();
        let project_path = dir.path().join("nonexistent_project");
        let cache_manager = CacheManager::new(&project_path, &Config::default());

        assert!(cache_manager.read_cache().unwrap().is_none());
    }

    /// Invalid JSON in the cache file surfaces as an error.
    #[test]
    fn test_cache_read_corrupted_file() {
        let dir = tempdir().unwrap();
        let project_path = make_project_dir(dir.path());
        let cache_manager = CacheManager::new(&project_path, &Config::default());
        let cache_path = cache_manager.get_cache_path();

        // Plant a corrupted cache file.
        let _ = fs::create_dir_all(cache_path.parent().unwrap());
        let _ = fs::write(&cache_path, "invalid json content {{{");

        assert!(cache_manager.read_cache().is_err());
    }

    /// A populated state survives a write/read round trip.
    #[test]
    fn test_cache_write_read_roundtrip() {
        let dir = tempdir().unwrap();
        let project_path = make_project_dir(dir.path());

        let config = Config {
            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
            ignore: Some(vec!["target".to_string(), ".git".to_string()]),
            line_numbers: Some(true),
            ..Default::default()
        };
        let cache_manager = CacheManager::new(&project_path, &config);

        let files = BTreeMap::from([(
            PathBuf::from("test.rs"),
            FileState {
                content: "fn main() {}".to_string(),
                size: 12,
                modified: std::time::SystemTime::UNIX_EPOCH,
                content_hash: "test_hash".to_string(),
            },
        )]);

        let original_state = ProjectState {
            timestamp: "2023-01-01T12:00:00Z".to_string(),
            config_hash: "test_config_hash".to_string(),
            files,
            metadata: ProjectMetadata {
                project_name: "test_project".to_string(),
                file_count: 1,
                filters: vec!["rs".to_string(), "toml".to_string()],
                ignores: vec!["target".to_string(), ".git".to_string()],
                line_numbers: true,
            },
        };

        // Write and read back
        cache_manager.write_cache(&original_state).unwrap();
        let cached_state = cache_manager.read_cache().unwrap().unwrap();

        assert_eq!(cached_state.timestamp, original_state.timestamp);
        assert_eq!(cached_state.config_hash, original_state.config_hash);
        assert_eq!(cached_state.files.len(), original_state.files.len());

        let cached_meta = &cached_state.metadata;
        let original_meta = &original_state.metadata;
        assert_eq!(cached_meta.project_name, original_meta.project_name);
        assert_eq!(cached_meta.file_count, original_meta.file_count);
        assert_eq!(cached_meta.filters, original_meta.filters);
        assert_eq!(cached_meta.ignores, original_meta.ignores);
        assert_eq!(cached_meta.line_numbers, original_meta.line_numbers);
    }

    /// Two configs for the same project must not share a cache file.
    #[test]
    fn test_different_configs_different_cache_files() {
        let dir = tempdir().unwrap();
        let project_path = make_project_dir(dir.path());

        let cache_path_for = |extension: &str| {
            let config = Config {
                filter: Some(vec![extension.to_string()]),
                ..Default::default()
            };
            CacheManager::new(&project_path, &config).get_cache_path()
        };

        let rust_cache_path = cache_path_for("rs");
        let python_cache_path = cache_path_for("py");

        assert_ne!(
            rust_cache_path, python_cache_path,
            "Different configs should have different cache files"
        );
    }

    /// An absolute input stays absolute after normalization.
    #[test]
    fn test_normalize_project_path_absolute() {
        let temp_dir = tempdir().unwrap();
        let project_path = make_project_dir(temp_dir.path());

        assert!(CacheManager::normalize_project_path(&project_path).is_absolute());
    }

    /// A relative input is resolved against the current directory.
    ///
    /// Serialized because it changes the process-wide working directory.
    #[test]
    #[serial]
    fn test_normalize_project_path_relative() {
        let temp_dir = tempdir().unwrap();
        let original_dir = std::env::current_dir().unwrap();

        // Work from inside the temp directory.
        std::env::set_current_dir(temp_dir.path()).unwrap();

        let project_name = "relative_project";
        let _ = fs::create_dir(project_name);

        let normalized = CacheManager::normalize_project_path(Path::new(project_name));

        // Restore the working directory before asserting.
        std::env::set_current_dir(original_dir).unwrap();

        assert!(normalized.is_absolute());
        assert!(normalized.to_string_lossy().contains(project_name));
    }

    /// Equal configs hash to equal values.
    #[test]
    fn test_hash_config_same_values() {
        let build_config = || Config {
            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
            ignore: Some(vec!["target".to_string()]),
            line_numbers: Some(false),
            ..Default::default()
        };

        let first = CacheManager::hash_config(&build_config());
        let second = CacheManager::hash_config(&build_config());

        assert_eq!(
            first, second,
            "Identical configs should produce identical hashes"
        );
    }

    /// Migration deletes old-format files but leaves JSON state files alone.
    #[test]
    fn test_migrate_old_cache_preserves_new_files() {
        let dir = tempdir().unwrap();
        let project_path = make_project_dir(dir.path());
        let cache_dir = make_cache_dir(&project_path);

        let old_file = cache_dir.join("last_canonical.md");
        let new_file = cache_dir.join("state_abc123_def456.json");
        let _ = fs::write(&old_file, "old content");
        let _ = fs::write(&new_file, "new content");

        let _cache_manager = CacheManager::new(&project_path, &Config::default());

        // Old file should be removed
        assert!(!old_file.exists());

        // New file should be preserved
        assert!(new_file.exists());
    }
}