context_builder/
cache.rs

1//! Cache management for context-builder.
2//!
3//! This module handles caching of project states to enable the auto-diff feature.
4//! It uses a hash of the project path and configuration to avoid cache collisions
5//! between different projects or configurations.
6
7use fs2::FileExt;
8
9use std::collections::hash_map::DefaultHasher;
10use std::fs;
11use std::fs::File;
12use std::hash::{Hash, Hasher};
13use std::io::{Read, Write};
14use std::path::{Path, PathBuf};
15
16use crate::config::Config;
17use crate::state::ProjectState;
18
/// Manages cache operations with file locking to prevent corruption.
///
/// Each manager addresses a single cache file whose name is derived from the
/// hash of the project path and the hash of the configuration.
#[derive(Debug, Clone)]
pub struct CacheManager {
    /// Directory in which the cache files are stored.
    cache_dir: PathBuf,
    /// Hex-formatted hash of the normalized project path.
    project_hash: String,
    /// Hex-formatted hash of the configuration.
    config_hash: String,
}
25
26impl CacheManager {
27    /// Create a new cache manager for the given project path and configuration
28    pub fn new(project_path: &Path, config: &Config) -> Self {
29        // Normalize the project path first for consistency
30        let normalized_project_path = Self::normalize_project_path(project_path);
31
32        let project_hash = Self::hash_path(&normalized_project_path);
33        let config_hash = Self::hash_config(config);
34
35        // Ensure cache directory exists relative to normalized project root
36        let cache_dir = normalized_project_path
37            .join(".context-builder")
38            .join("cache");
39        if !cache_dir.exists() {
40            let _ = fs::create_dir_all(&cache_dir);
41        }
42
43        let cache_manager = Self {
44            cache_dir,
45            project_hash,
46            config_hash,
47        };
48
49        // Migrate old cache format if present
50        cache_manager.migrate_old_cache();
51
52        cache_manager
53    }
54
55    /// Normalize project path for consistent hashing and cache directory creation
56    fn normalize_project_path(path: &Path) -> PathBuf {
57        // Always resolve to absolute path first
58        let absolute_path = if path.is_absolute() {
59            path.to_path_buf()
60        } else {
61            match std::env::current_dir() {
62                Ok(cwd) => cwd.join(path),
63                Err(_) => path.to_path_buf(),
64            }
65        };
66
67        // Try to canonicalize for consistency, but normalize the result
68        if let Ok(canonical) = absolute_path.canonicalize() {
69            Self::normalize_path_format(&canonical)
70        } else {
71            absolute_path
72        }
73    }
74
75    /// Generate a hash from the normalized project path
76    fn hash_path(path: &Path) -> String {
77        let mut hasher = DefaultHasher::new();
78        path.hash(&mut hasher);
79        format!("{:x}", hasher.finish())
80    }
81
82    /// Normalize path format to handle Windows UNC prefixes
83    fn normalize_path_format(path: &Path) -> PathBuf {
84        let path_str = path.to_string_lossy();
85
86        // Remove Windows UNC prefix if present
87        if cfg!(windows) && path_str.starts_with("\\\\?\\") {
88            PathBuf::from(&path_str[4..])
89        } else {
90            path.to_path_buf()
91        }
92    }
93
94    /// Generate a hash from the configuration
95    fn hash_config(config: &Config) -> String {
96        let mut hasher = DefaultHasher::new();
97        // Hash the relevant configuration parameters that affect output
98        config.filter.hash(&mut hasher);
99        config.ignore.hash(&mut hasher);
100        config.line_numbers.hash(&mut hasher);
101        format!("{:x}", hasher.finish())
102    }
103
104    /// Get the cache file path for this specific project and configuration
105    fn get_cache_path(&self) -> PathBuf {
106        self.cache_dir.join(format!(
107            "state_{}_{}.json",
108            self.project_hash, self.config_hash
109        ))
110    }
111
112    /// Public helper primarily for debugging/tests to inspect the resolved cache path
113    pub fn debug_cache_file_path(&self) -> PathBuf {
114        self.get_cache_path()
115    }
116
117    /// Migrate old markdown-based cache files to new JSON format
118    fn migrate_old_cache(&self) {
119        let old_cache_patterns = ["last_canonical.md", "last_output.md", "current_output.md"];
120
121        for pattern in &old_cache_patterns {
122            let old_cache_path = self.cache_dir.join(pattern);
123            if old_cache_path.exists() {
124                eprintln!("Migrating old cache format: removing {}", pattern);
125                let _ = fs::remove_file(&old_cache_path);
126            }
127        }
128
129        // Also remove any files that look like timestamped outputs from old versions
130        if let Ok(entries) = fs::read_dir(&self.cache_dir) {
131            for entry in entries.flatten() {
132                let file_name = entry.file_name();
133                let name = file_name.to_string_lossy();
134                if name.ends_with(".md") && (name.contains("_20") || name.starts_with("output_")) {
135                    eprintln!("Migrating old cache format: removing {}", name);
136                    let _ = fs::remove_file(entry.path());
137                }
138            }
139        }
140    }
141
142    /// Read the cached project state with file locking
143    pub fn read_cache(&self) -> Result<Option<ProjectState>, Box<dyn std::error::Error>> {
144        let cache_path = self.get_cache_path();
145
146        if !cache_path.exists() {
147            return Ok(None);
148        }
149
150        let file = File::open(&cache_path)?;
151        // Acquire shared lock to prevent reading while writing
152        file.lock_shared()?;
153
154        let mut contents = String::new();
155        let mut file = std::io::BufReader::new(file);
156        file.read_to_string(&mut contents)?;
157
158        // Release lock
159        file.get_ref().unlock()?;
160
161        let state: ProjectState = serde_json::from_str(&contents)?;
162        Ok(Some(state))
163    }
164
165    /// Write the project state to cache with file locking
166    pub fn write_cache(&self, state: &ProjectState) -> Result<(), Box<dyn std::error::Error>> {
167        let cache_path = self.get_cache_path();
168
169        let file = File::create(&cache_path)?;
170        // Acquire exclusive lock to prevent concurrent writes
171        file.lock_exclusive()?;
172
173        let json = serde_json::to_string_pretty(state)?;
174        let mut file = std::io::BufWriter::new(file);
175        file.write_all(json.as_bytes())?;
176        file.flush()?;
177
178        // Release lock
179        file.get_ref().unlock()?;
180
181        Ok(())
182    }
183}
184
185#[cfg(test)]
186mod tests {
187    use super::*;
188    use std::path::Path;
189    use tempfile::tempdir;
190
191    #[test]
192    fn test_hash_path() {
193        let path1 = Path::new("/project1");
194        let path2 = Path::new("/project2");
195
196        let hash1 = CacheManager::hash_path(path1);
197        let hash2 = CacheManager::hash_path(path2);
198
199        assert_ne!(
200            hash1, hash2,
201            "Different paths should produce different hashes"
202        );
203    }
204
205    #[test]
206    fn test_hash_config() {
207        let config1 = Config {
208            filter: Some(vec!["rs".to_string()]),
209            ignore: Some(vec!["target".to_string()]),
210            line_numbers: Some(true),
211            ..Default::default()
212        };
213
214        let config2 = Config {
215            filter: Some(vec!["md".to_string()]),
216            ignore: Some(vec!["target".to_string()]),
217            line_numbers: Some(true),
218            ..Default::default()
219        };
220
221        let hash1 = CacheManager::hash_config(&config1);
222        let hash2 = CacheManager::hash_config(&config2);
223
224        assert_ne!(
225            hash1, hash2,
226            "Different configs should produce different hashes"
227        );
228    }
229
230    #[test]
231    fn test_cache_operations() {
232        let dir = tempdir().unwrap();
233        let project_path = dir.path().join("test_project");
234        let _ = fs::create_dir(&project_path);
235
236        let config = Config::default();
237        let cache_manager = CacheManager::new(&project_path, &config);
238
239        use crate::state::ProjectMetadata;
240
241        let state = ProjectState {
242            timestamp: "2023-01-01T00:00:00Z".to_string(),
243            config_hash: "test_config_hash".to_string(),
244            files: std::collections::BTreeMap::new(),
245            metadata: ProjectMetadata {
246                project_name: "test".to_string(),
247                file_count: 0,
248                filters: vec![],
249                ignores: vec![],
250                line_numbers: false,
251            },
252        };
253
254        // Write cache
255        assert!(cache_manager.write_cache(&state).is_ok());
256
257        // Read cache
258        let cached_state = cache_manager.read_cache().unwrap();
259        assert!(cached_state.is_some());
260        assert_eq!(cached_state.unwrap().timestamp, state.timestamp);
261    }
262
263    #[test]
264    fn test_old_cache_migration() {
265        let dir = tempdir().unwrap();
266        let project_path = dir.path().join("test_project");
267        let _ = fs::create_dir(&project_path);
268
269        // Create cache directory with old cache files
270        let cache_dir = project_path.join(".context-builder").join("cache");
271        let _ = fs::create_dir_all(&cache_dir);
272
273        let old_files = [
274            "last_canonical.md",
275            "last_output.md",
276            "current_output.md",
277            "output_20230101120000.md",
278        ];
279
280        // Create old cache files
281        for file in &old_files {
282            let old_path = cache_dir.join(file);
283            let _ = fs::write(&old_path, "old cache content");
284            assert!(
285                old_path.exists(),
286                "Old cache file should exist before migration"
287            );
288        }
289
290        // Create cache manager (this should trigger migration)
291        let config = Config::default();
292        let _cache_manager = CacheManager::new(&project_path, &config);
293
294        // Verify old files are removed
295        for file in &old_files {
296            let old_path = cache_dir.join(file);
297            assert!(
298                !old_path.exists(),
299                "Old cache file {} should be removed after migration",
300                file
301            );
302        }
303    }
304
305    #[test]
306    fn test_cache_consistency_across_path_representations() {
307        let dir = tempdir().unwrap();
308        let project_path = dir.path().join("test_project");
309        let _ = fs::create_dir(&project_path);
310
311        let config = Config::default();
312
313        // Test different path representations that should resolve to the same cache
314        let mut paths_to_test = vec![
315            project_path.clone(),
316            project_path.canonicalize().unwrap_or(project_path.clone()),
317        ];
318
319        // If we can create a relative path, test that too
320        if let Ok(current_dir) = std::env::current_dir()
321            && let Ok(relative) = project_path.strip_prefix(&current_dir)
322        {
323            paths_to_test.push(relative.to_path_buf());
324        }
325
326        let mut cache_paths = Vec::new();
327        for path in &paths_to_test {
328            let cache_manager = CacheManager::new(path, &config);
329            cache_paths.push(cache_manager.get_cache_path());
330        }
331
332        // All cache paths should be identical
333        for (i, path1) in cache_paths.iter().enumerate() {
334            for (j, path2) in cache_paths.iter().enumerate() {
335                if i != j {
336                    assert_eq!(
337                        path1, path2,
338                        "Cache paths should be identical for different representations of the same project path"
339                    );
340                }
341            }
342        }
343    }
344
345    #[test]
346    fn test_normalize_path_format() {
347        // Test Windows UNC path normalization
348        if cfg!(windows) {
349            let unc_path = Path::new("\\\\?\\C:\\test\\path");
350            let normalized = CacheManager::normalize_path_format(unc_path);
351            assert_eq!(normalized, PathBuf::from("C:\\test\\path"));
352        }
353
354        // Test normal path (should remain unchanged)
355        let normal_path = Path::new("/normal/path");
356        let normalized = CacheManager::normalize_path_format(normal_path);
357        assert_eq!(normalized, normal_path);
358    }
359
360    #[test]
361    fn test_cache_read_nonexistent_file() {
362        let dir = tempdir().unwrap();
363        let project_path = dir.path().join("nonexistent_project");
364
365        let config = Config::default();
366        let cache_manager = CacheManager::new(&project_path, &config);
367
368        let result = cache_manager.read_cache().unwrap();
369        assert!(result.is_none());
370    }
371
372    #[test]
373    fn test_cache_read_corrupted_file() {
374        let dir = tempdir().unwrap();
375        let project_path = dir.path().join("test_project");
376        let _ = fs::create_dir(&project_path);
377
378        let config = Config::default();
379        let cache_manager = CacheManager::new(&project_path, &config);
380        let cache_path = cache_manager.get_cache_path();
381
382        // Create a corrupted cache file
383        let _ = fs::create_dir_all(cache_path.parent().unwrap());
384        let _ = fs::write(&cache_path, "invalid json content {{{");
385
386        let result = cache_manager.read_cache();
387        assert!(result.is_err());
388    }
389
390    #[test]
391    fn test_cache_write_read_roundtrip() {
392        let dir = tempdir().unwrap();
393        let project_path = dir.path().join("test_project");
394        let _ = fs::create_dir(&project_path);
395
396        let config = Config {
397            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
398            ignore: Some(vec!["target".to_string(), ".git".to_string()]),
399            line_numbers: Some(true),
400            ..Default::default()
401        };
402
403        let cache_manager = CacheManager::new(&project_path, &config);
404
405        use crate::state::ProjectMetadata;
406        use std::collections::BTreeMap;
407
408        let mut files = BTreeMap::new();
409        files.insert(
410            PathBuf::from("test.rs"),
411            crate::state::FileState {
412                content: "fn main() {}".to_string(),
413                size: 12,
414                modified: std::time::SystemTime::UNIX_EPOCH,
415                content_hash: "test_hash".to_string(),
416            },
417        );
418
419        let original_state = ProjectState {
420            timestamp: "2023-01-01T12:00:00Z".to_string(),
421            config_hash: "test_config_hash".to_string(),
422            files,
423            metadata: ProjectMetadata {
424                project_name: "test_project".to_string(),
425                file_count: 1,
426                filters: vec!["rs".to_string(), "toml".to_string()],
427                ignores: vec!["target".to_string(), ".git".to_string()],
428                line_numbers: true,
429            },
430        };
431
432        // Write and read back
433        cache_manager.write_cache(&original_state).unwrap();
434        let cached_state = cache_manager.read_cache().unwrap().unwrap();
435
436        assert_eq!(cached_state.timestamp, original_state.timestamp);
437        assert_eq!(cached_state.config_hash, original_state.config_hash);
438        assert_eq!(cached_state.files.len(), original_state.files.len());
439        assert_eq!(
440            cached_state.metadata.project_name,
441            original_state.metadata.project_name
442        );
443        assert_eq!(
444            cached_state.metadata.file_count,
445            original_state.metadata.file_count
446        );
447        assert_eq!(
448            cached_state.metadata.filters,
449            original_state.metadata.filters
450        );
451        assert_eq!(
452            cached_state.metadata.ignores,
453            original_state.metadata.ignores
454        );
455        assert_eq!(
456            cached_state.metadata.line_numbers,
457            original_state.metadata.line_numbers
458        );
459    }
460
461    #[test]
462    fn test_different_configs_different_cache_files() {
463        let dir = tempdir().unwrap();
464        let project_path = dir.path().join("test_project");
465        let _ = fs::create_dir(&project_path);
466
467        let config1 = Config {
468            filter: Some(vec!["rs".to_string()]),
469            ..Default::default()
470        };
471
472        let config2 = Config {
473            filter: Some(vec!["py".to_string()]),
474            ..Default::default()
475        };
476
477        let cache_manager1 = CacheManager::new(&project_path, &config1);
478        let cache_manager2 = CacheManager::new(&project_path, &config2);
479
480        let cache_path1 = cache_manager1.get_cache_path();
481        let cache_path2 = cache_manager2.get_cache_path();
482
483        assert_ne!(
484            cache_path1, cache_path2,
485            "Different configs should have different cache files"
486        );
487    }
488
489    #[test]
490    fn test_normalize_project_path_absolute() {
491        let temp_dir = tempdir().unwrap();
492        let project_path = temp_dir.path().join("test_project");
493        let _ = fs::create_dir(&project_path);
494
495        let normalized = CacheManager::normalize_project_path(&project_path);
496        assert!(normalized.is_absolute());
497    }
498
499    #[test]
500    fn test_normalize_project_path_relative() {
501        let temp_dir = tempdir().unwrap();
502        let original_dir = std::env::current_dir().unwrap();
503
504        // Change to temp directory
505        std::env::set_current_dir(&temp_dir).unwrap();
506
507        // Create a project directory
508        let project_name = "relative_project";
509        let _ = fs::create_dir(project_name);
510
511        let relative_path = Path::new(project_name);
512        let normalized = CacheManager::normalize_project_path(relative_path);
513
514        // Restore original directory
515        std::env::set_current_dir(original_dir).unwrap();
516
517        assert!(normalized.is_absolute());
518        assert!(normalized.to_string_lossy().contains(project_name));
519    }
520
521    #[test]
522    fn test_hash_config_same_values() {
523        let config1 = Config {
524            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
525            ignore: Some(vec!["target".to_string()]),
526            line_numbers: Some(false),
527            ..Default::default()
528        };
529
530        let config2 = Config {
531            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
532            ignore: Some(vec!["target".to_string()]),
533            line_numbers: Some(false),
534            ..Default::default()
535        };
536
537        let hash1 = CacheManager::hash_config(&config1);
538        let hash2 = CacheManager::hash_config(&config2);
539
540        assert_eq!(
541            hash1, hash2,
542            "Identical configs should produce identical hashes"
543        );
544    }
545
546    #[test]
547    fn test_migrate_old_cache_preserves_new_files() {
548        let dir = tempdir().unwrap();
549        let project_path = dir.path().join("test_project");
550        let _ = fs::create_dir(&project_path);
551
552        let cache_dir = project_path.join(".context-builder").join("cache");
553        let _ = fs::create_dir_all(&cache_dir);
554
555        // Create both old and new cache files
556        let _ = fs::write(cache_dir.join("last_canonical.md"), "old content");
557        let _ = fs::write(cache_dir.join("state_abc123_def456.json"), "new content");
558
559        let config = Config::default();
560        let _cache_manager = CacheManager::new(&project_path, &config);
561
562        // Old file should be removed
563        assert!(!cache_dir.join("last_canonical.md").exists());
564
565        // New file should be preserved
566        assert!(cache_dir.join("state_abc123_def456.json").exists());
567    }
568}