Skip to main content

context_builder/
cache.rs

1//! Cache management for context-builder.
2//!
3//! This module handles caching of project states to enable the auto-diff feature.
4//! It uses a hash of the project path and configuration to avoid cache collisions
5//! between different projects or configurations.
6
7use fs2::FileExt;
8
9use std::collections::hash_map::DefaultHasher;
10use std::fs;
11use std::fs::File;
12use std::hash::{Hash, Hasher};
13use std::io::{Read, Write};
14use std::path::{Path, PathBuf};
15
16use crate::config::Config;
17use crate::state::ProjectState;
18
/// Manages the on-disk project-state cache, using file locks to keep
/// concurrent readers and writers from corrupting it.
#[derive(Debug, Clone)]
pub struct CacheManager {
    /// Directory that holds the cache files.
    cache_dir: PathBuf,
    /// Hex-encoded hash of the normalized project path.
    project_hash: String,
    /// Hex-encoded hash of the configuration values.
    config_hash: String,
}
25
26impl CacheManager {
27    /// Create a new cache manager for the given project path and configuration
28    pub fn new(project_path: &Path, config: &Config) -> Self {
29        // Normalize the project path first for consistency
30        let normalized_project_path = Self::normalize_project_path(project_path);
31
32        let project_hash = Self::hash_path(&normalized_project_path);
33        let config_hash = Self::hash_config(config);
34
35        // Ensure cache directory exists relative to normalized project root
36        let cache_dir = normalized_project_path
37            .join(".context-builder")
38            .join("cache");
39        if !cache_dir.exists() {
40            let _ = fs::create_dir_all(&cache_dir);
41        }
42
43        let cache_manager = Self {
44            cache_dir,
45            project_hash,
46            config_hash,
47        };
48
49        // Migrate old cache format if present
50        cache_manager.migrate_old_cache();
51
52        cache_manager
53    }
54
55    /// Normalize project path for consistent hashing and cache directory creation
56    fn normalize_project_path(path: &Path) -> PathBuf {
57        // Always resolve to absolute path first
58        let absolute_path = if path.is_absolute() {
59            path.to_path_buf()
60        } else {
61            match std::env::current_dir() {
62                Ok(cwd) => cwd.join(path),
63                Err(_) => path.to_path_buf(),
64            }
65        };
66
67        // Try to canonicalize for consistency, but normalize the result
68        if let Ok(canonical) = absolute_path.canonicalize() {
69            Self::normalize_path_format(&canonical)
70        } else {
71            absolute_path
72        }
73    }
74
75    /// Generate a hash from the normalized project path
76    fn hash_path(path: &Path) -> String {
77        let mut hasher = DefaultHasher::new();
78        path.hash(&mut hasher);
79        format!("{:x}", hasher.finish())
80    }
81
82    /// Normalize path format to handle Windows UNC prefixes
83    fn normalize_path_format(path: &Path) -> PathBuf {
84        let path_str = path.to_string_lossy();
85
86        // Remove Windows UNC prefix if present
87        if cfg!(windows) && path_str.starts_with("\\\\?\\") {
88            PathBuf::from(&path_str[4..])
89        } else {
90            path.to_path_buf()
91        }
92    }
93
94    /// Generate a hash from the configuration
95    fn hash_config(config: &Config) -> String {
96        let mut hasher = DefaultHasher::new();
97        // Hash the relevant configuration parameters that affect output
98        config.filter.hash(&mut hasher);
99        config.ignore.hash(&mut hasher);
100        config.line_numbers.hash(&mut hasher);
101        format!("{:x}", hasher.finish())
102    }
103
104    /// Get the cache file path for this specific project and configuration
105    fn get_cache_path(&self) -> PathBuf {
106        self.cache_dir.join(format!(
107            "state_{}_{}.json",
108            self.project_hash, self.config_hash
109        ))
110    }
111
112    /// Public helper primarily for debugging/tests to inspect the resolved cache path
113    pub fn debug_cache_file_path(&self) -> PathBuf {
114        self.get_cache_path()
115    }
116
117    /// Migrate old markdown-based cache files to new JSON format
118    fn migrate_old_cache(&self) {
119        let old_cache_patterns = ["last_canonical.md", "last_output.md", "current_output.md"];
120
121        for pattern in &old_cache_patterns {
122            let old_cache_path = self.cache_dir.join(pattern);
123            if old_cache_path.exists() {
124                eprintln!("Migrating old cache format: removing {}", pattern);
125                let _ = fs::remove_file(&old_cache_path);
126            }
127        }
128
129        // Also remove any files that look like timestamped outputs from old versions
130        if let Ok(entries) = fs::read_dir(&self.cache_dir) {
131            for entry in entries.flatten() {
132                let file_name = entry.file_name();
133                let name = file_name.to_string_lossy();
134                if name.ends_with(".md") && (name.contains("_20") || name.starts_with("output_")) {
135                    eprintln!("Migrating old cache format: removing {}", name);
136                    let _ = fs::remove_file(entry.path());
137                }
138            }
139        }
140    }
141
142    /// Read the cached project state with file locking
143    pub fn read_cache(&self) -> Result<Option<ProjectState>, Box<dyn std::error::Error>> {
144        let cache_path = self.get_cache_path();
145
146        if !cache_path.exists() {
147            return Ok(None);
148        }
149
150        let file = File::open(&cache_path)?;
151        // Acquire shared lock to prevent reading while writing
152        file.lock_shared()?;
153
154        let mut contents = String::new();
155        let mut file = std::io::BufReader::new(file);
156        file.read_to_string(&mut contents)?;
157
158        // Release lock
159        file.get_ref().unlock()?;
160
161        let state: ProjectState = serde_json::from_str(&contents)?;
162        Ok(Some(state))
163    }
164
165    /// Write the project state to cache with file locking
166    pub fn write_cache(&self, state: &ProjectState) -> Result<(), Box<dyn std::error::Error>> {
167        let cache_path = self.get_cache_path();
168
169        let file = std::fs::OpenOptions::new()
170            .write(true)
171            .create(true)
172            .truncate(false)
173            .open(&cache_path)?;
174        // Acquire exclusive lock BEFORE truncating to prevent TOCTOU races
175        file.lock_exclusive()?;
176        file.set_len(0)?;
177
178        let json = serde_json::to_string_pretty(state)?;
179        let mut file = std::io::BufWriter::new(file);
180        file.write_all(json.as_bytes())?;
181        file.flush()?;
182
183        // Release lock
184        file.get_ref().unlock()?;
185
186        Ok(())
187    }
188}
189
190#[cfg(test)]
191mod tests {
192    use super::*;
193    use std::path::Path;
194    use tempfile::tempdir;
195
196    #[test]
197    fn test_hash_path() {
198        let path1 = Path::new("/project1");
199        let path2 = Path::new("/project2");
200
201        let hash1 = CacheManager::hash_path(path1);
202        let hash2 = CacheManager::hash_path(path2);
203
204        assert_ne!(
205            hash1, hash2,
206            "Different paths should produce different hashes"
207        );
208    }
209
210    #[test]
211    fn test_hash_config() {
212        let config1 = Config {
213            filter: Some(vec!["rs".to_string()]),
214            ignore: Some(vec!["target".to_string()]),
215            line_numbers: Some(true),
216            ..Default::default()
217        };
218
219        let config2 = Config {
220            filter: Some(vec!["md".to_string()]),
221            ignore: Some(vec!["target".to_string()]),
222            line_numbers: Some(true),
223            ..Default::default()
224        };
225
226        let hash1 = CacheManager::hash_config(&config1);
227        let hash2 = CacheManager::hash_config(&config2);
228
229        assert_ne!(
230            hash1, hash2,
231            "Different configs should produce different hashes"
232        );
233    }
234
235    #[test]
236    fn test_cache_operations() {
237        let dir = tempdir().unwrap();
238        let project_path = dir.path().join("test_project");
239        let _ = fs::create_dir(&project_path);
240
241        let config = Config::default();
242        let cache_manager = CacheManager::new(&project_path, &config);
243
244        use crate::state::ProjectMetadata;
245
246        let state = ProjectState {
247            timestamp: "2023-01-01T00:00:00Z".to_string(),
248            config_hash: "test_config_hash".to_string(),
249            files: std::collections::BTreeMap::new(),
250            metadata: ProjectMetadata {
251                project_name: "test".to_string(),
252                file_count: 0,
253                filters: vec![],
254                ignores: vec![],
255                line_numbers: false,
256            },
257        };
258
259        // Write cache
260        assert!(cache_manager.write_cache(&state).is_ok());
261
262        // Read cache
263        let cached_state = cache_manager.read_cache().unwrap();
264        assert!(cached_state.is_some());
265        assert_eq!(cached_state.unwrap().timestamp, state.timestamp);
266    }
267
268    #[test]
269    fn test_old_cache_migration() {
270        let dir = tempdir().unwrap();
271        let project_path = dir.path().join("test_project");
272        let _ = fs::create_dir(&project_path);
273
274        // Create cache directory with old cache files
275        let cache_dir = project_path.join(".context-builder").join("cache");
276        let _ = fs::create_dir_all(&cache_dir);
277
278        let old_files = [
279            "last_canonical.md",
280            "last_output.md",
281            "current_output.md",
282            "output_20230101120000.md",
283        ];
284
285        // Create old cache files
286        for file in &old_files {
287            let old_path = cache_dir.join(file);
288            let _ = fs::write(&old_path, "old cache content");
289            assert!(
290                old_path.exists(),
291                "Old cache file should exist before migration"
292            );
293        }
294
295        // Create cache manager (this should trigger migration)
296        let config = Config::default();
297        let _cache_manager = CacheManager::new(&project_path, &config);
298
299        // Verify old files are removed
300        for file in &old_files {
301            let old_path = cache_dir.join(file);
302            assert!(
303                !old_path.exists(),
304                "Old cache file {} should be removed after migration",
305                file
306            );
307        }
308    }
309
310    #[test]
311    fn test_cache_consistency_across_path_representations() {
312        let dir = tempdir().unwrap();
313        let project_path = dir.path().join("test_project");
314        let _ = fs::create_dir(&project_path);
315
316        let config = Config::default();
317
318        // Test different path representations that should resolve to the same cache
319        let mut paths_to_test = vec![
320            project_path.clone(),
321            project_path.canonicalize().unwrap_or(project_path.clone()),
322        ];
323
324        // If we can create a relative path, test that too
325        if let Ok(current_dir) = std::env::current_dir()
326            && let Ok(relative) = project_path.strip_prefix(&current_dir)
327        {
328            paths_to_test.push(relative.to_path_buf());
329        }
330
331        let mut cache_paths = Vec::new();
332        for path in &paths_to_test {
333            let cache_manager = CacheManager::new(path, &config);
334            cache_paths.push(cache_manager.get_cache_path());
335        }
336
337        // All cache paths should be identical
338        for (i, path1) in cache_paths.iter().enumerate() {
339            for (j, path2) in cache_paths.iter().enumerate() {
340                if i != j {
341                    assert_eq!(
342                        path1, path2,
343                        "Cache paths should be identical for different representations of the same project path"
344                    );
345                }
346            }
347        }
348    }
349
350    #[test]
351    fn test_normalize_path_format() {
352        // Test Windows UNC path normalization
353        if cfg!(windows) {
354            let unc_path = Path::new("\\\\?\\C:\\test\\path");
355            let normalized = CacheManager::normalize_path_format(unc_path);
356            assert_eq!(normalized, PathBuf::from("C:\\test\\path"));
357        }
358
359        // Test normal path (should remain unchanged)
360        let normal_path = Path::new("/normal/path");
361        let normalized = CacheManager::normalize_path_format(normal_path);
362        assert_eq!(normalized, normal_path);
363    }
364
365    #[test]
366    fn test_cache_read_nonexistent_file() {
367        let dir = tempdir().unwrap();
368        let project_path = dir.path().join("nonexistent_project");
369
370        let config = Config::default();
371        let cache_manager = CacheManager::new(&project_path, &config);
372
373        let result = cache_manager.read_cache().unwrap();
374        assert!(result.is_none());
375    }
376
377    #[test]
378    fn test_cache_read_corrupted_file() {
379        let dir = tempdir().unwrap();
380        let project_path = dir.path().join("test_project");
381        let _ = fs::create_dir(&project_path);
382
383        let config = Config::default();
384        let cache_manager = CacheManager::new(&project_path, &config);
385        let cache_path = cache_manager.get_cache_path();
386
387        // Create a corrupted cache file
388        let _ = fs::create_dir_all(cache_path.parent().unwrap());
389        let _ = fs::write(&cache_path, "invalid json content {{{");
390
391        let result = cache_manager.read_cache();
392        assert!(result.is_err());
393    }
394
395    #[test]
396    fn test_cache_write_read_roundtrip() {
397        let dir = tempdir().unwrap();
398        let project_path = dir.path().join("test_project");
399        let _ = fs::create_dir(&project_path);
400
401        let config = Config {
402            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
403            ignore: Some(vec!["target".to_string(), ".git".to_string()]),
404            line_numbers: Some(true),
405            ..Default::default()
406        };
407
408        let cache_manager = CacheManager::new(&project_path, &config);
409
410        use crate::state::ProjectMetadata;
411        use std::collections::BTreeMap;
412
413        let mut files = BTreeMap::new();
414        files.insert(
415            PathBuf::from("test.rs"),
416            crate::state::FileState {
417                content: "fn main() {}".to_string(),
418                size: 12,
419                modified: std::time::SystemTime::UNIX_EPOCH,
420                content_hash: "test_hash".to_string(),
421            },
422        );
423
424        let original_state = ProjectState {
425            timestamp: "2023-01-01T12:00:00Z".to_string(),
426            config_hash: "test_config_hash".to_string(),
427            files,
428            metadata: ProjectMetadata {
429                project_name: "test_project".to_string(),
430                file_count: 1,
431                filters: vec!["rs".to_string(), "toml".to_string()],
432                ignores: vec!["target".to_string(), ".git".to_string()],
433                line_numbers: true,
434            },
435        };
436
437        // Write and read back
438        cache_manager.write_cache(&original_state).unwrap();
439        let cached_state = cache_manager.read_cache().unwrap().unwrap();
440
441        assert_eq!(cached_state.timestamp, original_state.timestamp);
442        assert_eq!(cached_state.config_hash, original_state.config_hash);
443        assert_eq!(cached_state.files.len(), original_state.files.len());
444        assert_eq!(
445            cached_state.metadata.project_name,
446            original_state.metadata.project_name
447        );
448        assert_eq!(
449            cached_state.metadata.file_count,
450            original_state.metadata.file_count
451        );
452        assert_eq!(
453            cached_state.metadata.filters,
454            original_state.metadata.filters
455        );
456        assert_eq!(
457            cached_state.metadata.ignores,
458            original_state.metadata.ignores
459        );
460        assert_eq!(
461            cached_state.metadata.line_numbers,
462            original_state.metadata.line_numbers
463        );
464    }
465
466    #[test]
467    fn test_different_configs_different_cache_files() {
468        let dir = tempdir().unwrap();
469        let project_path = dir.path().join("test_project");
470        let _ = fs::create_dir(&project_path);
471
472        let config1 = Config {
473            filter: Some(vec!["rs".to_string()]),
474            ..Default::default()
475        };
476
477        let config2 = Config {
478            filter: Some(vec!["py".to_string()]),
479            ..Default::default()
480        };
481
482        let cache_manager1 = CacheManager::new(&project_path, &config1);
483        let cache_manager2 = CacheManager::new(&project_path, &config2);
484
485        let cache_path1 = cache_manager1.get_cache_path();
486        let cache_path2 = cache_manager2.get_cache_path();
487
488        assert_ne!(
489            cache_path1, cache_path2,
490            "Different configs should have different cache files"
491        );
492    }
493
494    #[test]
495    fn test_normalize_project_path_absolute() {
496        let temp_dir = tempdir().unwrap();
497        let project_path = temp_dir.path().join("test_project");
498        let _ = fs::create_dir(&project_path);
499
500        let normalized = CacheManager::normalize_project_path(&project_path);
501        assert!(normalized.is_absolute());
502    }
503
504    #[test]
505    fn test_normalize_project_path_relative() {
506        let temp_dir = tempdir().unwrap();
507        let original_dir = std::env::current_dir().unwrap();
508
509        // Change to temp directory
510        std::env::set_current_dir(&temp_dir).unwrap();
511
512        // Create a project directory
513        let project_name = "relative_project";
514        let _ = fs::create_dir(project_name);
515
516        let relative_path = Path::new(project_name);
517        let normalized = CacheManager::normalize_project_path(relative_path);
518
519        // Restore original directory
520        std::env::set_current_dir(original_dir).unwrap();
521
522        assert!(normalized.is_absolute());
523        assert!(normalized.to_string_lossy().contains(project_name));
524    }
525
526    #[test]
527    fn test_hash_config_same_values() {
528        let config1 = Config {
529            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
530            ignore: Some(vec!["target".to_string()]),
531            line_numbers: Some(false),
532            ..Default::default()
533        };
534
535        let config2 = Config {
536            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
537            ignore: Some(vec!["target".to_string()]),
538            line_numbers: Some(false),
539            ..Default::default()
540        };
541
542        let hash1 = CacheManager::hash_config(&config1);
543        let hash2 = CacheManager::hash_config(&config2);
544
545        assert_eq!(
546            hash1, hash2,
547            "Identical configs should produce identical hashes"
548        );
549    }
550
551    #[test]
552    fn test_migrate_old_cache_preserves_new_files() {
553        let dir = tempdir().unwrap();
554        let project_path = dir.path().join("test_project");
555        let _ = fs::create_dir(&project_path);
556
557        let cache_dir = project_path.join(".context-builder").join("cache");
558        let _ = fs::create_dir_all(&cache_dir);
559
560        // Create both old and new cache files
561        let _ = fs::write(cache_dir.join("last_canonical.md"), "old content");
562        let _ = fs::write(cache_dir.join("state_abc123_def456.json"), "new content");
563
564        let config = Config::default();
565        let _cache_manager = CacheManager::new(&project_path, &config);
566
567        // Old file should be removed
568        assert!(!cache_dir.join("last_canonical.md").exists());
569
570        // New file should be preserved
571        assert!(cache_dir.join("state_abc123_def456.json").exists());
572    }
573}