Skip to main content

context_builder/
cache.rs

1//! Cache management for context-builder.
2//!
3//! This module handles caching of project states to enable the auto-diff feature.
4//! It uses a hash of the project path and configuration to avoid cache collisions
5//! between different projects or configurations.
6
7use fs2::FileExt;
8
9use std::fs;
10use std::fs::File;
11
12use std::io::{Read, Write};
13use std::path::{Path, PathBuf};
14
15use crate::config::Config;
16use crate::state::ProjectState;
17
/// Manages on-disk cache operations, using file locking to prevent corruption.
///
/// An instance is bound to one project directory and one configuration. The
/// two hashes stored here are combined into the cache file name so that
/// different projects or configurations use different cache files.
#[derive(Debug, Clone)]
pub struct CacheManager {
    /// Directory holding the cache files (`<project>/.context-builder/cache`).
    cache_dir: PathBuf,
    /// Hex-encoded hash of the normalized project path.
    project_hash: String,
    /// Hex-encoded hash of the hashed configuration fields
    /// (filter, ignore and line-number settings).
    config_hash: String,
}
24
25impl CacheManager {
26    /// Create a new cache manager for the given project path and configuration
27    pub fn new(project_path: &Path, config: &Config) -> Self {
28        // Normalize the project path first for consistency
29        let normalized_project_path = Self::normalize_project_path(project_path);
30
31        let project_hash = Self::hash_path(&normalized_project_path);
32        let config_hash = Self::hash_config(config);
33
34        // Ensure cache directory exists relative to normalized project root
35        let cache_dir = normalized_project_path
36            .join(".context-builder")
37            .join("cache");
38        if !cache_dir.exists() {
39            let _ = fs::create_dir_all(&cache_dir);
40        }
41
42        let cache_manager = Self {
43            cache_dir,
44            project_hash,
45            config_hash,
46        };
47
48        // Migrate old cache format if present
49        cache_manager.migrate_old_cache();
50
51        cache_manager
52    }
53
54    /// Normalize project path for consistent hashing and cache directory creation
55    fn normalize_project_path(path: &Path) -> PathBuf {
56        // Always resolve to absolute path first
57        let absolute_path = if path.is_absolute() {
58            path.to_path_buf()
59        } else {
60            match std::env::current_dir() {
61                Ok(cwd) => cwd.join(path),
62                Err(_) => path.to_path_buf(),
63            }
64        };
65
66        // Try to canonicalize for consistency, but normalize the result
67        if let Ok(canonical) = absolute_path.canonicalize() {
68            Self::normalize_path_format(&canonical)
69        } else {
70            absolute_path
71        }
72    }
73
74    /// Generate a hash from the normalized project path
75    fn hash_path(path: &Path) -> String {
76        let path_str = path.to_string_lossy();
77        let hash = xxhash_rust::xxh3::xxh3_64(path_str.as_bytes());
78        format!("{:x}", hash)
79    }
80
81    /// Normalize path format to handle Windows UNC prefixes
82    fn normalize_path_format(path: &Path) -> PathBuf {
83        let path_str = path.to_string_lossy();
84
85        // Remove Windows UNC prefix if present
86        if cfg!(windows) && path_str.starts_with("\\\\?\\") {
87            PathBuf::from(&path_str[4..])
88        } else {
89            path.to_path_buf()
90        }
91    }
92
93    /// Generate a hash from the configuration
94    fn hash_config(config: &Config) -> String {
95        // Build a stable string representation of config for hashing
96        let mut config_str = String::new();
97        if let Some(ref filters) = config.filter {
98            config_str.push_str(&filters.join(","));
99        }
100        config_str.push('|');
101        if let Some(ref ignores) = config.ignore {
102            config_str.push_str(&ignores.join(","));
103        }
104        config_str.push('|');
105        config_str.push_str(&format!("{:?}", config.line_numbers));
106        let hash = xxhash_rust::xxh3::xxh3_64(config_str.as_bytes());
107        format!("{:x}", hash)
108    }
109
110    /// Get the cache file path for this specific project and configuration
111    fn get_cache_path(&self) -> PathBuf {
112        self.cache_dir.join(format!(
113            "state_{}_{}.json",
114            self.project_hash, self.config_hash
115        ))
116    }
117
118    /// Public helper primarily for debugging/tests to inspect the resolved cache path
119    pub fn debug_cache_file_path(&self) -> PathBuf {
120        self.get_cache_path()
121    }
122
123    /// Migrate old markdown-based cache files to new JSON format
124    fn migrate_old_cache(&self) {
125        let old_cache_patterns = ["last_canonical.md", "last_output.md", "current_output.md"];
126
127        for pattern in &old_cache_patterns {
128            let old_cache_path = self.cache_dir.join(pattern);
129            if old_cache_path.exists() {
130                eprintln!("Migrating old cache format: removing {}", pattern);
131                let _ = fs::remove_file(&old_cache_path);
132            }
133        }
134
135        // Also remove any files that look like timestamped outputs from old versions
136        if let Ok(entries) = fs::read_dir(&self.cache_dir) {
137            for entry in entries.flatten() {
138                let file_name = entry.file_name();
139                let name = file_name.to_string_lossy();
140                if name.ends_with(".md") && (name.contains("_20") || name.starts_with("output_")) {
141                    eprintln!("Migrating old cache format: removing {}", name);
142                    let _ = fs::remove_file(entry.path());
143                }
144            }
145        }
146    }
147
148    /// Read the cached project state with file locking
149    pub fn read_cache(&self) -> Result<Option<ProjectState>, Box<dyn std::error::Error>> {
150        let cache_path = self.get_cache_path();
151
152        if !cache_path.exists() {
153            return Ok(None);
154        }
155
156        let file = File::open(&cache_path)?;
157        // Acquire shared lock to prevent reading while writing
158        file.lock_shared()?;
159
160        let mut contents = String::new();
161        let mut file = std::io::BufReader::new(file);
162        file.read_to_string(&mut contents)?;
163
164        // Release lock
165        file.get_ref().unlock()?;
166
167        let state: ProjectState = serde_json::from_str(&contents)?;
168        Ok(Some(state))
169    }
170
171    /// Write the project state to cache with file locking
172    pub fn write_cache(&self, state: &ProjectState) -> Result<(), Box<dyn std::error::Error>> {
173        let cache_path = self.get_cache_path();
174
175        let file = std::fs::OpenOptions::new()
176            .write(true)
177            .create(true)
178            .truncate(false)
179            .open(&cache_path)?;
180        // Acquire exclusive lock BEFORE truncating to prevent TOCTOU races
181        file.lock_exclusive()?;
182        file.set_len(0)?;
183
184        let json = serde_json::to_string_pretty(state)?;
185        let mut file = std::io::BufWriter::new(file);
186        file.write_all(json.as_bytes())?;
187        file.flush()?;
188
189        // Release lock
190        file.get_ref().unlock()?;
191
192        Ok(())
193    }
194}
195
196#[cfg(test)]
197mod tests {
198    use super::*;
199    use std::path::Path;
200    use tempfile::tempdir;
201
202    #[test]
203    fn test_hash_path() {
204        let path1 = Path::new("/project1");
205        let path2 = Path::new("/project2");
206
207        let hash1 = CacheManager::hash_path(path1);
208        let hash2 = CacheManager::hash_path(path2);
209
210        assert_ne!(
211            hash1, hash2,
212            "Different paths should produce different hashes"
213        );
214    }
215
216    #[test]
217    fn test_hash_config() {
218        let config1 = Config {
219            filter: Some(vec!["rs".to_string()]),
220            ignore: Some(vec!["target".to_string()]),
221            line_numbers: Some(true),
222            ..Default::default()
223        };
224
225        let config2 = Config {
226            filter: Some(vec!["md".to_string()]),
227            ignore: Some(vec!["target".to_string()]),
228            line_numbers: Some(true),
229            ..Default::default()
230        };
231
232        let hash1 = CacheManager::hash_config(&config1);
233        let hash2 = CacheManager::hash_config(&config2);
234
235        assert_ne!(
236            hash1, hash2,
237            "Different configs should produce different hashes"
238        );
239    }
240
241    #[test]
242    fn test_cache_operations() {
243        let dir = tempdir().unwrap();
244        let project_path = dir.path().join("test_project");
245        let _ = fs::create_dir(&project_path);
246
247        let config = Config::default();
248        let cache_manager = CacheManager::new(&project_path, &config);
249
250        use crate::state::ProjectMetadata;
251
252        let state = ProjectState {
253            timestamp: "2023-01-01T00:00:00Z".to_string(),
254            config_hash: "test_config_hash".to_string(),
255            files: std::collections::BTreeMap::new(),
256            metadata: ProjectMetadata {
257                project_name: "test".to_string(),
258                file_count: 0,
259                filters: vec![],
260                ignores: vec![],
261                line_numbers: false,
262            },
263        };
264
265        // Write cache
266        assert!(cache_manager.write_cache(&state).is_ok());
267
268        // Read cache
269        let cached_state = cache_manager.read_cache().unwrap();
270        assert!(cached_state.is_some());
271        assert_eq!(cached_state.unwrap().timestamp, state.timestamp);
272    }
273
274    #[test]
275    fn test_old_cache_migration() {
276        let dir = tempdir().unwrap();
277        let project_path = dir.path().join("test_project");
278        let _ = fs::create_dir(&project_path);
279
280        // Create cache directory with old cache files
281        let cache_dir = project_path.join(".context-builder").join("cache");
282        let _ = fs::create_dir_all(&cache_dir);
283
284        let old_files = [
285            "last_canonical.md",
286            "last_output.md",
287            "current_output.md",
288            "output_20230101120000.md",
289        ];
290
291        // Create old cache files
292        for file in &old_files {
293            let old_path = cache_dir.join(file);
294            let _ = fs::write(&old_path, "old cache content");
295            assert!(
296                old_path.exists(),
297                "Old cache file should exist before migration"
298            );
299        }
300
301        // Create cache manager (this should trigger migration)
302        let config = Config::default();
303        let _cache_manager = CacheManager::new(&project_path, &config);
304
305        // Verify old files are removed
306        for file in &old_files {
307            let old_path = cache_dir.join(file);
308            assert!(
309                !old_path.exists(),
310                "Old cache file {} should be removed after migration",
311                file
312            );
313        }
314    }
315
316    #[test]
317    fn test_cache_consistency_across_path_representations() {
318        let dir = tempdir().unwrap();
319        let project_path = dir.path().join("test_project");
320        let _ = fs::create_dir(&project_path);
321
322        let config = Config::default();
323
324        // Test different path representations that should resolve to the same cache
325        let mut paths_to_test = vec![
326            project_path.clone(),
327            project_path.canonicalize().unwrap_or(project_path.clone()),
328        ];
329
330        // If we can create a relative path, test that too
331        if let Ok(current_dir) = std::env::current_dir()
332            && let Ok(relative) = project_path.strip_prefix(&current_dir)
333        {
334            paths_to_test.push(relative.to_path_buf());
335        }
336
337        let mut cache_paths = Vec::new();
338        for path in &paths_to_test {
339            let cache_manager = CacheManager::new(path, &config);
340            cache_paths.push(cache_manager.get_cache_path());
341        }
342
343        // All cache paths should be identical
344        for (i, path1) in cache_paths.iter().enumerate() {
345            for (j, path2) in cache_paths.iter().enumerate() {
346                if i != j {
347                    assert_eq!(
348                        path1, path2,
349                        "Cache paths should be identical for different representations of the same project path"
350                    );
351                }
352            }
353        }
354    }
355
356    #[test]
357    fn test_normalize_path_format() {
358        // Test Windows UNC path normalization
359        if cfg!(windows) {
360            let unc_path = Path::new("\\\\?\\C:\\test\\path");
361            let normalized = CacheManager::normalize_path_format(unc_path);
362            assert_eq!(normalized, PathBuf::from("C:\\test\\path"));
363        }
364
365        // Test normal path (should remain unchanged)
366        let normal_path = Path::new("/normal/path");
367        let normalized = CacheManager::normalize_path_format(normal_path);
368        assert_eq!(normalized, normal_path);
369    }
370
371    #[test]
372    fn test_cache_read_nonexistent_file() {
373        let dir = tempdir().unwrap();
374        let project_path = dir.path().join("nonexistent_project");
375
376        let config = Config::default();
377        let cache_manager = CacheManager::new(&project_path, &config);
378
379        let result = cache_manager.read_cache().unwrap();
380        assert!(result.is_none());
381    }
382
383    #[test]
384    fn test_cache_read_corrupted_file() {
385        let dir = tempdir().unwrap();
386        let project_path = dir.path().join("test_project");
387        let _ = fs::create_dir(&project_path);
388
389        let config = Config::default();
390        let cache_manager = CacheManager::new(&project_path, &config);
391        let cache_path = cache_manager.get_cache_path();
392
393        // Create a corrupted cache file
394        let _ = fs::create_dir_all(cache_path.parent().unwrap());
395        let _ = fs::write(&cache_path, "invalid json content {{{");
396
397        let result = cache_manager.read_cache();
398        assert!(result.is_err());
399    }
400
401    #[test]
402    fn test_cache_write_read_roundtrip() {
403        let dir = tempdir().unwrap();
404        let project_path = dir.path().join("test_project");
405        let _ = fs::create_dir(&project_path);
406
407        let config = Config {
408            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
409            ignore: Some(vec!["target".to_string(), ".git".to_string()]),
410            line_numbers: Some(true),
411            ..Default::default()
412        };
413
414        let cache_manager = CacheManager::new(&project_path, &config);
415
416        use crate::state::ProjectMetadata;
417        use std::collections::BTreeMap;
418
419        let mut files = BTreeMap::new();
420        files.insert(
421            PathBuf::from("test.rs"),
422            crate::state::FileState {
423                content: "fn main() {}".to_string(),
424                size: 12,
425                modified: std::time::SystemTime::UNIX_EPOCH,
426                content_hash: "test_hash".to_string(),
427            },
428        );
429
430        let original_state = ProjectState {
431            timestamp: "2023-01-01T12:00:00Z".to_string(),
432            config_hash: "test_config_hash".to_string(),
433            files,
434            metadata: ProjectMetadata {
435                project_name: "test_project".to_string(),
436                file_count: 1,
437                filters: vec!["rs".to_string(), "toml".to_string()],
438                ignores: vec!["target".to_string(), ".git".to_string()],
439                line_numbers: true,
440            },
441        };
442
443        // Write and read back
444        cache_manager.write_cache(&original_state).unwrap();
445        let cached_state = cache_manager.read_cache().unwrap().unwrap();
446
447        assert_eq!(cached_state.timestamp, original_state.timestamp);
448        assert_eq!(cached_state.config_hash, original_state.config_hash);
449        assert_eq!(cached_state.files.len(), original_state.files.len());
450        assert_eq!(
451            cached_state.metadata.project_name,
452            original_state.metadata.project_name
453        );
454        assert_eq!(
455            cached_state.metadata.file_count,
456            original_state.metadata.file_count
457        );
458        assert_eq!(
459            cached_state.metadata.filters,
460            original_state.metadata.filters
461        );
462        assert_eq!(
463            cached_state.metadata.ignores,
464            original_state.metadata.ignores
465        );
466        assert_eq!(
467            cached_state.metadata.line_numbers,
468            original_state.metadata.line_numbers
469        );
470    }
471
472    #[test]
473    fn test_different_configs_different_cache_files() {
474        let dir = tempdir().unwrap();
475        let project_path = dir.path().join("test_project");
476        let _ = fs::create_dir(&project_path);
477
478        let config1 = Config {
479            filter: Some(vec!["rs".to_string()]),
480            ..Default::default()
481        };
482
483        let config2 = Config {
484            filter: Some(vec!["py".to_string()]),
485            ..Default::default()
486        };
487
488        let cache_manager1 = CacheManager::new(&project_path, &config1);
489        let cache_manager2 = CacheManager::new(&project_path, &config2);
490
491        let cache_path1 = cache_manager1.get_cache_path();
492        let cache_path2 = cache_manager2.get_cache_path();
493
494        assert_ne!(
495            cache_path1, cache_path2,
496            "Different configs should have different cache files"
497        );
498    }
499
500    #[test]
501    fn test_normalize_project_path_absolute() {
502        let temp_dir = tempdir().unwrap();
503        let project_path = temp_dir.path().join("test_project");
504        let _ = fs::create_dir(&project_path);
505
506        let normalized = CacheManager::normalize_project_path(&project_path);
507        assert!(normalized.is_absolute());
508    }
509
510    #[test]
511    fn test_normalize_project_path_relative() {
512        let temp_dir = tempdir().unwrap();
513        let original_dir = std::env::current_dir().unwrap();
514
515        // Change to temp directory
516        std::env::set_current_dir(&temp_dir).unwrap();
517
518        // Create a project directory
519        let project_name = "relative_project";
520        let _ = fs::create_dir(project_name);
521
522        let relative_path = Path::new(project_name);
523        let normalized = CacheManager::normalize_project_path(relative_path);
524
525        // Restore original directory
526        std::env::set_current_dir(original_dir).unwrap();
527
528        assert!(normalized.is_absolute());
529        assert!(normalized.to_string_lossy().contains(project_name));
530    }
531
532    #[test]
533    fn test_hash_config_same_values() {
534        let config1 = Config {
535            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
536            ignore: Some(vec!["target".to_string()]),
537            line_numbers: Some(false),
538            ..Default::default()
539        };
540
541        let config2 = Config {
542            filter: Some(vec!["rs".to_string(), "toml".to_string()]),
543            ignore: Some(vec!["target".to_string()]),
544            line_numbers: Some(false),
545            ..Default::default()
546        };
547
548        let hash1 = CacheManager::hash_config(&config1);
549        let hash2 = CacheManager::hash_config(&config2);
550
551        assert_eq!(
552            hash1, hash2,
553            "Identical configs should produce identical hashes"
554        );
555    }
556
557    #[test]
558    fn test_migrate_old_cache_preserves_new_files() {
559        let dir = tempdir().unwrap();
560        let project_path = dir.path().join("test_project");
561        let _ = fs::create_dir(&project_path);
562
563        let cache_dir = project_path.join(".context-builder").join("cache");
564        let _ = fs::create_dir_all(&cache_dir);
565
566        // Create both old and new cache files
567        let _ = fs::write(cache_dir.join("last_canonical.md"), "old content");
568        let _ = fs::write(cache_dir.join("state_abc123_def456.json"), "new content");
569
570        let config = Config::default();
571        let _cache_manager = CacheManager::new(&project_path, &config);
572
573        // Old file should be removed
574        assert!(!cache_dir.join("last_canonical.md").exists());
575
576        // New file should be preserved
577        assert!(cache_dir.join("state_abc123_def456.json").exists());
578    }
579}