Skip to main content

mindmap_cli/
cache.rs

1//! MindmapCache: Secure file loading and caching for recursive navigation
2//!
3//! This module provides:
4//! - Lazy loading and caching of mindmap files
5//! - Secure path resolution with validation (prevents directory traversal)
6//! - File size checks (max 10MB by default)
7//! - Integration with NavigationContext for cycle detection
8
9use anyhow::{Context, Result, bail};
10use std::{
11    collections::HashMap,
12    fs,
13    path::{Component, Path, PathBuf},
14};
15
16use crate::Mindmap;
17
18/// Manages loading and caching of mindmap files with security validation
19#[derive(Debug)]
20pub struct MindmapCache {
21    /// Cache of loaded mindmaps: canonical path -> Mindmap
22    cache: HashMap<PathBuf, Mindmap>,
23    /// Canonicalized workspace root for safety checks
24    workspace_root: PathBuf,
25    /// Max file size to load (default: 10MB)
26    max_file_size: u64,
27    /// Max recursion depth (default: 50)
28    max_depth: usize,
29}
30
31impl MindmapCache {
32    /// Create a new cache with the given workspace root
33    pub fn new(workspace_root: PathBuf) -> Self {
34        // Canonicalize workspace root to absolute, real path
35        let canonical_root = fs::canonicalize(&workspace_root)
36            .unwrap_or_else(|_| workspace_root.canonicalize().unwrap_or(workspace_root));
37
38        MindmapCache {
39            cache: HashMap::new(),
40            workspace_root: canonical_root,
41            max_file_size: 10 * 1024 * 1024, // 10 MB
42            max_depth: 50,
43        }
44    }
45
46    /// Get the workspace root
47    pub fn workspace_root(&self) -> &Path {
48        &self.workspace_root
49    }
50
51    /// Load a mindmap file with caching
52    ///
53    /// # Arguments
54    /// * `base_file` - The file that contains the reference (used to resolve relative paths)
55    /// * `relative` - The relative path to load (e.g., "./MINDMAP.llm.md")
56    /// * `visited` - Set of already-visited files (for cycle detection)
57    ///
58    /// # Errors
59    /// - Path traversal attempts (e.g., "../../../etc/passwd")
60    /// - Absolute paths (POSIX, Windows drive letters, UNC paths)
61    /// - File too large (> max_file_size)
62    /// - File not found
63    /// - Cycle detected (path already in visited set)
64    pub fn load(
65        &mut self,
66        base_file: &Path,
67        relative: &str,
68        visited: &std::collections::HashSet<PathBuf>,
69    ) -> Result<&Mindmap> {
70        // Resolve relative path from the current file's directory
71        let canonical = self.resolve_path(base_file, relative)?;
72
73        // Check for cycles
74        if visited.contains(&canonical) {
75            bail!(
76                "Circular reference detected: {} -> {}",
77                base_file.display(),
78                relative
79            );
80        }
81
82        // Return cached version if already loaded
83        if self.cache.contains_key(&canonical) {
84            return Ok(self.cache.get(&canonical).unwrap());
85        }
86
87        // Check file size before reading
88        let metadata = fs::metadata(&canonical)
89            .with_context(|| format!("Failed to stat file: {}", canonical.display()))?;
90
91        if metadata.len() > self.max_file_size {
92            bail!(
93                "File too large: {} bytes (max: {} bytes)",
94                metadata.len(),
95                self.max_file_size
96            );
97        }
98
99        // Load the mindmap
100        let mm = Mindmap::load(canonical.clone())
101            .with_context(|| format!("Failed to load mindmap: {}", canonical.display()))?;
102
103        // Cache and return
104        self.cache.insert(canonical.clone(), mm);
105        Ok(self.cache.get(&canonical).unwrap())
106    }
107
108    /// Resolve a relative path to a canonical absolute path
109    ///
110    /// Path resolution rules:
111    /// 1. Resolve relative to the base_file's directory
112    /// 2. No absolute paths allowed (POSIX /foo, Windows C:\foo, UNC \\server\share)
113    /// 3. No directory traversal escapes (../../../ blocked)
114    /// 4. Final path must be within workspace_root
115    ///
116    /// # Errors
117    /// - Absolute path detected
118    /// - Path escape attempt (outside workspace_root)
119    /// - Path canonicalization failed
120    pub fn resolve_path(&self, base_file: &Path, relative: &str) -> Result<PathBuf> {
121        let rel_path = Path::new(relative);
122
123        // Reject absolute paths (POSIX, Windows, UNC)
124        if rel_path.is_absolute() {
125            bail!("Absolute paths not allowed: {}", relative);
126        }
127
128        // Check for Windows drive letters or UNC prefixes or POSIX root
129        for component in rel_path.components() {
130            match component {
131                Component::Prefix(_) | Component::RootDir => {
132                    bail!("Absolute paths not allowed: {}", relative);
133                }
134                _ => {}
135            }
136        }
137
138        // Resolve relative to the base file's directory
139        let base_dir = base_file.parent().unwrap_or(&self.workspace_root);
140
141        // Canonicalize the base directory if it exists
142        let canonical_base = fs::canonicalize(base_dir).unwrap_or_else(|_| base_dir.to_path_buf());
143
144        // Join the relative path
145        let full_path = canonical_base.join(rel_path);
146
147        // Canonicalize (this validates the path structure and resolves symlinks)
148        let canonical = fs::canonicalize(&full_path).with_context(|| {
149            format!(
150                "Failed to resolve path: {} (relative to {})",
151                relative,
152                base_dir.display()
153            )
154        })?;
155
156        // Ensure the resolved path is still within the workspace
157        if !canonical.starts_with(&self.workspace_root) {
158            bail!(
159                "Path escape attempt: {} resolves outside workspace",
160                relative
161            );
162        }
163
164        Ok(canonical)
165    }
166
167    /// Clear the cache
168    pub fn clear(&mut self) {
169        self.cache.clear();
170    }
171
172    /// Get cache statistics
173    pub fn stats(&self) -> CacheStats {
174        CacheStats {
175            num_cached: self.cache.len(),
176            total_nodes: self.cache.values().map(|mm| mm.nodes.len()).sum(),
177        }
178    }
179
180    /// Set max file size (for testing)
181    #[cfg(test)]
182    pub fn set_max_file_size(&mut self, size: u64) {
183        self.max_file_size = size;
184    }
185
186    /// Set max recursion depth (for testing)
187    #[cfg(test)]
188    pub fn set_max_depth(&mut self, depth: usize) {
189        self.max_depth = depth;
190    }
191
192    /// Get max recursion depth
193    pub fn max_depth(&self) -> usize {
194        self.max_depth
195    }
196}
197
/// Snapshot of what the cache currently holds.
#[derive(Debug, Clone)]
pub struct CacheStats {
    /// Number of mindmap files in the cache.
    pub num_cached: usize,
    /// Sum of the node counts of all cached mindmaps.
    pub total_nodes: usize,
}
204
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use std::fs;
    use tempfile::TempDir;

    /// A fresh cache is empty and has an absolute workspace root.
    #[test]
    fn test_cache_new() {
        let cache = MindmapCache::new(PathBuf::from("."));
        assert_eq!(cache.cache.len(), 0);
        assert!(cache.workspace_root.is_absolute());
    }

    /// A sibling file referenced with `./` resolves inside the workspace.
    #[test]
    fn test_resolve_path_relative() -> Result<()> {
        let temp = TempDir::new()?;
        let base = temp.path().join("subdir");
        fs::create_dir(&base)?;
        let base_file = base.join("MINDMAP.md");
        fs::write(&base_file, "[1] **Test** - body")?;

        // Create the target file
        let other_file = base.join("other.md");
        fs::write(&other_file, "[2] **Other** - body")?;

        let cache = MindmapCache::new(temp.path().to_path_buf());

        // Relative path in same directory
        let resolved = cache.resolve_path(&base_file, "./other.md")?;
        assert!(resolved.ends_with("other.md"));
        assert!(resolved.starts_with(cache.workspace_root()));

        Ok(())
    }

    /// Rooted paths are rejected before any filesystem access.
    #[test]
    fn test_resolve_path_rejects_absolute_posix() {
        let cache = MindmapCache::new(PathBuf::from("."));
        let base_file = PathBuf::from("MINDMAP.md");

        // Should reject absolute POSIX path
        let result = cache.resolve_path(&base_file, "/etc/passwd");
        assert!(result.is_err());
        assert!(
            result
                .unwrap_err()
                .to_string()
                .contains("Absolute paths not allowed")
        );
    }

    /// `..` segments that leave the workspace are rejected, while `..`
    /// segments that stay inside it are accepted.
    #[test]
    fn test_resolve_path_rejects_parent_escape() -> Result<()> {
        let temp = TempDir::new()?;

        // Layout:
        //   temp/outside.md                      (exists, outside the workspace)
        //   temp/workspace/subdir/MINDMAP.md     (the referencing file)
        let outside = temp.path().join("outside.md");
        fs::write(&outside, "[9] **Outside** - body")?;

        let workspace = temp.path().join("workspace");
        let subdir = workspace.join("subdir");
        fs::create_dir_all(&subdir)?;
        let base_file = subdir.join("MINDMAP.md");
        fs::write(&base_file, "[1] **Test** - body")?;

        let cache = MindmapCache::new(workspace);

        // The target exists, so canonicalization succeeds and the containment
        // check itself must be what rejects it.
        let result = cache.resolve_path(&base_file, "../../outside.md");
        assert!(result.is_err());
        assert!(
            result
                .unwrap_err()
                .to_string()
                .contains("Path escape attempt")
        );

        // A deep traversal must fail whether or not the target exists.
        let deep = format!("{}etc/passwd", "../".repeat(10));
        assert!(cache.resolve_path(&base_file, &deep).is_err());

        // Traversal that stays inside the workspace is fine.
        let inside = cache.resolve_path(&base_file, "../subdir/MINDMAP.md")?;
        assert!(inside.starts_with(cache.workspace_root()));

        Ok(())
    }

    /// Loading the same file twice returns the same cached instance.
    #[test]
    fn test_load_caches_files() -> Result<()> {
        let temp = TempDir::new()?;
        let file1 = temp.path().join("MINDMAP.md");
        fs::write(&file1, "[1] **Test** - body\n")?;

        let mut cache = MindmapCache::new(temp.path().to_path_buf());
        let visited = HashSet::new();

        // First load - capture pointer before borrow ends
        let mm1_ptr = {
            let mm1 = cache.load(&file1, "./MINDMAP.md", &visited)?;
            mm1 as *const Mindmap
        };
        assert_eq!(cache.cache.len(), 1);

        // Second load should return cached
        let mm2_ptr = {
            let mm2 = cache.load(&file1, "./MINDMAP.md", &visited)?;
            mm2 as *const Mindmap
        };
        assert_eq!(cache.cache.len(), 1);

        // Both should be the same (pointer equality)
        assert_eq!(mm1_ptr, mm2_ptr);

        Ok(())
    }

    /// A path already present in `visited` is reported as a cycle.
    #[test]
    fn test_load_detects_cycle() -> Result<()> {
        let temp = TempDir::new()?;
        let file1 = temp.path().join("MINDMAP.md");
        fs::write(&file1, "[1] **Test** - body\n")?;

        let mut cache = MindmapCache::new(temp.path().to_path_buf());
        let mut visited = HashSet::new();

        // Mark the file as already visited
        let canonical = cache.resolve_path(&file1, "./MINDMAP.md")?;
        visited.insert(canonical);

        // Loading it with that visited set should fail
        let result = cache.load(&file1, "./MINDMAP.md", &visited);
        assert!(result.is_err());
        assert!(
            result
                .unwrap_err()
                .to_string()
                .contains("Circular reference")
        );

        Ok(())
    }

    /// Files above the configured size limit are refused.
    #[test]
    fn test_load_rejects_oversized_file() -> Result<()> {
        let temp = TempDir::new()?;
        let file1 = temp.path().join("big.md");

        // Create a file larger than the test limit
        let content = "x".repeat(1024 * 1024); // 1 MB
        fs::write(&file1, &content)?;

        let mut cache = MindmapCache::new(temp.path().to_path_buf());
        cache.set_max_file_size(1024); // Set limit to 1 KB

        let visited = HashSet::new();
        let result = cache.load(&file1, "./big.md", &visited);

        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("File too large"));

        Ok(())
    }

    /// Stats reflect the number of cached files and their total node count.
    #[test]
    fn test_cache_stats() -> Result<()> {
        let temp = TempDir::new()?;
        let file1 = temp.path().join("MINDMAP.md");
        fs::write(&file1, "[1] **Test1** - body\n[2] **Test2** - body\n")?;

        let mut cache = MindmapCache::new(temp.path().to_path_buf());
        let visited = HashSet::new();

        cache.load(&file1, "./MINDMAP.md", &visited)?;
        let stats = cache.stats();

        assert_eq!(stats.num_cached, 1);
        assert_eq!(stats.total_nodes, 2);

        Ok(())
    }
}