go_brrr/callgraph/
cache.rs

1//! Incremental call graph caching.
2//!
3//! Provides persistent caching of call graphs with incremental updates:
4//! - Caches graphs to `.brrr/cache/call_graph.json` for fast repeated queries
5//! - Detects dirty files by comparing content hashes
6//! - Applies incremental patches instead of full rebuilds when possible
7//!
8//! This module mirrors the Python implementation's `_get_or_build_graph()`
9//! function from `cli.py`, ensuring consistent behavior across both backends.
10//!
11//! # Cache Format
12//!
13//! The cache file is a JSON object with the following structure:
14//! ```json
15//! {
16//!   "edges": [...],
17//!   "file_hashes": {"path/to/file.py": 12345678901234567890, ...},
18//!   "languages": ["python"],
19//!   "timestamp": 1234567890.123
20//! }
21//! ```
22//!
23//! # Incremental Updates
24//!
25//! When files are modified:
26//! 1. Detect dirty files by comparing content hashes
27//! 2. Remove old edges involving dirty files
28//! 3. Re-parse only the dirty files
29//! 4. Merge new edges into the graph
30//! 5. Update cache with new hashes
31
32use std::collections::hash_map::DefaultHasher;
33use std::collections::{HashMap, HashSet};
34use std::fs;
35use std::hash::{Hash, Hasher};
36use std::path::{Path, PathBuf};
37use std::time::{SystemTime, UNIX_EPOCH};
38
39use serde::{Deserialize, Serialize};
40use tracing::{debug, info, warn};
41
42use crate::callgraph::indexer::FunctionIndex;
43use crate::callgraph::resolver;
44use crate::callgraph::scanner::{ProjectScanner, ScanConfig};
45use crate::callgraph::types::{CallEdge, CallGraph, FunctionRef};
46use crate::error::{Result, BrrrError};
47
48// =============================================================================
49// Cache Types
50// =============================================================================
51
52/// Cached call graph with metadata for incremental updates.
53///
54/// Stores the serialized graph edges along with file content hashes
55/// to detect modifications since the last build.
56#[derive(Debug, Clone, Serialize, Deserialize)]
57pub struct CachedCallGraph {
58    /// Call graph edges in serializable format.
59    pub edges: Vec<CachedEdge>,
60    /// Map of file paths to their content hashes.
61    /// Used to detect which files have changed since last cache.
62    #[serde(default)]
63    pub file_hashes: HashMap<String, u64>,
64    /// Languages included in this cache.
65    #[serde(default)]
66    pub languages: Vec<String>,
67    /// Unix timestamp when cache was created.
68    #[serde(default)]
69    pub timestamp: f64,
70}
71
72/// Serializable representation of a call edge.
73///
74/// Simplified structure that matches the Python cache format.
75#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct CachedEdge {
77    pub from_file: String,
78    pub from_func: String,
79    pub to_file: String,
80    pub to_func: String,
81    #[serde(default)]
82    pub call_line: usize,
83}
84
85impl CachedEdge {
86    /// Convert from CallEdge to CachedEdge.
87    pub fn from_edge(edge: &CallEdge) -> Self {
88        Self {
89            from_file: edge.caller.file.clone(),
90            from_func: edge.caller.name.clone(),
91            to_file: edge.callee.file.clone(),
92            to_func: edge.callee.name.clone(),
93            call_line: edge.call_line,
94        }
95    }
96
97    /// Convert to CallEdge.
98    pub fn to_edge(&self) -> CallEdge {
99        CallEdge {
100            caller: FunctionRef {
101                file: self.from_file.clone(),
102                name: self.from_func.clone(),
103                qualified_name: None,
104            },
105            callee: FunctionRef {
106                file: self.to_file.clone(),
107                name: self.to_func.clone(),
108                qualified_name: None,
109            },
110            call_line: self.call_line,
111        }
112    }
113}
114
115// =============================================================================
116// Cache Operations
117// =============================================================================
118
119/// Get the cache directory for a project.
120pub fn get_cache_dir(project: &Path) -> PathBuf {
121    project.join(".brrr").join("cache")
122}
123
124/// Get the cache file path for a project.
125pub fn get_cache_file(project: &Path) -> PathBuf {
126    get_cache_dir(project).join("call_graph.json")
127}
128
129/// Load cached call graph from disk.
130///
131/// Returns `None` if cache doesn't exist or is invalid.
132pub fn load_cached_graph(project: &Path) -> Option<CachedCallGraph> {
133    let cache_file = get_cache_file(project);
134
135    if !cache_file.exists() {
136        debug!("Cache file not found: {}", cache_file.display());
137        return None;
138    }
139
140    let content = match fs::read_to_string(&cache_file) {
141        Ok(c) => c,
142        Err(e) => {
143            warn!("Failed to read cache file: {}", e);
144            return None;
145        }
146    };
147
148    match serde_json::from_str(&content) {
149        Ok(cached) => Some(cached),
150        Err(e) => {
151            warn!("Failed to parse cache file: {}", e);
152            None
153        }
154    }
155}
156
157/// Save cached call graph to disk.
158///
159/// Creates the cache directory if it doesn't exist.
160pub fn save_cached_graph(project: &Path, cached: &CachedCallGraph) -> Result<()> {
161    let cache_dir = get_cache_dir(project);
162    let cache_file = get_cache_file(project);
163
164    // Ensure cache directory exists
165    if !cache_dir.exists() {
166        fs::create_dir_all(&cache_dir).map_err(|e| {
167            BrrrError::Cache(format!(
168                "Failed to create cache directory {}: {}",
169                cache_dir.display(),
170                e
171            ))
172        })?;
173    }
174
175    // Serialize with pretty printing for human readability
176    let content = serde_json::to_string_pretty(cached).map_err(|e| {
177        BrrrError::Cache(format!("Failed to serialize cache: {}", e))
178    })?;
179
180    fs::write(&cache_file, content).map_err(|e| {
181        BrrrError::Cache(format!(
182            "Failed to write cache file {}: {}",
183            cache_file.display(),
184            e
185        ))
186    })?;
187
188    debug!("Saved cache to: {}", cache_file.display());
189    Ok(())
190}
191
192// =============================================================================
193// Hash Computation
194// =============================================================================
195
196/// Compute a hash for file content.
197///
198/// Uses `DefaultHasher` which provides fast, deterministic hashing
199/// suitable for change detection (not cryptographic security).
200pub fn compute_content_hash(content: &str) -> u64 {
201    let mut hasher = DefaultHasher::new();
202    content.hash(&mut hasher);
203    hasher.finish()
204}
205
206/// Compute hashes for a list of file paths.
207pub fn compute_hashes_for_files(files: &[PathBuf]) -> HashMap<String, u64> {
208    let mut hashes = HashMap::new();
209
210    for file_path in files {
211        if let Ok(content) = fs::read_to_string(file_path) {
212            let hash = compute_content_hash(&content);
213            if let Some(path_str) = file_path.to_str() {
214                hashes.insert(path_str.to_string(), hash);
215            }
216        }
217    }
218
219    hashes
220}
221
222// =============================================================================
223// Dirty File Detection
224// =============================================================================
225
226/// Find files that have changed since the cache was created.
227///
228/// Compares current file hashes against cached hashes to identify:
229/// - Modified files (hash changed)
230/// - New files (not in cache)
231/// - Deleted files (in cache but no longer exist)
232pub fn find_dirty_files(
233    _project: &Path,
234    cached_hashes: &HashMap<String, u64>,
235    current_files: &[PathBuf],
236) -> Vec<PathBuf> {
237    let mut dirty = Vec::new();
238
239    // Check each current file against cache
240    for file_path in current_files {
241        let path_str = match file_path.to_str() {
242            Some(s) => s,
243            None => continue,
244        };
245
246        // Read current content and compute hash
247        let current_hash = match fs::read_to_string(file_path) {
248            Ok(content) => compute_content_hash(&content),
249            Err(_) => continue, // Skip unreadable files
250        };
251
252        // Compare against cached hash
253        match cached_hashes.get(path_str) {
254            Some(&cached_hash) if cached_hash == current_hash => {
255                // File unchanged
256            }
257            _ => {
258                // File is new or modified
259                dirty.push(file_path.clone());
260            }
261        }
262    }
263
264    // Note: We don't need to explicitly track deleted files here
265    // because their edges will be filtered out when we use the graph.
266    // The incremental update will naturally exclude deleted file edges.
267
268    dirty
269}
270
271// =============================================================================
272// Incremental Updates
273// =============================================================================
274
275/// Apply incremental update to a graph for dirty files.
276///
277/// This function:
278/// 1. Removes old edges involving dirty files
279/// 2. Re-parses only the dirty files
280/// 3. Merges new edges into the graph
281///
282/// # Arguments
283///
284/// * `graph` - The existing call graph to update
285/// * `dirty_files` - Files that need to be re-parsed
286/// * `project` - Project root directory
287/// * `lang` - Optional language filter
288///
289/// # Returns
290///
291/// Updated call graph with new edges for dirty files.
292pub fn apply_incremental_update(
293    graph: &mut CallGraph,
294    dirty_files: &[PathBuf],
295    project: &Path,
296) -> Result<()> {
297    if dirty_files.is_empty() {
298        return Ok(());
299    }
300
301    info!("Applying incremental update for {} dirty files", dirty_files.len());
302
303    // Build set of dirty file paths for efficient lookup
304    let dirty_set: HashSet<String> = dirty_files
305        .iter()
306        .filter_map(|p| p.to_str().map(String::from))
307        .collect();
308
309    // Remove old edges involving dirty files
310    let original_count = graph.edges.len();
311    graph.edges.retain(|edge| {
312        !dirty_set.contains(&edge.caller.file) && !dirty_set.contains(&edge.callee.file)
313    });
314    let removed_count = original_count - graph.edges.len();
315    debug!("Removed {} edges from dirty files", removed_count);
316
317    // Build index from remaining files
318    // We need to include all existing files to properly resolve cross-file calls
319    let mut all_files: Vec<PathBuf> = graph.edges
320        .iter()
321        .flat_map(|e| {
322            let mut files = Vec::new();
323            if !e.caller.file.is_empty() {
324                files.push(PathBuf::from(&e.caller.file));
325            }
326            if !e.callee.file.is_empty() {
327                files.push(PathBuf::from(&e.callee.file));
328            }
329            files
330        })
331        .collect();
332
333    // Add dirty files to the list
334    all_files.extend(dirty_files.iter().cloned());
335
336    // Deduplicate
337    let unique_files: Vec<PathBuf> = all_files
338        .into_iter()
339        .collect::<HashSet<_>>()
340        .into_iter()
341        .collect();
342
343    // Build function index from all files
344    let index = FunctionIndex::build(&unique_files)?;
345
346    // Resolve calls for dirty files only
347    let new_edges = resolver::resolve_calls(dirty_files, &index, project)?;
348
349    // Merge new edges into graph
350    let new_edge_count = new_edges.edges.len();
351    graph.edges.extend(new_edges.edges);
352
353    debug!("Added {} new edges from dirty files", new_edge_count);
354
355    // Rebuild indexes for the updated graph
356    graph.build_indexes();
357
358    Ok(())
359}
360
361// =============================================================================
362// Main API
363// =============================================================================
364
365/// Get or build a call graph with caching and configuration options.
366///
367/// # Arguments
368///
369/// * `project` - Path to the project root
370/// * `lang` - Optional language filter (e.g., "python", "rust")
371/// * `no_ignore` - Whether to ignore .gitignore/.brrrignore patterns
372///
373/// # Returns
374///
375/// A call graph, either loaded from cache or freshly built.
376pub fn get_or_build_graph_with_config(
377    project: &Path,
378    lang: Option<&str>,
379    no_ignore: bool,
380) -> Result<CallGraph> {
381    let project = project.canonicalize().unwrap_or_else(|_| project.to_path_buf());
382
383    // When no_ignore is set, always do fresh build to ensure consistency
384    // (cache was built with ignore patterns, but user now wants to include ignored files)
385    if no_ignore {
386        info!("no_ignore set, building fresh graph without cache");
387        return build_and_cache_with_config(&project, lang, no_ignore);
388    }
389
390    // Try to load cached graph
391    if let Some(cached) = load_cached_graph(&project) {
392        // Validate cache language compatibility
393        if let Some(requested_lang) = lang {
394            if requested_lang != "all"
395                && !cached.languages.is_empty()
396                && !cached.languages.contains(&requested_lang.to_string())
397            {
398                info!("Cache language mismatch, rebuilding");
399                return build_and_cache_with_config(&project, lang, no_ignore);
400            }
401        }
402
403        // Scan current project files
404        let scanner = ProjectScanner::new(project.to_str().unwrap_or("."))?;
405        let mut scan_config = match lang {
406            Some(l) if l != "all" => ScanConfig::for_language(l),
407            _ => ScanConfig::default(),
408        };
409        scan_config.no_ignore = no_ignore;
410        let scan_result = scanner.scan_with_config(&scan_config)?;
411
412        // Find dirty files
413        let dirty_files = find_dirty_files(&project, &cached.file_hashes, &scan_result.files);
414
415        if dirty_files.is_empty() {
416            // Cache is fresh, convert to CallGraph
417            info!("Cache is fresh, loading {} edges", cached.edges.len());
418            let edges: Vec<CallEdge> = cached.edges.iter().map(|e| e.to_edge()).collect();
419            let mut graph = CallGraph::from_edges(edges);
420            graph.build_indexes();
421            return Ok(graph);
422        }
423
424        info!("Found {} dirty files, applying incremental update", dirty_files.len());
425
426        // Convert cache to CallGraph
427        let edges: Vec<CallEdge> = cached.edges.iter().map(|e| e.to_edge()).collect();
428        let mut graph = CallGraph::from_edges(edges);
429
430        // Apply incremental update
431        apply_incremental_update(&mut graph, &dirty_files, &project)?;
432
433        // Compute new hashes and update cache
434        let new_hashes = compute_hashes_for_files(&scan_result.files);
435        let new_cached = CachedCallGraph {
436            edges: graph.edges.iter().map(CachedEdge::from_edge).collect(),
437            file_hashes: new_hashes,
438            languages: cached.languages.clone(),
439            timestamp: SystemTime::now()
440                .duration_since(UNIX_EPOCH)
441                .map(|d| d.as_secs_f64())
442                .unwrap_or(0.0),
443        };
444
445        // Save updated cache (non-fatal if it fails)
446        if let Err(e) = save_cached_graph(&project, &new_cached) {
447            warn!("Failed to save updated cache: {}", e);
448        }
449
450        return Ok(graph);
451    }
452
453    // No cache or invalid cache - do fresh build
454    build_and_cache_with_config(&project, lang, no_ignore)
455}
456
457/// Build a fresh call graph with configuration options and save to cache.
458fn build_and_cache_with_config(
459    project: &Path,
460    lang: Option<&str>,
461    no_ignore: bool,
462) -> Result<CallGraph> {
463    info!("Building fresh call graph for {}", project.display());
464
465    // Use the build_with_config function that respects no_ignore
466    let graph = crate::callgraph::build_with_config(
467        project.to_str().unwrap_or("."),
468        lang,
469        no_ignore,
470    )?;
471
472    // Only save to cache if not in no_ignore mode
473    // (cache built with no_ignore would be inconsistent with normal cached graphs)
474    if !no_ignore {
475        // Scan files to get hashes
476        let scanner = ProjectScanner::new(project.to_str().unwrap_or("."))?;
477        let mut scan_config = match lang {
478            Some(l) if l != "all" => ScanConfig::for_language(l),
479            _ => ScanConfig::default(),
480        };
481        scan_config.no_ignore = no_ignore;
482        let scan_result = scanner.scan_with_config(&scan_config)?;
483        let file_hashes = compute_hashes_for_files(&scan_result.files);
484
485        // Create cache entry
486        let cached = CachedCallGraph {
487            edges: graph.edges.iter().map(CachedEdge::from_edge).collect(),
488            file_hashes,
489            languages: lang.map(|l| vec![l.to_string()]).unwrap_or_default(),
490            timestamp: SystemTime::now()
491                .duration_since(UNIX_EPOCH)
492                .map(|d| d.as_secs_f64())
493                .unwrap_or(0.0),
494        };
495
496        // Save cache (non-fatal if it fails)
497        if let Err(e) = save_cached_graph(project, &cached) {
498            warn!("Failed to save cache: {}", e);
499        }
500    }
501
502    info!("Built graph with {} edges", graph.edges.len());
503    Ok(graph)
504}
505
506/// Warm the call graph cache with configuration options.
507///
508/// # Arguments
509///
510/// * `project` - Path to the project root
511/// * `lang` - Optional language filter (e.g., "python", "rust")
512/// * `no_ignore` - Whether to ignore .gitignore/.brrrignore patterns
513pub fn warm_cache_with_config(project: &Path, lang: Option<&str>, no_ignore: bool) -> Result<()> {
514    let _ = build_and_cache_with_config(project, lang, no_ignore)?;
515    Ok(())
516}
517
518/// Invalidate the cache for a project.
519///
520/// Removes the cache file, forcing a fresh build on next query.
521#[allow(dead_code)]
522pub fn invalidate_cache(project: &Path) -> Result<()> {
523    let cache_file = get_cache_file(project);
524
525    if cache_file.exists() {
526        fs::remove_file(&cache_file).map_err(|e| {
527            BrrrError::Cache(format!(
528                "Failed to remove cache file {}: {}",
529                cache_file.display(),
530                e
531            ))
532        })?;
533        info!("Invalidated cache: {}", cache_file.display());
534    }
535
536    Ok(())
537}
538
539// =============================================================================
540// Tests
541// =============================================================================
542
543#[cfg(test)]
544mod tests {
545    use super::*;
546    use tempfile::TempDir;
547
548    #[test]
549    fn test_compute_content_hash() {
550        let hash1 = compute_content_hash("hello world");
551        let hash2 = compute_content_hash("hello world");
552        let hash3 = compute_content_hash("hello world!");
553
554        assert_eq!(hash1, hash2);
555        assert_ne!(hash1, hash3);
556    }
557
558    #[test]
559    fn test_cached_edge_conversion() {
560        let edge = CallEdge {
561            caller: FunctionRef {
562                file: "src/main.py".to_string(),
563                name: "main".to_string(),
564                qualified_name: None,
565            },
566            callee: FunctionRef {
567                file: "src/utils.py".to_string(),
568                name: "helper".to_string(),
569                qualified_name: None,
570            },
571            call_line: 10,
572        };
573
574        let cached = CachedEdge::from_edge(&edge);
575        assert_eq!(cached.from_file, "src/main.py");
576        assert_eq!(cached.from_func, "main");
577        assert_eq!(cached.to_file, "src/utils.py");
578        assert_eq!(cached.to_func, "helper");
579        assert_eq!(cached.call_line, 10);
580
581        let back = cached.to_edge();
582        assert_eq!(back.caller.file, edge.caller.file);
583        assert_eq!(back.caller.name, edge.caller.name);
584        assert_eq!(back.callee.file, edge.callee.file);
585        assert_eq!(back.callee.name, edge.callee.name);
586        assert_eq!(back.call_line, edge.call_line);
587    }
588
589    #[test]
590    fn test_find_dirty_files_empty_cache() {
591        let temp_dir = TempDir::new().unwrap();
592        let project = temp_dir.path();
593
594        // Create a test file
595        let test_file = project.join("test.py");
596        fs::write(&test_file, "def foo(): pass").unwrap();
597
598        // Empty cache hashes - all files should be dirty
599        let cached_hashes = HashMap::new();
600        let current_files = vec![test_file.clone()];
601
602        let dirty = find_dirty_files(project, &cached_hashes, &current_files);
603        assert_eq!(dirty.len(), 1);
604        assert_eq!(dirty[0], test_file);
605    }
606
607    #[test]
608    fn test_find_dirty_files_unchanged() {
609        let temp_dir = TempDir::new().unwrap();
610        let project = temp_dir.path();
611
612        // Create a test file
613        let test_file = project.join("test.py");
614        let content = "def foo(): pass";
615        fs::write(&test_file, content).unwrap();
616
617        // Cache with matching hash
618        let mut cached_hashes = HashMap::new();
619        cached_hashes.insert(
620            test_file.to_str().unwrap().to_string(),
621            compute_content_hash(content),
622        );
623        let current_files = vec![test_file.clone()];
624
625        let dirty = find_dirty_files(project, &cached_hashes, &current_files);
626        assert!(dirty.is_empty());
627    }
628
629    #[test]
630    fn test_find_dirty_files_modified() {
631        let temp_dir = TempDir::new().unwrap();
632        let project = temp_dir.path();
633
634        // Create a test file
635        let test_file = project.join("test.py");
636        let old_content = "def foo(): pass";
637        let new_content = "def foo(): return 42";
638        fs::write(&test_file, new_content).unwrap();
639
640        // Cache with old hash
641        let mut cached_hashes = HashMap::new();
642        cached_hashes.insert(
643            test_file.to_str().unwrap().to_string(),
644            compute_content_hash(old_content),
645        );
646        let current_files = vec![test_file.clone()];
647
648        let dirty = find_dirty_files(project, &cached_hashes, &current_files);
649        assert_eq!(dirty.len(), 1);
650        assert_eq!(dirty[0], test_file);
651    }
652
653    #[test]
654    fn test_cache_file_path() {
655        let project = Path::new("/home/user/project");
656        let cache_file = get_cache_file(project);
657        assert_eq!(
658            cache_file,
659            PathBuf::from("/home/user/project/.brrr/cache/call_graph.json")
660        );
661    }
662}