// graphify_watch/lib.rs
1//! File watching and auto-rebuild for graphify.
2//!
3//! Uses `notify` + debouncing to watch for file changes and trigger
4//! incremental graph rebuilds. Port of Python `watch.py`.
5
6use std::collections::HashMap;
7use std::path::{Path, PathBuf};
8use std::time::Duration;
9
10use notify::RecursiveMode;
11use notify_debouncer_mini::new_debouncer;
12use thiserror::Error;
13use tokio::sync::mpsc;
14use tracing::{debug, info, warn};
15
/// Debounce duration before triggering a rebuild.
///
/// Filesystem events arriving within this window are coalesced into a single
/// batch by `notify_debouncer_mini`, so a burst of rapid saves triggers one
/// rebuild instead of many.
const DEBOUNCE_DURATION: Duration = Duration::from_secs(3);
18
/// Default ignore patterns for files that should not trigger rebuilds.
///
/// Consulted by `should_ignore`; covers VCS metadata, dependency and build
/// directories, Python bytecode, our own output directory (to avoid
/// rebuild feedback loops), and macOS metadata files.
const IGNORE_PATTERNS: &[&str] = &[
    ".git",         // VCS internals
    "node_modules", // JS dependencies
    "__pycache__",  // Python bytecode cache directory
    ".pyc",         // Python bytecode files (filename suffix)
    "target",       // Rust build output
    "graphify-out", // our own output directory — would cause rebuild loops
    ".DS_Store",    // macOS Finder metadata
];
29
/// Errors from the watcher.
#[derive(Debug, Error)]
pub enum WatchError {
    /// Underlying filesystem I/O failure.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Error surfaced by the `notify` file-watching backend.
    #[error("notify error: {0}")]
    Notify(#[from] notify::Error),

    /// The debouncer or watcher could not be constructed/configured.
    #[error("watch setup failed: {0}")]
    Setup(String),

    /// A pipeline stage (graph build, output-dir creation, ...) failed
    /// during a rebuild.
    #[error("rebuild failed: {0}")]
    Rebuild(String),
}
45
46/// Check if a path should be ignored based on common patterns.
47fn should_ignore(path: &Path) -> bool {
48    let path_str = path.to_string_lossy();
49    IGNORE_PATTERNS.iter().any(|p| path_str.contains(p))
50}
51
52/// Filter changed paths to only include relevant source files.
53fn filter_changes(paths: &[PathBuf]) -> Vec<PathBuf> {
54    paths
55        .iter()
56        .filter(|p| !should_ignore(p))
57        .cloned()
58        .collect()
59}
60
/// Run the full pipeline: detect -> extract -> build -> cluster -> analyze -> export.
///
/// When `changed_files` is provided, only those files have their cache invalidated
/// before extraction, achieving an incremental rebuild without re-parsing unchanged files.
///
/// # Arguments
/// * `root` - Project root to scan.
/// * `output_dir` - Directory receiving all exported artifacts; a `cache/`
///   subdirectory inside it holds per-file extraction caches.
/// * `changed_files` - When `Some`, the paths whose cached extraction results
///   are invalidated before this run (incremental mode); `None` runs a full
///   build that still reuses any valid cache entries.
///
/// # Errors
/// Returns [`WatchError::Rebuild`] when graph construction or output-directory
/// creation fails. Per-file cache and export failures are deliberately
/// best-effort (`let _ =`) so one bad file or format cannot abort the rebuild.
fn rebuild(
    root: &Path,
    output_dir: &Path,
    changed_files: Option<&[PathBuf]>,
) -> Result<(), WatchError> {
    let cache_dir = output_dir.join("cache");

    // ── Step 0: Invalidate cache for changed files ──
    // Best-effort: a failed invalidation merely means a stale cache entry
    // may be reused for this one run.
    if let Some(changed) = changed_files {
        for path in changed {
            let _ = graphify_cache::invalidate_cached(path, root, &cache_dir);
        }
        info!(
            "rebuild: invalidated cache for {} changed file(s)",
            changed.len()
        );
    }

    // ── Step 1: Detect files ──
    info!("rebuild: detecting files...");
    let detection = graphify_detect::detect(root);
    info!(
        "rebuild: found {} files (~{} words)",
        detection.total_files, detection.total_words
    );

    // ── Step 2: Extract AST ──
    // Detection appears to return paths relative to `root` (they are joined
    // back onto it here) — confirm against graphify_detect's contract.
    let code_files: Vec<PathBuf> = detection
        .files
        .get(&graphify_detect::FileType::Code)
        .map(|v| v.iter().map(|f| root.join(f)).collect())
        .unwrap_or_default();

    if code_files.is_empty() {
        info!("rebuild: no code files found, skipping");
        return Ok(());
    }

    info!(
        "rebuild: extracting AST from {} code files...",
        code_files.len()
    );
    let mut ast_result = graphify_core::model::ExtractionResult::default();
    let mut cache_hits = 0usize;
    let mut errors = 0usize;
    for file_path in &code_files {
        // Fast path: reuse the cached extraction result for this file.
        if let Some(cached) = graphify_cache::load_cached_from::<
            graphify_core::model::ExtractionResult,
        >(file_path, root, &cache_dir)
        {
            cache_hits += 1;
            ast_result.nodes.extend(cached.nodes);
            ast_result.edges.extend(cached.edges);
            ast_result.hyperedges.extend(cached.hyperedges);
            continue;
        }
        // Extract fresh, catching panics so one malformed file cannot take
        // down the whole rebuild.
        match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
            graphify_extract::extract(std::slice::from_ref(file_path))
        })) {
            Ok(fresh) => {
                // Cache write is best-effort; the fresh result is used either way.
                let _ = graphify_cache::save_cached_to(file_path, &fresh, root, &cache_dir);
                ast_result.nodes.extend(fresh.nodes);
                ast_result.edges.extend(fresh.edges);
                ast_result.hyperedges.extend(fresh.hyperedges);
            }
            Err(_) => {
                errors += 1;
                warn!("rebuild: extraction panicked for {}", file_path.display());
            }
        }
    }
    if cache_hits > 0 {
        info!(
            "rebuild: cache {} hits, {} extracted fresh",
            cache_hits,
            code_files.len() - cache_hits
        );
    }
    if errors > 0 {
        warn!("rebuild: {} file(s) had extraction errors", errors);
    }
    info!(
        "rebuild: Pass 1 (AST): {} nodes, {} edges",
        ast_result.nodes.len(),
        ast_result.edges.len()
    );

    let extractions = vec![ast_result];

    // ── Step 3: Build graph ──
    // Build failure is fatal for this rebuild (unlike per-file extraction).
    info!("rebuild: building graph...");
    let graph = graphify_build::build(&extractions)
        .map_err(|e| WatchError::Rebuild(format!("build failed: {e}")))?;
    info!(
        "rebuild: graph has {} nodes, {} edges",
        graph.node_count(),
        graph.edge_count()
    );

    // ── Step 4: Cluster ──
    info!("rebuild: detecting communities...");
    let communities = graphify_cluster::cluster(&graph);
    let cohesion = graphify_cluster::score_all(&graph, &communities);

    // Label each community after its first member node, falling back to a
    // numeric placeholder when the node (or the community) is missing.
    let community_labels: HashMap<usize, String> = communities
        .iter()
        .map(|(cid, nodes)| {
            let label = nodes
                .first()
                .and_then(|id| graph.get_node(id))
                .map(|n| n.label.clone())
                .unwrap_or_else(|| format!("Community {}", cid));
            (*cid, label)
        })
        .collect();
    info!("rebuild: {} communities detected", communities.len());

    // ── Step 5: Analyze ──
    info!("rebuild: analyzing...");
    let god_list = graphify_analyze::god_nodes(&graph, 10);
    let surprise_list = graphify_analyze::surprising_connections(&graph, &communities, 5);
    let questions = graphify_analyze::suggest_questions(&graph, &communities, &community_labels, 7);

    // ── Step 6: Export all formats ──
    std::fs::create_dir_all(output_dir)
        .map_err(|e| WatchError::Rebuild(format!("create output dir: {e}")))?;

    // Exports are best-effort: a single failing format should not abort the rest.
    let _ = graphify_export::export_json(&graph, output_dir);
    let _ = graphify_export::export_html(&graph, &communities, &community_labels, output_dir);
    let _ = graphify_export::export_graphml(&graph, output_dir);
    let _ = graphify_export::export_cypher(&graph, output_dir);
    let _ = graphify_export::export_svg(&graph, &communities, output_dir);
    let _ = graphify_export::export_wiki(&graph, &communities, &community_labels, output_dir);

    // Report
    let detection_json = serde_json::json!({
        "total_files": detection.total_files,
        "total_words": detection.total_words,
        "warning": detection.warning,
    });
    let god_json: Vec<serde_json::Value> = god_list
        .iter()
        .map(|g| serde_json::json!({"label": g.label, "edges": g.degree}))
        .collect();
    let surprise_json: Vec<serde_json::Value> = surprise_list
        .iter()
        .map(|s| serde_json::to_value(s).unwrap_or_default())
        .collect();
    let question_json: Vec<serde_json::Value> = questions
        .iter()
        .map(|q| serde_json::to_value(q).unwrap_or_default())
        .collect();
    // Token cost is reported as hard-coded zeros; nothing in this rebuild
    // path consumes tokens.
    let token_cost: HashMap<String, usize> =
        HashMap::from([("input".to_string(), 0), ("output".to_string(), 0)]);

    let root_str = root.to_string_lossy();
    let report = graphify_export::generate_report(
        &graph,
        &communities,
        &cohesion,
        &community_labels,
        &god_json,
        &surprise_json,
        &detection_json,
        &token_cost,
        &root_str,
        Some(&question_json),
    );
    let report_path = output_dir.join("GRAPH_REPORT.md");
    let _ = std::fs::write(&report_path, &report);

    // Save manifest
    // Flatten the per-type file map into (path, file_type) pairs so a later
    // run can compare against this snapshot.
    let manifest_path = output_dir.join(".graphify_manifest.json");
    let manifest = graphify_detect::Manifest {
        files: detection
            .files
            .iter()
            .flat_map(|(ft, paths)| paths.iter().map(move |p| (p.clone(), *ft)))
            .collect(),
    };
    let _ = graphify_detect::save_manifest(&manifest_path, &manifest);

    info!("rebuild: done");
    Ok(())
}
251
/// Watch `root` for file changes and trigger rebuilds into `output_dir`.
///
/// This is an async loop that runs until cancelled. On each batch of
/// debounced file changes, it logs the changed paths and invokes an
/// incremental rebuild (only changed files have their cache invalidated).
///
/// # Arguments
/// * `root` - Directory to watch recursively.
/// * `output_dir` - Where to write rebuild output.
///
/// # Errors
/// Returns [`WatchError::Setup`] if the debouncer cannot be created, and
/// [`WatchError::Notify`] if the watch on `root` cannot be established.
/// Individual rebuild failures are reported but do not end the loop.
pub async fn watch_directory(root: &Path, output_dir: &Path) -> Result<(), WatchError> {
    // Bounded channel bridging the debouncer's callback thread to this task.
    let (tx, mut rx) = mpsc::channel::<Vec<PathBuf>>(100);

    let mut debouncer = new_debouncer(
        DEBOUNCE_DURATION,
        // This closure runs on the debouncer's own (non-async) thread, hence
        // `blocking_send` rather than an async send into the tokio channel.
        move |res: Result<Vec<notify_debouncer_mini::DebouncedEvent>, notify::Error>| match res {
            Ok(events) => {
                let paths: Vec<PathBuf> = events.into_iter().map(|e| e.path).collect();
                if let Err(e) = tx.blocking_send(paths) {
                    warn!("Failed to send watch events: {}", e);
                }
            }
            Err(e) => {
                warn!("Watch error: {}", e);
            }
        },
    )
    .map_err(|e| WatchError::Setup(e.to_string()))?;

    debouncer.watcher().watch(root, RecursiveMode::Recursive)?;

    info!(
        "Watching {} for changes (output: {})",
        root.display(),
        output_dir.display()
    );
    println!("Watching {} for changes...", root.display());

    // Run initial build (full)
    // NOTE(review): `rebuild` is synchronous CPU/IO work executed directly on
    // this async task, blocking the executor thread for its duration; consider
    // `spawn_blocking` if the runtime must stay responsive — confirm with the
    // runtime's requirements.
    println!("Running initial build...");
    match rebuild(root, output_dir, None) {
        Ok(()) => println!("Initial build complete."),
        Err(e) => eprintln!("Initial build failed: {e}"),
    }

    // Main loop: exits when the sender side is gone (debouncer dropped) or
    // the future is cancelled.
    while let Some(changed_paths) = rx.recv().await {
        let relevant = filter_changes(&changed_paths);

        if relevant.is_empty() {
            debug!("Ignoring changes in excluded paths");
            continue;
        }

        info!("{} file(s) changed, triggering rebuild...", relevant.len());
        println!(
            "Files changed ({}), triggering incremental rebuild...",
            relevant.len()
        );

        for p in &relevant {
            debug!("  changed: {}", p.display());
        }

        // Rebuild failures are non-fatal: report and keep watching.
        match rebuild(root, output_dir, Some(&relevant)) {
            Ok(()) => {
                println!("Rebuild complete.");
            }
            Err(e) => {
                eprintln!("Rebuild failed: {e}");
            }
        }
    }

    Ok(())
}
326
327// ---------------------------------------------------------------------------
328// Tests
329// ---------------------------------------------------------------------------
330
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Paths under well-known noise directories must be ignored.
    #[test]
    fn test_should_ignore_git() {
        let noisy = [
            "/repo/.git/objects/abc",
            "/repo/node_modules/foo.js",
            "/repo/__pycache__/mod.pyc",
            "/repo/target/debug/build",
            "/repo/graphify-out/graph.json",
        ];
        for p in noisy {
            assert!(should_ignore(Path::new(p)));
        }
    }

    /// Ordinary source and documentation paths must pass through.
    #[test]
    fn test_should_not_ignore_source() {
        for p in ["/repo/src/main.rs", "/repo/lib/utils.py", "/repo/README.md"] {
            assert!(!should_ignore(Path::new(p)));
        }
    }

    /// Mixed batch: only the non-ignored paths survive filtering.
    #[test]
    fn test_filter_changes() {
        let batch: Vec<PathBuf> = [
            "/repo/src/main.rs",
            "/repo/.git/HEAD",
            "/repo/src/lib.rs",
            "/repo/node_modules/foo/index.js",
        ]
        .iter()
        .map(PathBuf::from)
        .collect();

        let kept = filter_changes(&batch);
        assert_eq!(kept.len(), 2);
        assert!(kept.contains(&PathBuf::from("/repo/src/main.rs")));
        assert!(kept.contains(&PathBuf::from("/repo/src/lib.rs")));
    }

    /// A batch consisting solely of ignorable paths filters to nothing.
    #[test]
    fn test_filter_changes_all_ignored() {
        let batch = vec![
            PathBuf::from("/repo/.git/HEAD"),
            PathBuf::from("/repo/.DS_Store"),
        ];
        assert!(filter_changes(&batch).is_empty());
    }

    /// Filtering an empty slice yields an empty vector.
    #[test]
    fn test_filter_changes_empty() {
        assert!(filter_changes(&[]).is_empty());
    }

    /// A directory with no code files rebuilds successfully as a no-op.
    #[test]
    fn test_rebuild_empty_dir() {
        let workspace = tempfile::tempdir().unwrap();
        let out = tempfile::tempdir().unwrap();
        assert!(rebuild(workspace.path(), out.path(), None).is_ok());
    }

    /// A full build over real code files produces the expected artifacts.
    #[test]
    fn test_rebuild_with_code_files() {
        let workspace = tempfile::tempdir().unwrap();
        let out = tempfile::tempdir().unwrap();
        let src = workspace.path().join("src");
        std::fs::create_dir_all(&src).unwrap();
        std::fs::write(
            src.join("main.rs"),
            "fn main() { hello(); }\nfn hello() { println!(\"hi\"); }\n",
        )
        .unwrap();
        std::fs::write(
            src.join("lib.rs"),
            "pub fn add(a: i32, b: i32) -> i32 { a + b }\n",
        )
        .unwrap();

        assert!(rebuild(workspace.path(), out.path(), None).is_ok());

        // Check that output files were created
        for artifact in ["graph.json", "graph.html", "GRAPH_REPORT.md"] {
            assert!(out.path().join(artifact).exists());
        }
    }

    /// An incremental rebuild (changed-file set supplied) after a full
    /// build succeeds.
    #[test]
    fn test_incremental_rebuild() {
        let workspace = tempfile::tempdir().unwrap();
        let out = tempfile::tempdir().unwrap();
        let src = workspace.path().join("src");
        std::fs::create_dir_all(&src).unwrap();
        std::fs::write(
            src.join("main.rs"),
            "fn main() { hello(); }\nfn hello() { println!(\"hi\"); }\n",
        )
        .unwrap();

        // Initial full build, then a rebuild invalidating just main.rs.
        assert!(rebuild(workspace.path(), out.path(), None).is_ok());
        let changed = vec![src.join("main.rs")];
        assert!(rebuild(workspace.path(), out.path(), Some(&changed)).is_ok());
    }
}