Skip to main content

graphify_watch/
lib.rs

//! File watching and auto-rebuild for graphify.
//!
//! Uses `notify` + debouncing to watch for file changes and trigger
//! incremental graph rebuilds. Port of Python `watch.py`.
5
6use std::collections::HashMap;
7use std::path::{Path, PathBuf};
8use std::time::Duration;
9
10use notify::RecursiveMode;
11use notify_debouncer_mini::new_debouncer;
12use thiserror::Error;
13use tokio::sync::mpsc;
14use tracing::{debug, info, warn};
15
/// Debounce duration before triggering a rebuild.
///
/// Handed to `new_debouncer` in `watch_directory` as the debounce timeout for
/// file-system events.
const DEBOUNCE_DURATION: Duration = Duration::from_secs(3);
18
/// Default ignore patterns for files that should not trigger rebuilds.
///
/// Consulted by `should_ignore`, which `filter_changes` applies to every
/// changed path before a rebuild is scheduled.
const IGNORE_PATTERNS: &[&str] = &[
    ".git",
    "node_modules",
    "__pycache__",
    ".pyc",
    "target",
    // NOTE(review): presumably the default rebuild output directory, listed so
    // that rebuild output does not re-trigger the watcher — confirm against the caller.
    "graphify-out",
    ".DS_Store",
];
29
/// Errors from the watcher.
///
/// Returned by `watch_directory` and by the internal `rebuild` pipeline.
#[derive(Debug, Error)]
pub enum WatchError {
    /// An I/O failure; converted automatically from `std::io::Error`.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// A failure reported by the `notify` crate; converted automatically from
    /// `notify::Error` (for example when registering the watch on the root).
    #[error("notify error: {0}")]
    Notify(#[from] notify::Error),

    /// The debouncer could not be created; carries the underlying error text.
    #[error("watch setup failed: {0}")]
    Setup(String),

    /// A rebuild step failed (graph build or output-directory creation);
    /// carries a description of the failing step.
    #[error("rebuild failed: {0}")]
    Rebuild(String),
}
45
46/// Check if a path should be ignored based on common patterns.
47fn should_ignore(path: &Path) -> bool {
48    let path_str = path.to_string_lossy();
49    IGNORE_PATTERNS.iter().any(|p| path_str.contains(p))
50}
51
52/// Filter changed paths to only include relevant source files.
53fn filter_changes(paths: &[PathBuf]) -> Vec<PathBuf> {
54    paths
55        .iter()
56        .filter(|p| !should_ignore(p))
57        .cloned()
58        .collect()
59}
60
61/// Run the full pipeline: detect -> extract -> build -> cluster -> analyze -> export.
62///
63/// When `changed_files` is provided, only those files have their cache invalidated
64/// before extraction, achieving an incremental rebuild without re-parsing unchanged files.
65fn rebuild(
66    root: &Path,
67    output_dir: &Path,
68    changed_files: Option<&[PathBuf]>,
69) -> Result<(), WatchError> {
70    let cache_dir = output_dir.join("cache");
71
72    if let Some(changed) = changed_files {
73        for path in changed {
74            let _ = graphify_cache::invalidate_cached(path, root, &cache_dir);
75        }
76        info!(
77            "rebuild: invalidated cache for {} changed file(s)",
78            changed.len()
79        );
80    }
81
82    info!("rebuild: detecting files...");
83    let detection = graphify_detect::detect(root);
84    info!(
85        "rebuild: found {} files (~{} words)",
86        detection.total_files, detection.total_words
87    );
88
89    let code_files: Vec<PathBuf> = detection
90        .files
91        .get(&graphify_detect::FileType::Code)
92        .map(|v| v.iter().map(|f| root.join(f)).collect())
93        .unwrap_or_default();
94
95    if code_files.is_empty() {
96        info!("rebuild: no code files found, skipping");
97        return Ok(());
98    }
99
100    info!(
101        "rebuild: extracting AST from {} code files...",
102        code_files.len()
103    );
104    let mut ast_result = graphify_core::model::ExtractionResult::default();
105    let mut cache_hits = 0usize;
106    let mut errors = 0usize;
107    for file_path in &code_files {
108        if let Some(cached) = graphify_cache::load_cached_from::<
109            graphify_core::model::ExtractionResult,
110        >(file_path, root, &cache_dir)
111        {
112            cache_hits += 1;
113            ast_result.nodes.extend(cached.nodes);
114            ast_result.edges.extend(cached.edges);
115            ast_result.hyperedges.extend(cached.hyperedges);
116            continue;
117        }
118        if let Ok(fresh) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
119            graphify_extract::extract(std::slice::from_ref(file_path))
120        })) {
121            let _ = graphify_cache::save_cached_to(file_path, &fresh, root, &cache_dir);
122            ast_result.nodes.extend(fresh.nodes);
123            ast_result.edges.extend(fresh.edges);
124            ast_result.hyperedges.extend(fresh.hyperedges);
125        } else {
126            errors += 1;
127            warn!("rebuild: extraction panicked for {}", file_path.display());
128        }
129    }
130    if cache_hits > 0 {
131        info!(
132            "rebuild: cache {} hits, {} extracted fresh",
133            cache_hits,
134            code_files.len() - cache_hits
135        );
136    }
137    if errors > 0 {
138        warn!("rebuild: {} file(s) had extraction errors", errors);
139    }
140    info!(
141        "rebuild: Pass 1 (AST): {} nodes, {} edges",
142        ast_result.nodes.len(),
143        ast_result.edges.len()
144    );
145
146    let extractions = vec![ast_result];
147
148    info!("rebuild: building graph...");
149    let graph = graphify_build::build(&extractions)
150        .map_err(|e| WatchError::Rebuild(format!("build failed: {e}")))?;
151    info!(
152        "rebuild: graph has {} nodes, {} edges",
153        graph.node_count(),
154        graph.edge_count()
155    );
156
157    info!("rebuild: detecting communities...");
158    let communities = graphify_cluster::cluster(&graph);
159    let cohesion = graphify_cluster::score_all(&graph, &communities);
160
161    let community_labels: HashMap<usize, String> = communities
162        .iter()
163        .map(|(cid, nodes)| {
164            let label = nodes
165                .first()
166                .and_then(|id| graph.get_node(id))
167                .map_or_else(|| format!("Community {cid}"), |n| n.label.clone());
168            (*cid, label)
169        })
170        .collect();
171    info!("rebuild: {} communities detected", communities.len());
172
173    info!("rebuild: analyzing...");
174    let god_list = graphify_analyze::god_nodes(&graph, 10);
175    let surprise_list = graphify_analyze::surprising_connections(&graph, &communities, 5);
176    let questions = graphify_analyze::suggest_questions(&graph, &communities, &community_labels, 7);
177
178    std::fs::create_dir_all(output_dir)
179        .map_err(|e| WatchError::Rebuild(format!("create output dir: {e}")))?;
180
181    let _ = graphify_export::export_json(&graph, output_dir);
182    let _ = graphify_export::export_html(&graph, &communities, &community_labels, output_dir, None);
183    let _ = graphify_export::export_graphml(&graph, output_dir);
184    let _ = graphify_export::export_cypher(&graph, output_dir);
185    let _ = graphify_export::export_svg(&graph, &communities, output_dir);
186    let _ = graphify_export::export_wiki(&graph, &communities, &community_labels, output_dir);
187
188    let detection_json = serde_json::json!({
189        "total_files": detection.total_files,
190        "total_words": detection.total_words,
191        "warning": detection.warning,
192    });
193    let question_json: Vec<serde_json::Value> = questions
194        .iter()
195        .map(|q| serde_json::to_value(q).unwrap_or_default())
196        .collect();
197    let token_cost: HashMap<String, usize> =
198        HashMap::from([("input".to_string(), 0), ("output".to_string(), 0)]);
199
200    let root_str = root.to_string_lossy();
201    if let Ok(report) = graphify_export::generate_report(&graphify_export::ReportInput {
202        graph: &graph,
203        communities: &communities,
204        cohesion_scores: &cohesion,
205        community_labels: &community_labels,
206        god_nodes: &god_list,
207        surprises: &surprise_list,
208        detection_result: &detection_json,
209        token_cost: &token_cost,
210        root: &root_str,
211        suggested_questions: Some(&question_json),
212    }) {
213        let report_path = output_dir.join("GRAPH_REPORT.md");
214        let _ = std::fs::write(&report_path, &report);
215    }
216
217    let manifest_path = output_dir.join(".graphify_manifest.json");
218    let manifest = graphify_detect::Manifest {
219        files: detection
220            .files
221            .iter()
222            .flat_map(|(ft, paths)| paths.iter().map(move |p| (p.clone(), *ft)))
223            .collect(),
224        hashes: HashMap::new(),
225    };
226    let _ = graphify_detect::save_manifest(&manifest_path, &manifest);
227
228    info!("rebuild: done");
229    Ok(())
230}
231
232/// Watch `root` for file changes and trigger rebuilds into `output_dir`.
233///
234/// This is an async loop that runs until cancelled. On each batch of
235/// debounced file changes, it logs the changed paths and invokes an
236/// incremental rebuild (only changed files have their cache invalidated).
237///
238/// # Arguments
239/// * `root` - Directory to watch recursively.
240/// * `output_dir` - Where to write rebuild output.
241pub async fn watch_directory(root: &Path, output_dir: &Path) -> Result<(), WatchError> {
242    let (tx, mut rx) = mpsc::channel::<Vec<PathBuf>>(100);
243
244    let mut debouncer = new_debouncer(
245        DEBOUNCE_DURATION,
246        move |res: Result<Vec<notify_debouncer_mini::DebouncedEvent>, notify::Error>| match res {
247            Ok(events) => {
248                let paths: Vec<PathBuf> = events.into_iter().map(|e| e.path).collect();
249                if let Err(e) = tx.blocking_send(paths) {
250                    warn!("Failed to send watch events: {}", e);
251                }
252            }
253            Err(e) => {
254                warn!("Watch error: {}", e);
255            }
256        },
257    )
258    .map_err(|e| WatchError::Setup(e.to_string()))?;
259
260    debouncer.watcher().watch(root, RecursiveMode::Recursive)?;
261
262    info!(
263        "Watching {} for changes (output: {})",
264        root.display(),
265        output_dir.display()
266    );
267    println!("Watching {} for changes...", root.display());
268
269    println!("Running initial build...");
270    let root_clone = root.to_path_buf();
271    let out_clone = output_dir.to_path_buf();
272    match tokio::task::spawn_blocking(move || rebuild(&root_clone, &out_clone, None)).await {
273        Ok(Ok(())) => println!("Initial build complete."),
274        Ok(Err(e)) => eprintln!("Initial build failed: {e}"),
275        Err(e) => eprintln!("Initial build panicked: {e}"),
276    }
277
278    while let Some(changed_paths) = rx.recv().await {
279        let relevant = filter_changes(&changed_paths);
280
281        if relevant.is_empty() {
282            debug!("Ignoring changes in excluded paths");
283            continue;
284        }
285
286        info!("{} file(s) changed, triggering rebuild...", relevant.len());
287        println!(
288            "Files changed ({}), triggering incremental rebuild...",
289            relevant.len()
290        );
291
292        for p in &relevant {
293            debug!("  changed: {}", p.display());
294        }
295
296        let root_clone = root.to_path_buf();
297        let out_clone = output_dir.to_path_buf();
298        match tokio::task::spawn_blocking(move || rebuild(&root_clone, &out_clone, Some(&relevant)))
299            .await
300        {
301            Ok(Ok(())) => println!("Rebuild complete."),
302            Ok(Err(e)) => eprintln!("Rebuild failed: {e}"),
303            Err(e) => eprintln!("Rebuild panicked: {e}"),
304        }
305    }
306
307    Ok(())
308}
309
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Two-function Rust program used as the main fixture file.
    const MAIN_RS: &str = "fn main() { hello(); }\nfn hello() { println!(\"hi\"); }\n";

    /// Shorthand: run `should_ignore` on a string path.
    fn ignored(raw: &str) -> bool {
        should_ignore(Path::new(raw))
    }

    /// Create `<project>/src` and return its path.
    fn make_src_dir(project: &Path) -> PathBuf {
        let src_dir = project.join("src");
        std::fs::create_dir_all(&src_dir).unwrap();
        src_dir
    }

    // Paths inside each default-ignored directory are rejected.
    #[test]
    fn test_should_ignore_git() {
        assert!(ignored("/repo/.git/objects/abc"));
        assert!(ignored("/repo/node_modules/foo.js"));
        assert!(ignored("/repo/__pycache__/mod.pyc"));
        assert!(ignored("/repo/target/debug/build"));
        assert!(ignored("/repo/graphify-out/graph.json"));
    }

    // Ordinary source and documentation files pass through.
    #[test]
    fn test_should_not_ignore_source() {
        assert!(!ignored("/repo/src/main.rs"));
        assert!(!ignored("/repo/lib/utils.py"));
        assert!(!ignored("/repo/README.md"));
    }

    // A mixed batch keeps only the non-ignored entries.
    #[test]
    fn test_filter_changes() {
        let batch: Vec<PathBuf> = [
            "/repo/src/main.rs",
            "/repo/.git/HEAD",
            "/repo/src/lib.rs",
            "/repo/node_modules/foo/index.js",
        ]
        .iter()
        .map(PathBuf::from)
        .collect();

        let kept = filter_changes(&batch);

        assert_eq!(kept.len(), 2);
        assert!(kept.contains(&PathBuf::from("/repo/src/main.rs")));
        assert!(kept.contains(&PathBuf::from("/repo/src/lib.rs")));
    }

    // A batch made up solely of ignored paths filters down to nothing.
    #[test]
    fn test_filter_changes_all_ignored() {
        let batch = vec![
            PathBuf::from("/repo/.git/HEAD"),
            PathBuf::from("/repo/.DS_Store"),
        ];
        assert!(filter_changes(&batch).is_empty());
    }

    // No input, no output.
    #[test]
    fn test_filter_changes_empty() {
        let kept = filter_changes(&[]);
        assert!(kept.is_empty());
    }

    // Rebuilding a directory without any files succeeds.
    #[test]
    fn test_rebuild_empty_dir() {
        let project = tempfile::tempdir().unwrap();
        let out = tempfile::tempdir().unwrap();
        assert!(rebuild(project.path(), out.path(), None).is_ok());
    }

    // A full rebuild over two Rust files produces the main artefacts.
    #[test]
    fn test_rebuild_with_code_files() {
        let project = tempfile::tempdir().unwrap();
        let out = tempfile::tempdir().unwrap();
        let src_dir = make_src_dir(project.path());
        std::fs::write(src_dir.join("main.rs"), MAIN_RS).unwrap();
        std::fs::write(
            src_dir.join("lib.rs"),
            "pub fn add(a: i32, b: i32) -> i32 { a + b }\n",
        )
        .unwrap();

        let outcome = rebuild(project.path(), out.path(), None);
        assert!(outcome.is_ok());

        let out_root = out.path();
        assert!(out_root.join("graph.json").exists());
        assert!(out_root.join("graph.html").exists());
        assert!(out_root.join("GRAPH_REPORT.md").exists());
    }

    // A full build followed by an incremental one for the same file succeeds.
    #[test]
    fn test_incremental_rebuild() {
        let project = tempfile::tempdir().unwrap();
        let out = tempfile::tempdir().unwrap();
        let src_dir = make_src_dir(project.path());
        let main_file = src_dir.join("main.rs");
        std::fs::write(&main_file, MAIN_RS).unwrap();

        let first = rebuild(project.path(), out.path(), None);
        assert!(first.is_ok());

        let touched = vec![main_file];
        let second = rebuild(project.path(), out.path(), Some(&touched));
        assert!(second.is_ok());
    }
}