Skip to main content

gid_core/
watch.rs

1//! Watch — file system monitoring for automatic code graph sync.
2//!
3//! The core logic is in [`sync_on_change`], which is a testable pure function.
4//! The watch loop itself ([`watch_and_sync`]) is a thin shell around it
5//! that uses the `notify` crate for filesystem events.
6
7use std::path::{Path, PathBuf};
8use std::time::Instant;
9
10use anyhow::{Context, Result};
11
12use crate::code_graph::CodeGraph;
13use crate::graph::Graph;
14use crate::ignore::IgnoreList;
15use crate::storage::{load_graph_auto, save_graph_auto, StorageBackend};
16use crate::unify::{codegraph_to_graph_nodes, merge_code_layer, generate_bridge_edges};
17use crate::semantify::apply_heuristic_layers;
18
19/// Result of a sync operation.
20#[derive(Debug, Clone)]
21pub struct SyncResult {
22    /// Number of files that changed.
23    pub files_changed: usize,
24    /// Number of code nodes in the updated graph.
25    pub code_nodes: usize,
26    /// Number of code edges in the updated graph.
27    pub code_edges: usize,
28    /// Number of bridge edges generated.
29    pub bridge_edges: usize,
30    /// Time taken for the sync operation.
31    pub duration_ms: u64,
32    /// Whether the graph was actually modified (false if no files changed).
33    pub graph_modified: bool,
34}
35
36/// Configuration for the watch/sync operation.
37#[derive(Debug, Clone)]
38pub struct WatchConfig {
39    /// Directory to watch for changes.
40    pub watch_dir: PathBuf,
41    /// Path to the .gid directory.
42    pub gid_dir: PathBuf,
43    /// Debounce interval in milliseconds.
44    pub debounce_ms: u64,
45    /// Whether to run LSP refinement (expensive).
46    pub lsp: bool,
47    /// Whether to skip semantify.
48    pub no_semantify: bool,
49    /// Storage backend override (None = auto-detect).
50    pub backend: Option<StorageBackend>,
51}
52
53impl WatchConfig {
54    /// Create a new WatchConfig with defaults.
55    pub fn new(watch_dir: PathBuf, gid_dir: PathBuf) -> Self {
56        Self {
57            watch_dir,
58            gid_dir,
59            debounce_ms: 1000,
60            lsp: true,
61            no_semantify: false,
62            backend: None,
63        }
64    }
65}
66
67/// Check if a changed path should trigger a re-extraction.
68///
69/// Returns false for:
70/// - Paths inside .gid/ directory
71/// - Paths matching .gidignore patterns
72/// - Paths matching common ignore patterns (node_modules, target, .git, etc.)
73/// - Non-source files (binary, media, etc.)
74pub fn should_trigger_sync(path: &Path, watch_dir: &Path, gid_dir: &Path, ignore_list: &IgnoreList) -> bool {
75    // Never trigger on .gid/ changes
76    if path.starts_with(gid_dir) {
77        return false;
78    }
79
80    // Never trigger on .git/ changes
81    let git_dir = watch_dir.join(".git");
82    if path.starts_with(&git_dir) {
83        return false;
84    }
85
86    // Check .gidignore patterns
87    if let Ok(rel) = path.strip_prefix(watch_dir) {
88        let rel_str = rel.to_string_lossy();
89        // Check full relative path
90        if ignore_list.should_ignore(&rel_str, path.is_dir()) {
91            return false;
92        }
93        // Check each path component individually (a pattern like "node_modules"
94        // should block all files inside node_modules/, matching gitignore semantics
95        // where directory patterns ignore all contents)
96        for component in rel.components() {
97            let comp_str = component.as_os_str().to_string_lossy();
98            if ignore_list.should_ignore(&comp_str, true) {
99                return false;
100            }
101        }
102    }
103
104    // Only trigger on source-like files
105    match path.extension().and_then(|e| e.to_str()) {
106        Some("rs" | "py" | "ts" | "tsx" | "js" | "jsx" | "go" | "java" | "c" | "cpp" | "h" | "hpp"
107             | "rb" | "swift" | "kt" | "scala" | "zig" | "toml" | "yaml" | "yml" | "json") => true,
108        // Known config extensions that are relevant
109        Some("mod") => {
110            // go.mod but not random .mod files
111            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
112            name == "go.mod"
113        }
114        Some("gradle") => true,
115        // No extension but named like source (Makefile, Dockerfile, etc.)
116        None => {
117            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
118            matches!(name, "Makefile" | "Dockerfile")
119        }
120        _ => false,
121    }
122}
123
124/// Core sync logic: extract changed files, merge into graph, save.
125///
126/// This is the testable heart of the watch system. The watch loop
127/// calls this function on each batch of file changes.
128///
129/// Returns `Ok(SyncResult)` with `graph_modified: false` if no files changed.
130pub fn sync_on_change(config: &WatchConfig) -> Result<SyncResult> {
131    let start = Instant::now();
132    let meta_path = config.gid_dir.join("extract-meta.json");
133
134    // Run incremental extraction
135    let (code_graph, report) = CodeGraph::extract_incremental(
136        &config.watch_dir,
137        &config.gid_dir,
138        &meta_path,
139        false, // never force in watch mode
140    ).context("incremental extraction failed")?;
141
142    let files_changed = report.added + report.modified + report.deleted;
143    if files_changed == 0 {
144        return Ok(SyncResult {
145            files_changed: 0,
146            code_nodes: 0,
147            code_edges: 0,
148            bridge_edges: 0,
149            duration_ms: start.elapsed().as_millis() as u64,
150            graph_modified: false,
151        });
152    }
153
154    // Convert to graph nodes
155    let (code_nodes, code_edges) = codegraph_to_graph_nodes(&code_graph, &config.watch_dir);
156    let code_node_count = code_nodes.len();
157    let code_edge_count = code_edges.len();
158
159    // Load existing graph
160    let mut graph = load_graph_auto(&config.gid_dir, config.backend).unwrap_or_default();
161
162    // Merge code layer
163    merge_code_layer(&mut graph, code_nodes, code_edges);
164
165    // Semantify + bridge edges
166    if !config.no_semantify {
167        apply_heuristic_layers(&mut graph);
168        generate_bridge_edges(&mut graph);
169    }
170
171    let bridge_count = graph.bridge_edges().len();
172
173    // Save graph via backend-agnostic storage
174    save_graph_auto(&graph, &config.gid_dir, config.backend)
175        .map_err(|e| anyhow::anyhow!("{e}"))?;
176
177    Ok(SyncResult {
178        files_changed,
179        code_nodes: code_node_count,
180        code_edges: code_edge_count,
181        bridge_edges: bridge_count,
182        duration_ms: start.elapsed().as_millis() as u64,
183        graph_modified: true,
184    })
185}
186
187#[cfg(test)]
188mod tests {
189    use super::*;
190    use std::fs;
191    use tempfile::TempDir;
192    use crate::parser::load_graph;
193
194    fn setup_test_project(source: &str) -> (TempDir, PathBuf, PathBuf) {
195        let tmp = TempDir::new().unwrap();
196        let src_dir = tmp.path().join("src");
197        fs::create_dir_all(&src_dir).unwrap();
198        let gid_dir = tmp.path().join(".gid");
199        fs::create_dir_all(&gid_dir).unwrap();
200
201        // Write a Rust source file
202        fs::write(src_dir.join("main.rs"), source).unwrap();
203
204        // Write a minimal graph.yml
205        fs::write(gid_dir.join("graph.yml"), "nodes: []\nedges: []\n").unwrap();
206
207        (tmp, src_dir, gid_dir)
208    }
209
210    // ── should_trigger_sync tests ──────────────────────────────────────────
211
212    #[test]
213    fn test_trigger_rust_file() {
214        let ignore = IgnoreList::with_defaults();
215        let watch = Path::new("/project");
216        let gid = Path::new("/project/.gid");
217        assert!(should_trigger_sync(Path::new("/project/src/main.rs"), watch, gid, &ignore));
218    }
219
220    #[test]
221    fn test_trigger_python_file() {
222        let ignore = IgnoreList::with_defaults();
223        let watch = Path::new("/project");
224        let gid = Path::new("/project/.gid");
225        assert!(should_trigger_sync(Path::new("/project/lib/parser.py"), watch, gid, &ignore));
226    }
227
228    #[test]
229    fn test_trigger_typescript_file() {
230        let ignore = IgnoreList::with_defaults();
231        let watch = Path::new("/project");
232        let gid = Path::new("/project/.gid");
233        assert!(should_trigger_sync(Path::new("/project/src/app.tsx"), watch, gid, &ignore));
234    }
235
236    #[test]
237    fn test_no_trigger_gid_dir() {
238        let ignore = IgnoreList::with_defaults();
239        let watch = Path::new("/project");
240        let gid = Path::new("/project/.gid");
241        assert!(!should_trigger_sync(Path::new("/project/.gid/graph.yml"), watch, gid, &ignore));
242    }
243
244    #[test]
245    fn test_no_trigger_git_dir() {
246        let ignore = IgnoreList::with_defaults();
247        let watch = Path::new("/project");
248        let gid = Path::new("/project/.gid");
249        assert!(!should_trigger_sync(Path::new("/project/.git/HEAD"), watch, gid, &ignore));
250    }
251
252    #[test]
253    fn test_no_trigger_binary_file() {
254        let ignore = IgnoreList::with_defaults();
255        let watch = Path::new("/project");
256        let gid = Path::new("/project/.gid");
257        assert!(!should_trigger_sync(Path::new("/project/image.png"), watch, gid, &ignore));
258    }
259
260    #[test]
261    fn test_no_trigger_compiled_file() {
262        let ignore = IgnoreList::with_defaults();
263        let watch = Path::new("/project");
264        let gid = Path::new("/project/.gid");
265        assert!(!should_trigger_sync(Path::new("/project/main.o"), watch, gid, &ignore));
266    }
267
268    #[test]
269    fn test_no_trigger_node_modules() {
270        let ignore = IgnoreList::with_defaults();
271        let watch = Path::new("/project");
272        let gid = Path::new("/project/.gid");
273        assert!(!should_trigger_sync(
274            Path::new("/project/node_modules/lodash/index.js"), watch, gid, &ignore
275        ));
276    }
277
278    #[test]
279    fn test_no_trigger_target_dir() {
280        let ignore = IgnoreList::with_defaults();
281        let watch = Path::new("/project");
282        let gid = Path::new("/project/.gid");
283        assert!(!should_trigger_sync(
284            Path::new("/project/target/debug/main.rs"), watch, gid, &ignore
285        ));
286    }
287
288    #[test]
289    fn test_trigger_cargo_toml() {
290        let ignore = IgnoreList::with_defaults();
291        let watch = Path::new("/project");
292        let gid = Path::new("/project/.gid");
293        assert!(should_trigger_sync(Path::new("/project/Cargo.toml"), watch, gid, &ignore));
294    }
295
296    #[test]
297    fn test_trigger_json_config() {
298        let ignore = IgnoreList::with_defaults();
299        let watch = Path::new("/project");
300        let gid = Path::new("/project/.gid");
301        assert!(should_trigger_sync(Path::new("/project/tsconfig.json"), watch, gid, &ignore));
302    }
303
304    #[test]
305    fn test_trigger_go_file() {
306        let ignore = IgnoreList::with_defaults();
307        let watch = Path::new("/project");
308        let gid = Path::new("/project/.gid");
309        assert!(should_trigger_sync(Path::new("/project/cmd/main.go"), watch, gid, &ignore));
310    }
311
312    #[test]
313    fn test_no_trigger_markdown() {
314        let ignore = IgnoreList::with_defaults();
315        let watch = Path::new("/project");
316        let gid = Path::new("/project/.gid");
317        // .md files are not source code
318        assert!(!should_trigger_sync(Path::new("/project/README.md"), watch, gid, &ignore));
319    }
320
321    #[test]
322    fn test_trigger_makefile() {
323        let ignore = IgnoreList::with_defaults();
324        let watch = Path::new("/project");
325        let gid = Path::new("/project/.gid");
326        assert!(should_trigger_sync(Path::new("/project/Makefile"), watch, gid, &ignore));
327    }
328
329    #[test]
330    fn test_trigger_dockerfile() {
331        let ignore = IgnoreList::with_defaults();
332        let watch = Path::new("/project");
333        let gid = Path::new("/project/.gid");
334        assert!(should_trigger_sync(Path::new("/project/Dockerfile"), watch, gid, &ignore));
335    }
336
337    #[test]
338    fn test_no_trigger_lock_file() {
339        let ignore = IgnoreList::with_defaults();
340        let watch = Path::new("/project");
341        let gid = Path::new("/project/.gid");
342        assert!(!should_trigger_sync(Path::new("/project/Cargo.lock"), watch, gid, &ignore));
343    }
344
345    #[test]
346    fn test_custom_gidignore_pattern() {
347        let mut ignore = IgnoreList::with_defaults();
348        ignore.add("generated/").unwrap();
349        let watch = Path::new("/project");
350        let gid = Path::new("/project/.gid");
351        assert!(!should_trigger_sync(
352            Path::new("/project/generated/types.rs"), watch, gid, &ignore
353        ));
354    }
355
356    // ── sync_on_change tests ───────────────────────────────────────────────
357
358    #[test]
359    fn test_sync_creates_graph_from_source() {
360        let (_tmp, _src_dir, gid_dir) = setup_test_project(
361            r#"
362pub fn hello() -> String {
363    "hello".to_string()
364}
365
366pub fn world() -> String {
367    "world".to_string()
368}
369"#,
370        );
371
372        let config = WatchConfig::new(
373            _tmp.path().to_path_buf(),
374            gid_dir.clone(),
375        );
376
377        let result = sync_on_change(&config).unwrap();
378        assert!(result.graph_modified, "files_changed={} code_nodes={}", result.files_changed, result.code_nodes);
379        assert!(result.files_changed > 0);
380        assert!(result.code_nodes > 0);
381        assert!(result.duration_ms < 30_000); // should complete in under 30s
382
383        // Verify graph was written
384        let graph = load_graph(&gid_dir.join("graph.yml")).unwrap();
385        assert!(!graph.nodes.is_empty());
386    }
387
388    #[test]
389    fn test_sync_no_change_second_run() {
390        let (_tmp, _src_dir, gid_dir) = setup_test_project(
391            "pub fn stable() {}\n",
392        );
393
394        let config = WatchConfig::new(
395            _tmp.path().to_path_buf(),
396            gid_dir.clone(),
397        );
398
399        // First run — extracts
400        let r1 = sync_on_change(&config).unwrap();
401        assert!(r1.graph_modified);
402
403        // Second run — no changes
404        let r2 = sync_on_change(&config).unwrap();
405        assert!(!r2.graph_modified);
406        assert_eq!(r2.files_changed, 0);
407    }
408
409    #[test]
410    fn test_sync_detects_file_modification() {
411        let (_tmp, src_dir, gid_dir) = setup_test_project(
412            "pub fn original() {}\n",
413        );
414
415        let config = WatchConfig::new(
416            _tmp.path().to_path_buf(),
417            gid_dir.clone(),
418        );
419
420        // First extraction
421        let r1 = sync_on_change(&config).unwrap();
422        assert!(r1.graph_modified);
423
424        // Modify the file — content changes are detected via content hash
425        // even within the same second (mtime granularity is seconds)
426        std::thread::sleep(std::time::Duration::from_millis(100));
427        fs::write(src_dir.join("main.rs"), "pub fn modified() {}\npub fn added() {}\n").unwrap();
428
429        // Second extraction should detect the change
430        let r2 = sync_on_change(&config).unwrap();
431        assert!(r2.graph_modified);
432        assert!(r2.files_changed > 0);
433    }
434
435    #[test]
436    fn test_sync_preserves_project_nodes() {
437        let (_tmp, _src_dir, gid_dir) = setup_test_project(
438            "pub fn code() {}\n",
439        );
440
441        // Write a graph with a project-layer task node
442        let graph_content = r#"
443nodes:
444  - id: task-auth
445    title: "Implement auth"
446    type: task
447    status: todo
448edges: []
449"#;
450        fs::write(gid_dir.join("graph.yml"), graph_content).unwrap();
451
452        let config = WatchConfig::new(
453            _tmp.path().to_path_buf(),
454            gid_dir.clone(),
455        );
456
457        let result = sync_on_change(&config).unwrap();
458        assert!(result.graph_modified);
459
460        // Verify project node is preserved
461        let graph = load_graph(&gid_dir.join("graph.yml")).unwrap();
462        assert!(graph.get_node("task-auth").is_some(), "project node should be preserved");
463    }
464
465    #[test]
466    fn test_sync_atomic_write() {
467        let (_tmp, _src_dir, gid_dir) = setup_test_project(
468            "pub fn atomic() {}\n",
469        );
470
471        let config = WatchConfig::new(
472            _tmp.path().to_path_buf(),
473            gid_dir.clone(),
474        );
475
476        sync_on_change(&config).unwrap();
477
478        // No .tmp file should remain
479        assert!(!gid_dir.join("graph.yml.tmp").exists());
480        // graph.yml should exist and be valid
481        let graph = load_graph(&gid_dir.join("graph.yml")).unwrap();
482        assert!(!graph.nodes.is_empty());
483    }
484
485    #[test]
486    fn test_sync_with_no_semantify() {
487        let (_tmp, _src_dir, gid_dir) = setup_test_project(
488            "pub fn no_sem() {}\n",
489        );
490
491        let mut config = WatchConfig::new(
492            _tmp.path().to_path_buf(),
493            gid_dir.clone(),
494        );
495        config.no_semantify = true;
496
497        let result = sync_on_change(&config).unwrap();
498        assert!(result.graph_modified);
499        // Bridge edges should be 0 when semantify is skipped
500        assert_eq!(result.bridge_edges, 0);
501    }
502
503    #[test]
504    fn test_sync_result_fields() {
505        let (_tmp, _src_dir, gid_dir) = setup_test_project(
506            "pub fn field_check() {}\n",
507        );
508
509        let config = WatchConfig::new(
510            _tmp.path().to_path_buf(),
511            gid_dir,
512        );
513
514        let result = sync_on_change(&config).unwrap();
515        assert!(result.graph_modified);
516        assert!(result.files_changed > 0);
517        assert!(result.code_nodes > 0);
518        // code_edges might be 0 for a simple file
519        assert!(result.duration_ms < 60_000);
520    }
521
522    #[test]
523    fn test_sync_new_file_added() {
524        let (_tmp, src_dir, gid_dir) = setup_test_project(
525            "pub fn initial() {}\n",
526        );
527
528        let config = WatchConfig::new(
529            _tmp.path().to_path_buf(),
530            gid_dir.clone(),
531        );
532
533        // First extraction
534        sync_on_change(&config).unwrap();
535
536        // Add a new file
537        std::thread::sleep(std::time::Duration::from_millis(100));
538        fs::write(src_dir.join("utils.rs"), "pub fn helper() -> i32 { 42 }\n").unwrap();
539
540        // Should detect new file
541        let result = sync_on_change(&config).unwrap();
542        assert!(result.graph_modified);
543
544        // Both files' functions should be in graph
545        let graph = load_graph(&gid_dir.join("graph.yml")).unwrap();
546        let func_nodes: Vec<_> = graph.nodes.iter()
547            .filter(|n| n.node_kind.as_deref() == Some("Function"))
548            .collect();
549        assert!(func_nodes.len() >= 2, "should have at least 2 function nodes, got {}", func_nodes.len());
550    }
551
552    #[test]
553    fn test_sync_missing_gid_dir() {
554        let tmp = TempDir::new().unwrap();
555        let src_dir = tmp.path().join("src");
556        fs::create_dir_all(&src_dir).unwrap();
557        fs::write(src_dir.join("main.rs"), "fn main() {}\n").unwrap();
558
559        // gid_dir doesn't exist — sync should handle gracefully
560        let gid_dir = tmp.path().join(".gid");
561        // Don't create it — let sync_on_change handle it
562        fs::create_dir_all(&gid_dir).unwrap();
563        fs::write(gid_dir.join("graph.yml"), "nodes: []\nedges: []\n").unwrap();
564
565        let config = WatchConfig::new(
566            tmp.path().to_path_buf(),
567            gid_dir,
568        );
569
570        let result = sync_on_change(&config).unwrap();
571        assert!(result.graph_modified);
572    }
573}