Skip to main content

mcp_methods/server/
watch.rs

//! Filesystem-watcher subsystem for `--watch DIR` mode.
//!
//! Boots a debounced recursive watcher on the configured directory and
//! invokes a caller-supplied callback when files change. Downstream
//! binaries register callbacks to drive whatever rebuild they need —
//! kglite-mcp-server, for example, wires this to `code_tree::build()`
//! against the watched directory and atomic-swaps the active graph.
//!
//! The mcp-methods binary on its own does not own a rebuild target;
//! it logs change events at INFO level and forwards them to any
//! registered callback. When no callback is set the watcher still
//! runs, so the change events show up in stderr.
//!
//! ## Default skip patterns
//!
//! Events matching conventional noise paths ([`DEFAULT_SKIP_SUBSTRINGS`]
//! and [`DEFAULT_SKIP_EXTENSIONS`]) are dropped before the callback
//! runs — `.git/`, `target/`, `node_modules/`, `__pycache__/`, `*.pyc`,
//! editor swap files, etc. A wide sandbox under active development
//! generates hundreds of these per second; without the filter every
//! consumer either rebuilds wastefully or implements the same skip
//! list. With it, consumers see only events that could plausibly
//! matter.
//!
//! Bindings that need everything (test fixtures, future consumers
//! with a genuine reason to see every event) pass
//! [`WatchConfig::unfiltered`] to [`watch_with_config`].
28
29#![allow(dead_code)]
30
31use std::path::{Path, PathBuf};
32use std::sync::Arc;
33use std::time::Duration;
34
35use anyhow::{Context, Result};
36use notify_debouncer_mini::notify::RecursiveMode;
37use notify_debouncer_mini::{new_debouncer, DebounceEventResult, Debouncer};
38
/// Shared, thread-safe handler called once per debounced batch of
/// file changes.
///
/// The slice argument holds the paths reported as changed within one
/// debounce window, **after** the active [`WatchConfig`]'s skip filter
/// has removed noise paths. The handler is invoked from the watcher's
/// background thread, so it should return quickly — hand heavy work
/// to a channel or another thread instead of doing it inline.
pub type ChangeHandler = Arc<dyn Fn(&[PathBuf]) + Send + Sync + 'static>;
46
/// Debounce window used when the caller passes `None`: half a second.
///
/// Short enough that a save still feels immediate, long enough to
/// fold an editor's burst of temp-file writes into a single batch.
pub const DEFAULT_DEBOUNCE: Duration = Duration::from_millis(500);
50
/// Path fragments skipped by the default [`WatchConfig`]. Any event
/// path that contains one of these as a substring is dropped before
/// the callback runs.
///
/// These are the conventional build / VCS / cache directories that a
/// graph builder, search index, or rebuild target has no reason to
/// react to. Directory entries carry a `/` on both sides so they only
/// match a whole path component: `/.git/` hits `.../my-repo/.git/HEAD`
/// but leaves a file named `.gitignore` alone.
pub const DEFAULT_SKIP_SUBSTRINGS: &[&str] = &[
    // git objects + index churn on any git operation
    "/.git/",
    // Cargo build artifacts (worst storm offender)
    "/target/",
    // npm/yarn install storms + cache writes
    "/node_modules/",
    // CPython bytecode dirs
    "/__pycache__/",
    // Python venv internals
    "/.venv/",
    // generic build outputs across many tools
    "/build/",
    // generic build/distribution outputs
    "/dist/",
    // macOS Finder metadata churn
    "/.DS_Store",
];
69
/// File extensions skipped by the default [`WatchConfig`], written
/// without the leading dot.
pub const DEFAULT_SKIP_EXTENSIONS: &[&str] = &[
    // CPython bytecode files
    "pyc",
    "pyo",
    // vim swap files
    "swp",
    "swo",
    // atomic-save temp files
    "tmp",
];
76
/// Skip rules for a [`watch_with_config`] call: decides which changed
/// paths are allowed through to the callback.
#[derive(Debug, Clone)]
pub struct WatchConfig {
    /// Path fragments to skip. An event whose path contains any of
    /// these, at any position, never reaches the callback. The
    /// comparison is case-sensitive.
    pub skip_substrings: Vec<String>,
    /// Extensions to skip, written without the leading dot. Only the
    /// final extension of the path (as returned by
    /// [`Path::extension`]) is compared, case-sensitively.
    pub skip_extensions: Vec<String>,
}
90
91impl Default for WatchConfig {
92    /// The recommended default: skip [`DEFAULT_SKIP_SUBSTRINGS`] +
93    /// [`DEFAULT_SKIP_EXTENSIONS`]. Most consumers want this — see
94    /// [`unfiltered`](Self::unfiltered) for the escape hatch.
95    fn default() -> Self {
96        Self {
97            skip_substrings: DEFAULT_SKIP_SUBSTRINGS
98                .iter()
99                .map(|s| (*s).to_string())
100                .collect(),
101            skip_extensions: DEFAULT_SKIP_EXTENSIONS
102                .iter()
103                .map(|s| (*s).to_string())
104                .collect(),
105        }
106    }
107}
108
109impl WatchConfig {
110    /// Empty skip set — every event reaches the callback. Use when
111    /// you genuinely want raw FS events (test fixtures, log-every-
112    /// change diagnostic modes, or future consumers with a reason to
113    /// see `.git/objects/...` writes).
114    pub fn unfiltered() -> Self {
115        Self {
116            skip_substrings: Vec::new(),
117            skip_extensions: Vec::new(),
118        }
119    }
120
121    /// Test a path against the active skip set. `true` → skip; `false`
122    /// → forward to callback. Public so consumers building their own
123    /// orchestration over the same conventions can reuse the predicate
124    /// without re-deriving it.
125    pub fn is_skipped(&self, path: &Path) -> bool {
126        // Substring match against the full path. UTF-8 fallback is
127        // lossy: paths that aren't valid UTF-8 skip the substring
128        // check (we still run the extension check below). On the
129        // platforms we care about (macOS / Linux / Windows) this is
130        // never the hot path's bottleneck.
131        if let Some(s) = path.to_str() {
132            for needle in &self.skip_substrings {
133                if s.contains(needle.as_str()) {
134                    return true;
135                }
136            }
137        }
138        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
139            for skip in &self.skip_extensions {
140                if ext == skip {
141                    return true;
142                }
143            }
144        }
145        false
146    }
147}
148
149/// Apply a [`WatchConfig`]'s skip filter to a batch of event paths,
150/// keeping only those that should reach the callback. The debouncer
151/// drops the whole batch (no callback at all) when this returns empty —
152/// the pure-noise-storm case (`cargo build`'s `target/` churn, a `git`
153/// operation's `.git/` writes). Extracted as a free function so the
154/// retention decision is unit-testable without depending on a real
155/// watcher's platform-specific event-path semantics.
156fn retain_unskipped(
157    config: &WatchConfig,
158    paths: impl IntoIterator<Item = PathBuf>,
159) -> Vec<PathBuf> {
160    paths
161        .into_iter()
162        .filter(|p| !config.is_skipped(p))
163        .collect()
164}
165
166/// Active watcher handle. Drop to stop watching.
167pub struct WatchHandle {
168    _debouncer: Debouncer<notify_debouncer_mini::notify::RecommendedWatcher>,
169}
170
171/// Spawn a recursive debounced watcher on `dir` using the default
172/// [`WatchConfig`] (skips conventional noise paths — `.git/`,
173/// `target/`, `node_modules/`, etc.).
174///
175/// Returns a handle whose `Drop` impl tears the watcher down. Errors
176/// surface synchronously if the path is not a directory or the platform
177/// watcher refuses to register.
178///
179/// For control over the skip set, use [`watch_with_config`].
180pub fn watch(
181    dir: &Path,
182    on_change: Option<ChangeHandler>,
183    debounce: Option<Duration>,
184) -> Result<WatchHandle> {
185    watch_with_config(dir, on_change, debounce, WatchConfig::default())
186}
187
188/// Spawn a recursive debounced watcher with an explicit
189/// [`WatchConfig`]. Behaves like [`watch`] except the skip set is
190/// caller-controlled — pass [`WatchConfig::unfiltered`] to receive
191/// every event, or build a custom config to add / remove patterns.
192pub fn watch_with_config(
193    dir: &Path,
194    on_change: Option<ChangeHandler>,
195    debounce: Option<Duration>,
196    config: WatchConfig,
197) -> Result<WatchHandle> {
198    if !dir.is_dir() {
199        anyhow::bail!("--watch path is not a directory: {}", dir.display());
200    }
201    let debounce = debounce.unwrap_or(DEFAULT_DEBOUNCE);
202    let dir_for_log = dir.to_path_buf();
203    let on_change = on_change.unwrap_or_else(|| {
204        Arc::new(|_| {
205            // No-op callback when no downstream consumer is configured.
206        })
207    });
208
209    let mut debouncer = new_debouncer(debounce, move |result: DebounceEventResult| match result {
210        Ok(events) => {
211            // Drop skipped events before they're handed to the
212            // callback or counted in the log line. Empty post-filter
213            // batches (a pure-noise storm like `cargo build`'s
214            // `target/` churn) return without a callback invocation
215            // at all.
216            let paths = retain_unskipped(&config, events.into_iter().map(|e| e.path));
217            if paths.is_empty() {
218                return;
219            }
220            tracing::info!(
221                root = %dir_for_log.display(),
222                changed = paths.len(),
223                "watch: file change debounced"
224            );
225            on_change(&paths);
226        }
227        Err(e) => {
228            tracing::warn!(error = %e, "watch: error from notify");
229        }
230    })
231    .context("failed to construct file-system debouncer")?;
232
233    debouncer
234        .watcher()
235        .watch(dir, RecursiveMode::Recursive)
236        .with_context(|| format!("failed to watch {}", dir.display()))?;
237
238    tracing::info!(root = %dir.display(), debounce_ms = debounce.as_millis() as u64, "watch: active");
239    Ok(WatchHandle {
240        _debouncer: debouncer,
241    })
242}
243
#[cfg(test)]
mod tests {
    //! Unit tests for the watcher wiring and the skip-filter predicate.
    //!
    //! Only three tests touch a real watcher; everything about *which*
    //! paths are filtered is tested against `WatchConfig::is_skipped`
    //! and `retain_unskipped` directly, with no filesystem involved.

    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::Instant;

    #[test]
    fn watch_rejects_non_directory() {
        let result = watch(Path::new("/this/does/not/exist"), None, None);
        assert!(result.is_err());
    }

    #[test]
    fn watch_rejects_regular_file() {
        // An existing path that is a file, not a directory, must be
        // refused just like a missing one.
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("not_a_dir.txt");
        std::fs::write(&file, "x").unwrap();
        assert!(watch(&file, None, None).is_err());
    }

    #[test]
    fn watch_starts_and_drops_clean() {
        let dir = tempfile::tempdir().unwrap();
        let _handle = watch(dir.path(), None, Some(Duration::from_millis(100))).unwrap();
        // Drop at end of scope tears it down without panicking.
    }

    #[test]
    fn callback_fires_on_file_change() {
        use std::thread::sleep;
        let dir = tempfile::tempdir().unwrap();
        let counter = Arc::new(AtomicUsize::new(0));
        let counter_for_cb = counter.clone();
        let cb: ChangeHandler = Arc::new(move |_paths: &[PathBuf]| {
            counter_for_cb.fetch_add(1, Ordering::SeqCst);
        });
        let _handle = watch(dir.path(), Some(cb), Some(Duration::from_millis(100))).unwrap();
        sleep(Duration::from_millis(50)); // let watcher settle
        std::fs::write(dir.path().join("a.txt"), "hi").unwrap();

        // Poll up to a generous deadline instead of sleeping a fixed
        // interval: event delivery latency varies by platform and by
        // machine load, and a fixed sleep makes the test flaky on slow
        // CI runners. The loop exits as soon as the callback has fired.
        let deadline = Instant::now() + Duration::from_secs(5);
        while counter.load(Ordering::SeqCst) == 0 && Instant::now() < deadline {
            sleep(Duration::from_millis(25));
        }
        assert!(
            counter.load(Ordering::SeqCst) >= 1,
            "expected callback to fire at least once after file write"
        );
    }

    // ── skip-pattern coverage ───────────────────────────────────────

    #[test]
    fn default_config_skips_git_dir() {
        let cfg = WatchConfig::default();
        assert!(cfg.is_skipped(Path::new("/repo/.git/HEAD")));
        assert!(cfg.is_skipped(Path::new("/repo/.git/objects/ab/cdef")));
    }

    #[test]
    fn default_config_skips_target_dir() {
        let cfg = WatchConfig::default();
        assert!(cfg.is_skipped(Path::new("/repo/target/debug/foo.rlib")));
        assert!(cfg.is_skipped(Path::new("/repo/target/release/build/x.o")));
    }

    #[test]
    fn default_config_skips_node_modules() {
        let cfg = WatchConfig::default();
        assert!(cfg.is_skipped(Path::new("/repo/node_modules/@scope/package/index.js")));
    }

    #[test]
    fn default_config_skips_python_bytecode() {
        let cfg = WatchConfig::default();
        assert!(cfg.is_skipped(Path::new("/repo/pkg/__pycache__/m.cpython-312.pyc")));
        assert!(cfg.is_skipped(Path::new("/repo/lib.pyc")));
    }

    #[test]
    fn default_config_skips_editor_swap() {
        let cfg = WatchConfig::default();
        assert!(cfg.is_skipped(Path::new("/repo/src/main.rs.swp")));
        assert!(cfg.is_skipped(Path::new("/repo/draft.tmp")));
    }

    #[test]
    fn default_config_passes_source_files() {
        let cfg = WatchConfig::default();
        // Files with these patterns OUTSIDE the skip dirs should pass.
        assert!(!cfg.is_skipped(Path::new("/repo/src/main.rs")));
        assert!(!cfg.is_skipped(Path::new("/repo/lib.py")));
        assert!(!cfg.is_skipped(Path::new("/repo/index.ts")));
        // A literal `.gitignore` (not under `.git/`) should pass.
        assert!(!cfg.is_skipped(Path::new("/repo/.gitignore")));
    }

    #[test]
    fn unfiltered_config_skips_nothing() {
        let cfg = WatchConfig::unfiltered();
        assert!(!cfg.is_skipped(Path::new("/repo/.git/HEAD")));
        assert!(!cfg.is_skipped(Path::new("/repo/target/foo.rlib")));
        assert!(!cfg.is_skipped(Path::new("/repo/lib.pyc")));
    }

    #[test]
    fn custom_config_round_trip() {
        let cfg = WatchConfig {
            skip_substrings: vec!["/secret/".to_string()],
            skip_extensions: vec!["bak".to_string()],
        };
        assert!(cfg.is_skipped(Path::new("/repo/secret/key.txt")));
        assert!(cfg.is_skipped(Path::new("/repo/file.bak")));
        // Substrings from the default set are NOT in this config:
        assert!(!cfg.is_skipped(Path::new("/repo/.git/HEAD")));
        assert!(!cfg.is_skipped(Path::new("/repo/lib.pyc")));
    }

    #[test]
    fn default_skip_substrings_are_anchored() {
        let cfg = WatchConfig::default();
        // `/target/` (not `target/`) so a file literally named `target`
        // at the repo root doesn't false-match.
        assert!(!cfg.is_skipped(Path::new("/repo/target")));
        // But `/repo/target/...` does:
        assert!(cfg.is_skipped(Path::new("/repo/target/foo")));
    }

    // The debouncer fires the callback iff `retain_unskipped` returns a
    // non-empty batch. We test that retention decision directly rather
    // than against a live watcher: a real-FS "noise-only batch" test is
    // inherently flaky across platforms, because inotify (Linux) and
    // FSEvents (macOS) report different event paths for the same writes
    // (e.g. a write inside `target/` can surface a modify event on the
    // bare `target` directory entry on Linux but not on macOS). The
    // positive wiring is covered by `callback_fires_on_file_change`.

    #[test]
    fn noise_only_batch_retains_nothing() {
        let cfg = WatchConfig::default();
        // A pure `cargo build` / `git` storm — every path is noise.
        let batch = vec![
            PathBuf::from("/repo/target/debug/deps/a.rlib"),
            PathBuf::from("/repo/target/release/build/x.o"),
            PathBuf::from("/repo/.git/objects/ab/cdef"),
            PathBuf::from("/repo/pkg/__pycache__/m.cpython-312.pyc"),
            PathBuf::from("/repo/lib.pyc"),
        ];
        // Empty result → the debouncer returns without firing the callback.
        assert!(retain_unskipped(&cfg, batch).is_empty());
    }

    #[test]
    fn mixed_batch_retains_only_non_noise() {
        let cfg = WatchConfig::default();
        let batch = vec![
            PathBuf::from("/repo/target/debug/deps/a.rlib"), // noise
            PathBuf::from("/repo/src/main.rs"),              // source — keep
            PathBuf::from("/repo/.git/HEAD"),                // noise
            PathBuf::from("/repo/lib.py"),                   // source — keep
        ];
        let kept = retain_unskipped(&cfg, batch);
        assert_eq!(
            kept,
            vec![
                PathBuf::from("/repo/src/main.rs"),
                PathBuf::from("/repo/lib.py"),
            ]
        );
    }

    #[test]
    fn unfiltered_config_retains_everything() {
        let cfg = WatchConfig::unfiltered();
        let batch = vec![
            PathBuf::from("/repo/target/debug/a.rlib"),
            PathBuf::from("/repo/.git/HEAD"),
        ];
        // Nothing is dropped → the callback sees the raw batch.
        assert_eq!(retain_unskipped(&cfg, batch.clone()), batch);
    }
}