mcp_methods/server/watch.rs
1//! Filesystem-watcher subsystem for `--watch DIR` mode.
2//!
3//! Boots a debounced recursive watcher on the configured directory and
4//! invokes a caller-supplied callback when files change. Downstream
5//! binaries register callbacks to drive whatever rebuild they need —
6//! kglite-mcp-server, for example, wires this to `code_tree::build()`
7//! against the watched directory and atomic-swaps the active graph.
8//!
9//! mcp-methods's binary on its own does not own a rebuild target;
10//! it logs change events at INFO level and forwards them to any
11//! registered callback. When no callback is set the watcher still
12//! runs, so the change events show up in stderr.
13//!
14//! ## Default skip patterns
15//!
16//! Events matching conventional noise paths ([`DEFAULT_SKIP_SUBSTRINGS`]
17//! and [`DEFAULT_SKIP_EXTENSIONS`]) are dropped before the callback
18//! runs — `.git/`, `target/`, `node_modules/`, `__pycache__/`, `*.pyc`,
19//! editor swap files, etc. A wide sandbox under active development
20//! generates hundreds of these per second; without the filter every
21//! consumer either rebuilds wastefully or implements the same skip
22//! list. With it, consumers see only events that could plausibly
23//! matter.
24//!
25//! Bindings that need everything (test fixtures, future consumers
26//! with a genuine reason to see every event) pass
27//! [`WatchConfig::unfiltered`] to [`watch_with_config`].
28
29#![allow(dead_code)]
30
31use std::path::{Path, PathBuf};
32use std::sync::Arc;
33use std::time::Duration;
34
35use anyhow::{Context, Result};
36use notify_debouncer_mini::notify::RecursiveMode;
37use notify_debouncer_mini::{new_debouncer, DebounceEventResult, Debouncer};
38
/// Callback invoked on a debounced file-change event.
///
/// `paths` is the deduplicated set of paths reported as changed within
/// the debounce window, **after** the active [`WatchConfig`]'s skip
/// filter has run. [`watch_with_config`] never calls the handler with
/// an empty slice: a batch whose paths were all filtered out is dropped
/// without a callback invocation.
///
/// The callback runs on a background thread; keep it
/// non-blocking or push work onto a channel. The handler is moved into
/// the watcher's event closure, so it must be shareable across threads
/// (`Send + Sync`); wrapping it in an [`Arc`] lets callers keep their
/// own clone.
pub type ChangeHandler = Arc<dyn Fn(&[PathBuf]) + Send + Sync>;
46
/// Default debounce window — short enough to feel responsive, long
/// enough to coalesce noisy editor saves and IDE temp-file dance.
///
/// Used by [`watch_with_config`] (and therefore [`watch`]) whenever the
/// caller passes `None` for the `debounce` argument.
pub const DEFAULT_DEBOUNCE: Duration = Duration::from_millis(500);
50
/// Default substrings to skip. A path containing any of these as a
/// substring is dropped before the callback runs.
///
/// Conventional build / VCS / cache directories that no graph builder,
/// search index, or rebuild target should care about. The substrings
/// are anchored with `/` on both sides where appropriate so they don't
/// false-match (e.g. `/.git/` matches `.../my-repo/.git/HEAD` but not
/// a file literally named `.gitignore`).
///
/// Consequences of plain substring matching worth knowing:
///
/// * The directory entry itself is not matched — `/repo/target` (no
///   trailing separator) does not contain `/target/`.
/// * `/.DS_Store` is a file name, so it is anchored on the left only.
/// * The patterns are written with `/` separators.
pub const DEFAULT_SKIP_SUBSTRINGS: &[&str] = &[
    "/.git/", // git objects + index churn on any git operation
    "/target/", // Cargo build artifacts (worst storm offender)
    "/node_modules/", // npm/yarn install storms + cache writes
    "/__pycache__/", // CPython bytecode dirs
    "/.venv/", // Python venv internals
    "/build/", // generic build outputs across many tools
    "/dist/", // generic build/distribution outputs
    "/.DS_Store", // macOS Finder metadata churn
];
69
/// Default file extensions to skip (without the leading dot).
///
/// These are compared against the path's final extension only, so
/// `main.rs.swp` is skipped (extension `swp`) while `notes.tmp.txt`
/// is not (extension `txt`).
pub const DEFAULT_SKIP_EXTENSIONS: &[&str] = &[
    "pyc", "pyo", // CPython bytecode files
    "swp", "swo", // vim swap files
    "tmp", // atomic-save temp files
];
76
77/// Configuration for a [`watch_with_config`] call. Controls which
78/// events reach the callback.
79#[derive(Clone, Debug)]
80pub struct WatchConfig {
81 /// Substrings to skip. A path containing any of these (anywhere)
82 /// is dropped before the callback fires. Matching is
83 /// case-sensitive and allocation-free.
84 pub skip_substrings: Vec<String>,
85 /// File extensions (without leading dot) to skip. Matching uses
86 /// the path's last extension via [`Path::extension`] and is
87 /// case-sensitive.
88 pub skip_extensions: Vec<String>,
89}
90
91impl Default for WatchConfig {
92 /// The recommended default: skip [`DEFAULT_SKIP_SUBSTRINGS`] +
93 /// [`DEFAULT_SKIP_EXTENSIONS`]. Most consumers want this — see
94 /// [`unfiltered`](Self::unfiltered) for the escape hatch.
95 fn default() -> Self {
96 Self {
97 skip_substrings: DEFAULT_SKIP_SUBSTRINGS
98 .iter()
99 .map(|s| (*s).to_string())
100 .collect(),
101 skip_extensions: DEFAULT_SKIP_EXTENSIONS
102 .iter()
103 .map(|s| (*s).to_string())
104 .collect(),
105 }
106 }
107}
108
109impl WatchConfig {
110 /// Empty skip set — every event reaches the callback. Use when
111 /// you genuinely want raw FS events (test fixtures, log-every-
112 /// change diagnostic modes, or future consumers with a reason to
113 /// see `.git/objects/...` writes).
114 pub fn unfiltered() -> Self {
115 Self {
116 skip_substrings: Vec::new(),
117 skip_extensions: Vec::new(),
118 }
119 }
120
121 /// Test a path against the active skip set. `true` → skip; `false`
122 /// → forward to callback. Public so consumers building their own
123 /// orchestration over the same conventions can reuse the predicate
124 /// without re-deriving it.
125 pub fn is_skipped(&self, path: &Path) -> bool {
126 // Substring match against the full path. UTF-8 fallback is
127 // lossy: paths that aren't valid UTF-8 skip the substring
128 // check (we still run the extension check below). On the
129 // platforms we care about (macOS / Linux / Windows) this is
130 // never the hot path's bottleneck.
131 if let Some(s) = path.to_str() {
132 for needle in &self.skip_substrings {
133 if s.contains(needle.as_str()) {
134 return true;
135 }
136 }
137 }
138 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
139 for skip in &self.skip_extensions {
140 if ext == skip {
141 return true;
142 }
143 }
144 }
145 false
146 }
147}
148
149/// Apply a [`WatchConfig`]'s skip filter to a batch of event paths,
150/// keeping only those that should reach the callback. The debouncer
151/// drops the whole batch (no callback at all) when this returns empty —
152/// the pure-noise-storm case (`cargo build`'s `target/` churn, a `git`
153/// operation's `.git/` writes). Extracted as a free function so the
154/// retention decision is unit-testable without depending on a real
155/// watcher's platform-specific event-path semantics.
156fn retain_unskipped(
157 config: &WatchConfig,
158 paths: impl IntoIterator<Item = PathBuf>,
159) -> Vec<PathBuf> {
160 paths
161 .into_iter()
162 .filter(|p| !config.is_skipped(p))
163 .collect()
164}
165
/// Active watcher handle. Drop to stop watching.
///
/// The handle exposes no methods; it exists purely to own the
/// debouncer for as long as the caller wants events delivered.
pub struct WatchHandle {
    // Never read — held only so the debouncer lives as long as the
    // handle does (hence the leading underscore).
    _debouncer: Debouncer<notify_debouncer_mini::notify::RecommendedWatcher>,
}
170
171/// Spawn a recursive debounced watcher on `dir` using the default
172/// [`WatchConfig`] (skips conventional noise paths — `.git/`,
173/// `target/`, `node_modules/`, etc.).
174///
175/// Returns a handle whose `Drop` impl tears the watcher down. Errors
176/// surface synchronously if the path is not a directory or the platform
177/// watcher refuses to register.
178///
179/// For control over the skip set, use [`watch_with_config`].
180pub fn watch(
181 dir: &Path,
182 on_change: Option<ChangeHandler>,
183 debounce: Option<Duration>,
184) -> Result<WatchHandle> {
185 watch_with_config(dir, on_change, debounce, WatchConfig::default())
186}
187
188/// Spawn a recursive debounced watcher with an explicit
189/// [`WatchConfig`]. Behaves like [`watch`] except the skip set is
190/// caller-controlled — pass [`WatchConfig::unfiltered`] to receive
191/// every event, or build a custom config to add / remove patterns.
192pub fn watch_with_config(
193 dir: &Path,
194 on_change: Option<ChangeHandler>,
195 debounce: Option<Duration>,
196 config: WatchConfig,
197) -> Result<WatchHandle> {
198 if !dir.is_dir() {
199 anyhow::bail!("--watch path is not a directory: {}", dir.display());
200 }
201 let debounce = debounce.unwrap_or(DEFAULT_DEBOUNCE);
202 let dir_for_log = dir.to_path_buf();
203 let on_change = on_change.unwrap_or_else(|| {
204 Arc::new(|_| {
205 // No-op callback when no downstream consumer is configured.
206 })
207 });
208
209 let mut debouncer = new_debouncer(debounce, move |result: DebounceEventResult| match result {
210 Ok(events) => {
211 // Drop skipped events before they're handed to the
212 // callback or counted in the log line. Empty post-filter
213 // batches (a pure-noise storm like `cargo build`'s
214 // `target/` churn) return without a callback invocation
215 // at all.
216 let paths = retain_unskipped(&config, events.into_iter().map(|e| e.path));
217 if paths.is_empty() {
218 return;
219 }
220 tracing::info!(
221 root = %dir_for_log.display(),
222 changed = paths.len(),
223 "watch: file change debounced"
224 );
225 on_change(&paths);
226 }
227 Err(e) => {
228 tracing::warn!(error = %e, "watch: error from notify");
229 }
230 })
231 .context("failed to construct file-system debouncer")?;
232
233 debouncer
234 .watcher()
235 .watch(dir, RecursiveMode::Recursive)
236 .with_context(|| format!("failed to watch {}", dir.display()))?;
237
238 tracing::info!(root = %dir.display(), debounce_ms = debounce.as_millis() as u64, "watch: active");
239 Ok(WatchHandle {
240 _debouncer: debouncer,
241 })
242}
243
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Poll `counter` until it becomes non-zero or `timeout` elapses,
    /// returning the last value observed. Lets live-watcher tests finish
    /// as soon as the event arrives instead of betting on a fixed sleep.
    fn wait_for_nonzero(counter: &AtomicUsize, timeout: Duration) -> usize {
        let deadline = std::time::Instant::now() + timeout;
        loop {
            let seen = counter.load(Ordering::SeqCst);
            if seen > 0 || std::time::Instant::now() >= deadline {
                return seen;
            }
            std::thread::sleep(Duration::from_millis(25));
        }
    }

    #[test]
    fn watch_rejects_non_directory() {
        let result = watch(Path::new("/this/does/not/exist"), None, None);
        assert!(result.is_err());
    }

    #[test]
    fn watch_starts_and_drops_clean() {
        let dir = tempfile::tempdir().unwrap();
        let _handle = watch(dir.path(), None, Some(Duration::from_millis(100))).unwrap();
        // Drop at end of scope tears it down without panicking.
    }

    #[test]
    fn callback_fires_on_file_change() {
        let dir = tempfile::tempdir().unwrap();
        let counter = Arc::new(AtomicUsize::new(0));
        let counter_for_cb = counter.clone();
        let cb: ChangeHandler = Arc::new(move |_paths: &[PathBuf]| {
            counter_for_cb.fetch_add(1, Ordering::SeqCst);
        });
        let _handle = watch(dir.path(), Some(cb), Some(Duration::from_millis(100))).unwrap();
        std::thread::sleep(Duration::from_millis(50)); // let watcher settle
        std::fs::write(dir.path().join("a.txt"), "hi").unwrap();
        // Poll with a generous deadline rather than a single fixed sleep:
        // loaded CI machines can take well over debounce + 300 ms to
        // deliver the event, while fast machines return almost at once.
        let fired = wait_for_nonzero(&counter, Duration::from_secs(5));
        assert!(
            fired >= 1,
            "expected callback to fire at least once after file write"
        );
    }

    // ── skip-pattern coverage ───────────────────────────────────────

    #[test]
    fn default_config_skips_git_dir() {
        let cfg = WatchConfig::default();
        assert!(cfg.is_skipped(Path::new("/repo/.git/HEAD")));
        assert!(cfg.is_skipped(Path::new("/repo/.git/objects/ab/cdef")));
    }

    #[test]
    fn default_config_skips_target_dir() {
        let cfg = WatchConfig::default();
        assert!(cfg.is_skipped(Path::new("/repo/target/debug/foo.rlib")));
        assert!(cfg.is_skipped(Path::new("/repo/target/release/build/x.o")));
    }

    #[test]
    fn default_config_skips_node_modules() {
        let cfg = WatchConfig::default();
        assert!(cfg.is_skipped(Path::new("/repo/node_modules/@scope/package/index.js")));
    }

    #[test]
    fn default_config_skips_python_bytecode() {
        let cfg = WatchConfig::default();
        assert!(cfg.is_skipped(Path::new("/repo/pkg/__pycache__/m.cpython-312.pyc")));
        assert!(cfg.is_skipped(Path::new("/repo/lib.pyc")));
    }

    #[test]
    fn default_config_skips_editor_swap() {
        let cfg = WatchConfig::default();
        assert!(cfg.is_skipped(Path::new("/repo/src/main.rs.swp")));
        assert!(cfg.is_skipped(Path::new("/repo/draft.tmp")));
    }

    #[test]
    fn default_config_passes_source_files() {
        let cfg = WatchConfig::default();
        // Files with these patterns OUTSIDE the skip dirs should pass.
        assert!(!cfg.is_skipped(Path::new("/repo/src/main.rs")));
        assert!(!cfg.is_skipped(Path::new("/repo/lib.py")));
        assert!(!cfg.is_skipped(Path::new("/repo/index.ts")));
        // A literal `.gitignore` (not under `.git/`) should pass.
        assert!(!cfg.is_skipped(Path::new("/repo/.gitignore")));
    }

    #[test]
    fn unfiltered_config_skips_nothing() {
        let cfg = WatchConfig::unfiltered();
        assert!(!cfg.is_skipped(Path::new("/repo/.git/HEAD")));
        assert!(!cfg.is_skipped(Path::new("/repo/target/foo.rlib")));
        assert!(!cfg.is_skipped(Path::new("/repo/lib.pyc")));
    }

    #[test]
    fn custom_config_round_trip() {
        let cfg = WatchConfig {
            skip_substrings: vec!["/secret/".to_string()],
            skip_extensions: vec!["bak".to_string()],
        };
        assert!(cfg.is_skipped(Path::new("/repo/secret/key.txt")));
        assert!(cfg.is_skipped(Path::new("/repo/file.bak")));
        // Substrings from the default set are NOT in this config:
        assert!(!cfg.is_skipped(Path::new("/repo/.git/HEAD")));
        assert!(!cfg.is_skipped(Path::new("/repo/lib.pyc")));
    }

    #[test]
    fn default_skip_substrings_are_anchored() {
        let cfg = WatchConfig::default();
        // `/target/` (not `target/`) so a file literally named `target`
        // at the repo root doesn't false-match.
        assert!(!cfg.is_skipped(Path::new("/repo/target")));
        // But `/repo/target/...` does:
        assert!(cfg.is_skipped(Path::new("/repo/target/foo")));
    }

    #[cfg(unix)]
    #[test]
    fn non_utf8_source_path_is_not_skipped() {
        use std::ffi::OsStr;
        use std::os::unix::ffi::OsStrExt;

        // A path that is not valid UTF-8 but sits outside every skip
        // directory and has an ordinary extension must still be forwarded.
        let cfg = WatchConfig::default();
        let source = OsStr::from_bytes(b"/repo/src/\xff.rs");
        assert!(!cfg.is_skipped(Path::new(source)));
        // The extension filter keeps working for such paths.
        let bytecode = OsStr::from_bytes(b"/repo/src/\xff.pyc");
        assert!(cfg.is_skipped(Path::new(bytecode)));
    }

    // The debouncer fires the callback iff `retain_unskipped` returns a
    // non-empty batch. We test that retention decision directly rather
    // than against a live watcher: a real-FS "noise-only batch" test is
    // inherently flaky across platforms, because inotify (Linux) and
    // FSEvents (macOS) report different event paths for the same writes
    // (e.g. a write inside `target/` can surface a modify event on the
    // bare `target` directory entry on Linux but not on macOS). The
    // positive wiring is covered by `callback_fires_on_file_change`.

    #[test]
    fn noise_only_batch_retains_nothing() {
        let cfg = WatchConfig::default();
        // A pure `cargo build` / `git` storm — every path is noise.
        let batch = vec![
            PathBuf::from("/repo/target/debug/deps/a.rlib"),
            PathBuf::from("/repo/target/release/build/x.o"),
            PathBuf::from("/repo/.git/objects/ab/cdef"),
            PathBuf::from("/repo/pkg/__pycache__/m.cpython-312.pyc"),
            PathBuf::from("/repo/lib.pyc"),
        ];
        // Empty result → the debouncer returns without firing the callback.
        assert!(retain_unskipped(&cfg, batch).is_empty());
    }

    #[test]
    fn mixed_batch_retains_only_non_noise() {
        let cfg = WatchConfig::default();
        let batch = vec![
            PathBuf::from("/repo/target/debug/deps/a.rlib"), // noise
            PathBuf::from("/repo/src/main.rs"),              // source — keep
            PathBuf::from("/repo/.git/HEAD"),                // noise
            PathBuf::from("/repo/lib.py"),                   // source — keep
        ];
        let kept = retain_unskipped(&cfg, batch);
        assert_eq!(
            kept,
            vec![
                PathBuf::from("/repo/src/main.rs"),
                PathBuf::from("/repo/lib.py"),
            ]
        );
    }

    #[test]
    fn unfiltered_config_retains_everything() {
        let cfg = WatchConfig::unfiltered();
        let batch = vec![
            PathBuf::from("/repo/target/debug/a.rlib"),
            PathBuf::from("/repo/.git/HEAD"),
        ];
        // Nothing is dropped → the callback sees the raw batch.
        assert_eq!(retain_unskipped(&cfg, batch.clone()), batch);
    }
}