Skip to main content

codesearch/watch/
mod.rs

1use anyhow::{anyhow, Result};
2use notify::{RecommendedWatcher, RecursiveMode, Watcher};
3use notify_debouncer_full::{new_debouncer, DebounceEventResult, Debouncer, FileIdMap};
4use std::collections::HashSet;
5use std::path::{Path, PathBuf};
6use std::sync::mpsc::{channel, Receiver};
7use std::time::Duration;
8
9use crate::cache::normalize_path;
10
11/// Normalize a path from notify events to a consistent format.
12/// Strips UNC prefix (`\\?\`) and converts backslashes to forward slashes
13/// so paths match the format used by FileMetaStore and VectorStore.
14fn normalize_event_path(path: &Path) -> PathBuf {
15    PathBuf::from(normalize_path(path))
16}
17
/// Whitelist of file extensions whose changes should trigger re-indexing.
///
/// Entries are lowercase and written without the leading dot. Covers source
/// code as well as common configuration and documentation formats.
#[rustfmt::skip]
const INDEXABLE_EXTENSIONS: &[&str] = &[
    // Rust
    "rs",
    // JavaScript / TypeScript
    "js", "mjs", "cjs", "jsx", "ts", "mts", "cts", "tsx",
    // Python
    "py", "pyw", "pyi",
    // C / C++
    "c", "h", "cpp", "cc", "cxx", "hpp", "hxx",
    // C#
    "cs", "csx",
    // Java / Kotlin
    "java", "kt", "kts",
    // Go
    "go",
    // Ruby
    "rb", "rake",
    // PHP
    "php",
    // Swift
    "swift",
    // Shell / scripts
    "sh", "bash", "zsh", "fish", "ps1", "psm1", "psd1",
    // Web
    "html", "htm", "css", "scss", "sass", "less", "vue", "svelte",
    // Config / data
    "json", "jsonc", "json5", "yaml", "yml", "toml", "xml", "ini", "conf", "config",
    // .NET
    "csproj", "sln", "props", "targets", "razor", "cshtml",
    // SQL
    "sql",
    // Markdown / docs
    "md", "markdown", "rst",
    // Other
    "graphql", "gql", "proto", "dockerfile",
];
107
/// Directory names that are never indexed.
///
/// A path is skipped when any of its components equals one of these names
/// exactly (case-sensitive).
#[rustfmt::skip]
const IGNORED_DIRS: &[&str] = &[
    // Version control and the tool's own database
    ".git", ".codesearch.db",
    // Dependency and build output
    "node_modules", "target",
    // Python environments and caches
    ".venv", "venv", "__pycache__", ".cache",
    // Generic build artifacts
    "dist", "build", "out", "bin", "obj",
    // IDE state
    ".vs", ".idea", ".vscode",
    // Package stores
    "packages", ".nuget",
];
129
/// A file system change reported by [`FileWatcher`].
#[derive(Clone, Debug, Eq, PartialEq)]
#[allow(dead_code)] // `Renamed` is kept for future rename detection
pub enum FileEvent {
    /// The file at this path was created or its contents changed.
    Modified(PathBuf),
    /// The file (or directory) at this path was removed.
    Deleted(PathBuf),
    /// A file moved; the fields are `(from, to)`.
    Renamed(PathBuf, PathBuf),
}
141
142/// File watcher for incremental indexing
143///
144/// Uses notify-debouncer-full for efficient debounced file watching.
145/// Improvements over osgrep:
146/// 1. Native Rust implementation (faster than Node.js chokidar)
147/// 2. Built-in debouncing (configurable)
148/// 3. Batched events for efficient processing
149pub struct FileWatcher {
150    root: PathBuf,
151    debouncer: Option<Debouncer<RecommendedWatcher, FileIdMap>>,
152    receiver: Option<Receiver<DebounceEventResult>>,
153}
154
155impl FileWatcher {
156    /// Create a new file watcher for the given root directory
157    pub fn new(root: PathBuf) -> Self {
158        Self {
159            root,
160            debouncer: None,
161            receiver: None,
162        }
163    }
164
165    /// Start watching for file changes
166    pub fn start(&mut self, debounce_ms: u64) -> Result<()> {
167        let (tx, rx) = channel();
168
169        let debouncer = new_debouncer(
170            Duration::from_millis(debounce_ms),
171            None, // No tick rate
172            tx,
173        )
174        .map_err(|e| anyhow!("Failed to create file watcher: {}", e))?;
175
176        self.receiver = Some(rx);
177        self.debouncer = Some(debouncer);
178
179        // Start watching the root directory
180        if let Some(ref mut debouncer) = self.debouncer {
181            debouncer
182                .watcher()
183                .watch(&self.root, RecursiveMode::Recursive)
184                .map_err(|e| anyhow!("Failed to watch directory: {}", e))?;
185
186            // Also watch with the cache (for file ID tracking)
187            debouncer
188                .cache()
189                .add_root(&self.root, RecursiveMode::Recursive);
190        }
191
192        Ok(())
193    }
194
195    /// Check if the watcher is currently started (collecting events)
196    pub fn is_started(&self) -> bool {
197        self.debouncer.is_some()
198    }
199
200    /// Stop watching
201    pub fn stop(&mut self) {
202        if let Some(ref mut debouncer) = self.debouncer {
203            let _ = debouncer.watcher().unwatch(&self.root);
204        }
205        self.debouncer = None;
206        self.receiver = None;
207    }
208
209    /// Check if a path is in an ignored directory (.git, node_modules, etc.)
210    fn is_in_ignored_dir(&self, path: &Path) -> bool {
211        for component in path.components() {
212            if let Some(name) = component.as_os_str().to_str() {
213                if IGNORED_DIRS.contains(&name) {
214                    return true;
215                }
216            }
217        }
218        false
219    }
220
221    /// Check if a path should be watched (whitelist approach)
222    /// Only returns true for indexable code/config files
223    fn is_watchable(&self, path: &Path) -> bool {
224        // Check if path is in an ignored directory
225        if self.is_in_ignored_dir(path) {
226            return false;
227        }
228
229        // Must be a file with an indexable extension
230        if let Some(ext) = path.extension() {
231            if let Some(ext_str) = ext.to_str() {
232                return INDEXABLE_EXTENSIONS.contains(&ext_str.to_lowercase().as_str());
233            }
234        }
235
236        // Special case: Dockerfile (no extension)
237        if let Some(name) = path.file_name() {
238            let name_str = name.to_string_lossy().to_lowercase();
239            if name_str == "dockerfile" || name_str == "makefile" || name_str == "cmakelists.txt" {
240                return true;
241            }
242        }
243
244        false
245    }
246
247    /// Poll for file events (non-blocking)
248    /// Returns a batch of deduplicated events
249    pub fn poll_events(&self) -> Vec<FileEvent> {
250        let Some(ref receiver) = self.receiver else {
251            return vec![];
252        };
253
254        let mut events = Vec::new();
255        let mut seen_paths = HashSet::new();
256
257        // Drain all available events
258        while let Ok(result) = receiver.try_recv() {
259            match result {
260                Ok(debounced_events) => {
261                    for event in debounced_events {
262                        for raw_path in &event.paths {
263                            // Normalize path: strip UNC prefix, convert backslashes
264                            let path = normalize_event_path(raw_path);
265
266                            // Skip ignored directories
267                            if self.is_in_ignored_dir(&path) || seen_paths.contains(&path) {
268                                continue;
269                            }
270                            seen_paths.insert(path.clone());
271
272                            // Convert to our event type
273                            use notify::EventKind;
274                            match event.kind {
275                                EventKind::Create(_) | EventKind::Modify(_) => {
276                                    // For creates/modifies, only process indexable files
277                                    if self.is_watchable(&path) && raw_path.exists() {
278                                        events.push(FileEvent::Modified(path));
279                                    }
280                                }
281                                EventKind::Remove(_) => {
282                                    // For removals, don't filter by extension - directory
283                                    // deletions on Windows may only report the directory
284                                    // path (no file extension), not individual files
285                                    events.push(FileEvent::Deleted(path));
286                                }
287                                _ => {}
288                            }
289                        }
290                    }
291                }
292                Err(errors) => {
293                    for error in errors {
294                        tracing::warn!("File watch error: {:?}", error);
295                    }
296                }
297            }
298        }
299
300        events
301    }
302
303    /// Block and wait for events (with timeout)
304    pub fn wait_for_events(&self, timeout: Duration) -> Vec<FileEvent> {
305        let Some(ref receiver) = self.receiver else {
306            return vec![];
307        };
308
309        let mut events = Vec::new();
310        let mut seen_paths = HashSet::new();
311
312        // Wait for first event
313        match receiver.recv_timeout(timeout) {
314            Ok(result) => {
315                self.process_debounce_result(result, &mut events, &mut seen_paths);
316            }
317            Err(_) => return events, // Timeout or disconnected
318        }
319
320        // Drain any additional events that came in
321        while let Ok(result) = receiver.try_recv() {
322            self.process_debounce_result(result, &mut events, &mut seen_paths);
323        }
324
325        events
326    }
327
328    fn process_debounce_result(
329        &self,
330        result: DebounceEventResult,
331        events: &mut Vec<FileEvent>,
332        seen_paths: &mut HashSet<PathBuf>,
333    ) {
334        match result {
335            Ok(debounced_events) => {
336                for event in debounced_events {
337                    for raw_path in &event.paths {
338                        // Normalize path: strip UNC prefix, convert backslashes
339                        let path = normalize_event_path(raw_path);
340
341                        // Skip ignored directories and duplicates
342                        if self.is_in_ignored_dir(&path) || seen_paths.contains(&path) {
343                            continue;
344                        }
345                        seen_paths.insert(path.clone());
346
347                        use notify::EventKind;
348                        match event.kind {
349                            EventKind::Create(_) | EventKind::Modify(_) => {
350                                // For creates/modifies, only process indexable files
351                                if self.is_watchable(&path) && raw_path.exists() {
352                                    events.push(FileEvent::Modified(path));
353                                }
354                            }
355                            EventKind::Remove(_) => {
356                                // For removals, don't filter by extension - directory
357                                // deletions on Windows may only report the directory
358                                // path (no file extension), not individual files
359                                events.push(FileEvent::Deleted(path));
360                            }
361                            _ => {}
362                        }
363                    }
364                }
365            }
366            Err(errors) => {
367                for error in errors {
368                    tracing::warn!("File watch error: {:?}", error);
369                }
370            }
371        }
372    }
373}
374
375impl Drop for FileWatcher {
376    fn drop(&mut self) {
377        self.stop();
378    }
379}
380
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Exercises the whitelist: ignored directories and unknown extensions are
    /// rejected, code/config files and special build files are accepted.
    #[test]
    fn test_is_watchable() {
        let watcher = FileWatcher::new(PathBuf::from("/tmp"));
        let watchable = |candidate: &str| watcher.is_watchable(Path::new(candidate));

        // Rejected: inside an ignored directory
        assert!(!watchable("/tmp/.git/config"));
        assert!(!watchable("/tmp/node_modules/foo/index.js"));
        assert!(!watchable("/tmp/target/debug/main"));
        assert!(!watchable("/tmp/.codesearch.db/data"));

        // Rejected: extension is not on the whitelist
        assert!(!watchable("/tmp/Cargo.lock"));
        assert!(!watchable("/tmp/debug.log"));
        assert!(!watchable("/tmp/image.png"));
        assert!(!watchable("/tmp/data.bin"));

        // Accepted: source code
        assert!(watchable("/tmp/src/main.rs"));
        assert!(watchable("/tmp/src/lib.ts"));
        assert!(watchable("/tmp/Program.cs"));
        assert!(watchable("/tmp/app.py"));

        // Accepted: configuration
        assert!(watchable("/tmp/config.json"));
        assert!(watchable("/tmp/settings.yaml"));
        assert!(watchable("/tmp/Cargo.toml"));
        assert!(watchable("/tmp/appsettings.xml"));

        // Accepted: special extension-less build files
        assert!(watchable("/tmp/Dockerfile"));
        assert!(watchable("/tmp/Makefile"));
    }

    /// End-to-end check: writing an indexable file under a watched temp dir
    /// produces at least one event.
    #[test]
    #[ignore] // Requires actual filesystem events
    fn test_file_watcher() {
        let workspace = tempdir().unwrap();
        let mut watcher = FileWatcher::new(workspace.path().to_path_buf());
        watcher.start(100).unwrap();

        // Create a file inside the watched tree
        let source_file = workspace.path().join("test.rs");
        fs::write(&source_file, "fn main() {}").unwrap();

        // Give the debouncer time to flush before polling
        std::thread::sleep(Duration::from_millis(200));

        assert!(!watcher.poll_events().is_empty());
    }
}