Skip to main content

codemem_engine/watch/
mod.rs

1//! Watch module: Real-time file watcher for Codemem.
2//!
3//! Uses `notify` with debouncing to detect file changes and trigger re-indexing.
4//! Respects `.gitignore` and common ignore patterns.
5
6use crossbeam_channel::Receiver;
7use ignore::gitignore::{Gitignore, GitignoreBuilder};
8use notify_debouncer_mini::new_debouncer;
9use std::collections::HashSet;
10use std::path::{Path, PathBuf};
11use std::sync::Arc;
12use std::time::Duration;
13
/// Events emitted by the file watcher.
///
/// The variant is chosen from the filesystem state at the time the debounced
/// batch is processed, not from the platform-specific notification kind.
/// Events are comparable and hashable so consumers can de-duplicate them or
/// assert on them directly.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum WatchEvent {
    /// The path exists and was already reported by this watcher before.
    FileChanged(PathBuf),
    /// The path exists and is being reported by this watcher for the first
    /// time.
    FileCreated(PathBuf),
    /// The path no longer exists.
    FileDeleted(PathBuf),
}
21
/// Default ignore directory names.
///
/// These are bare directory names, not paths or globs. `should_ignore`
/// compares them against individual path components, and `build_gitignore`
/// registers each one as a `<name>/` pattern so they apply even when the
/// project has no `.gitignore`.
const IGNORE_DIRS: &[&str] = &[
    "node_modules",
    "target",
    ".git",
    "__pycache__",
    ".venv",
    "venv",
    ".mypy_cache",
    ".pytest_cache",
    "dist",
    "build",
    ".next",
    "vendor",
    ".cargo",
];
38
/// Watchable file extensions (code files).
///
/// Entries are written without the leading dot and are compared exactly
/// (case-sensitively) by `is_watchable`. The list also contains configuration
/// formats (`toml`, `json`, `yaml`, `yml`) for which `detect_language` returns
/// `None`.
const WATCHABLE_EXTENSIONS: &[&str] = &[
    "rs", "ts", "tsx", "js", "jsx", "py", "go", "c", "cpp", "cc", "cxx", "h", "hpp", "java", "rb",
    "cs", "kt", "kts", "swift", "php", "scala", "sc", "tf", "hcl", "tfvars", "toml", "json",
    "yaml", "yml",
];
45
46/// Check if a file extension is watchable.
47pub fn is_watchable(path: &Path) -> bool {
48    path.extension()
49        .and_then(|ext| ext.to_str())
50        .map(|ext| WATCHABLE_EXTENSIONS.contains(&ext))
51        .unwrap_or(false)
52}
53
54/// Check if a path should be ignored.
55///
56/// Uses the provided `Gitignore` matcher first (checking the full path and
57/// each ancestor directory), then falls back to the hardcoded `IGNORE_DIRS`
58/// list for paths not covered by `.gitignore`.
59///
60/// `is_dir` indicates whether the path is a directory. Pass `false` for file
61/// events from the watcher to avoid a redundant `stat` syscall per event.
62pub fn should_ignore(path: &Path, gitignore: Option<&Gitignore>, is_dir: bool) -> bool {
63    if let Some(gi) = gitignore {
64        // Check the file itself
65        if gi.matched(path, is_dir).is_ignore() {
66            return true;
67        }
68        // Check each ancestor directory against the gitignore.
69        // NOTE: This traverses all the way to the filesystem root. In practice
70        // this is harmless because gitignore patterns only match relative to the
71        // gitignore root, but a future improvement could accept the project root
72        // and stop traversal there.
73        let mut current = path.to_path_buf();
74        while current.pop() {
75            if gi.matched(&current, true).is_ignore() {
76                return true;
77            }
78        }
79    }
80    // Fallback to hardcoded dirs
81    for component in path.components() {
82        if let std::path::Component::Normal(name) = component {
83            if let Some(name_str) = name.to_str() {
84                if IGNORE_DIRS.contains(&name_str) {
85                    return true;
86                }
87            }
88        }
89    }
90    false
91}
92
93/// Build a `Gitignore` matcher from a project root.
94///
95/// Reads `.gitignore` if present, and also adds the hardcoded `IGNORE_DIRS`
96/// as fallback patterns.
97pub fn build_gitignore(root: &Path) -> Option<Gitignore> {
98    let mut builder = GitignoreBuilder::new(root);
99    // Add .gitignore if it exists
100    if let Some(err) = builder.add(root.join(".gitignore")) {
101        tracing::debug!("No .gitignore found: {err}");
102    }
103    // Add fallback patterns (use glob-style to match as directories anywhere)
104    for dir in IGNORE_DIRS {
105        let _ = builder.add_line(None, &format!("{dir}/"));
106    }
107    builder.build().ok()
108}
109
/// Map a file's extension to the name of its programming language.
///
/// Returns `None` when the path has no extension, when the extension is not
/// valid UTF-8, or when the extension is not one of the recognised ones.
/// Matching is case-sensitive.
pub fn detect_language(path: &Path) -> Option<&'static str> {
    let extension = path.extension()?.to_str()?;
    let language = match extension {
        "rs" => "rust",
        "ts" | "tsx" => "typescript",
        "js" | "jsx" => "javascript",
        "py" => "python",
        "go" => "go",
        "c" | "h" => "c",
        "cpp" | "cc" | "cxx" | "hpp" => "cpp",
        "java" => "java",
        "rb" => "ruby",
        "cs" => "csharp",
        "kt" | "kts" => "kotlin",
        "swift" => "swift",
        "php" => "php",
        "scala" | "sc" => "scala",
        "tf" | "hcl" | "tfvars" => "hcl",
        _ => return None,
    };
    Some(language)
}
133
/// File watcher that monitors a directory for changes with 50ms debouncing.
///
/// Filtered events are delivered on the channel returned by
/// `FileWatcher::receiver`.
pub struct FileWatcher {
    // Owned here but never read.
    // NOTE(review): presumably kept so the underlying watcher lives as long
    // as this struct — confirm against `notify_debouncer_mini`'s drop behavior.
    _debouncer: notify_debouncer_mini::Debouncer<notify::RecommendedWatcher>,
    // Receiving half of the channel the debouncer callback sends events on.
    receiver: Receiver<WatchEvent>,
    // Matcher built from the watched root. The debouncer callback uses its own
    // `Arc` clone, so this field is not read in this file; that is why the
    // `dead_code` lint is silenced.
    #[allow(dead_code)]
    gitignore: Arc<Option<Gitignore>>,
}
141
142impl FileWatcher {
143    /// Create a new file watcher for the given root directory.
144    pub fn new(root: &Path) -> Result<Self, codemem_core::CodememError> {
145        let (tx, rx) = crossbeam_channel::unbounded::<WatchEvent>();
146        let event_tx = tx;
147
148        let gitignore = Arc::new(build_gitignore(root));
149        let gi_clone = Arc::clone(&gitignore);
150
151        // Track files we've already seen so we can distinguish create vs modify.
152        let known_files = std::sync::Mutex::new(HashSet::<PathBuf>::new());
153
154        let mut debouncer = new_debouncer(
155            Duration::from_millis(50),
156            move |res: Result<Vec<notify_debouncer_mini::DebouncedEvent>, notify::Error>| match res
157            {
158                Ok(events) => {
159                    let mut seen = HashSet::new();
160                    for event in events {
161                        let path = event.path;
162                        if !seen.insert(path.clone()) {
163                            continue;
164                        }
165                        // Watcher events are always files, so pass is_dir=false
166                        // to avoid a stat syscall per event.
167                        if should_ignore(&path, gi_clone.as_ref().as_ref(), false)
168                            || !is_watchable(&path)
169                        {
170                            continue;
171                        }
172                        // Determine event type from filesystem state + known-files
173                        // set rather than the debouncer event kind, which varies
174                        // across platforms (FSEvents on macOS vs inotify on Linux).
175                        let watch_event = if path.exists() {
176                            if let Ok(mut known) = known_files.lock() {
177                                // Prevent unbounded growth: clear when exceeding 50K entries.
178                                // After clearing, all subsequent files will appear as "created"
179                                // until the set repopulates, which is acceptable.
180                                if known.len() > 50_000 {
181                                    known.clear();
182                                }
183                                if known.insert(path.clone()) {
184                                    WatchEvent::FileCreated(path)
185                                } else {
186                                    WatchEvent::FileChanged(path)
187                                }
188                            } else {
189                                WatchEvent::FileChanged(path)
190                            }
191                        } else {
192                            if let Ok(mut known) = known_files.lock() {
193                                known.remove(&path);
194                            }
195                            WatchEvent::FileDeleted(path)
196                        };
197                        let _ = event_tx.send(watch_event);
198                    }
199                }
200                Err(e) => {
201                    tracing::error!("Watch error: {e}");
202                }
203            },
204        )
205        .map_err(|e| {
206            codemem_core::CodememError::Io(std::io::Error::other(format!(
207                "Failed to create debouncer: {e}"
208            )))
209        })?;
210
211        debouncer
212            .watcher()
213            .watch(root, notify::RecursiveMode::Recursive)
214            .map_err(|e| {
215                codemem_core::CodememError::Io(std::io::Error::other(format!(
216                    "Failed to watch {}: {e}",
217                    root.display()
218                )))
219            })?;
220
221        tracing::info!("Watching {} for changes", root.display());
222
223        Ok(Self {
224            _debouncer: debouncer,
225            receiver: rx,
226            gitignore,
227        })
228    }
229
230    /// Get the receiver for watch events.
231    pub fn receiver(&self) -> &Receiver<WatchEvent> {
232        &self.receiver
233    }
234}
235
236#[cfg(test)]
237#[path = "tests/lib_tests.rs"]
238mod tests;