Skip to main content

codemem_watch/
lib.rs

//! codemem-watch: Real-time file watcher for Codemem.
//!
//! Uses `notify` with debouncing to detect file changes and trigger re-indexing.
//! Respects `.gitignore` and common ignore patterns.
5
6use crossbeam_channel::Receiver;
7use ignore::gitignore::{Gitignore, GitignoreBuilder};
8use notify_debouncer_mini::{new_debouncer, DebouncedEventKind};
9use std::collections::HashSet;
10use std::path::{Path, PathBuf};
11use std::sync::Arc;
12use std::time::Duration;
13
/// A debounced filesystem notification produced by the file watcher.
///
/// Every variant carries the path of the file the notification refers to.
#[derive(Clone, Debug)]
pub enum WatchEvent {
    /// The file exists and the watcher has already reported it before
    /// (also used for event kinds the watcher does not classify further).
    FileChanged(PathBuf),
    /// The file exists and this is the first time the watcher reports it.
    FileCreated(PathBuf),
    /// The file no longer exists at the time the notification is processed.
    FileDeleted(PathBuf),
}
21
/// Directory names that are always ignored, wherever they appear in a path.
///
/// The names are compared verbatim against individual path components by
/// `should_ignore`, and are also fed to the gitignore matcher as `name/`
/// patterns by `build_gitignore`.
const IGNORE_DIRS: &[&str] = &[
    // Dependency and build output directories.
    "node_modules",
    "target",
    // Version-control metadata.
    ".git",
    // Python bytecode, virtual environments and tool caches.
    "__pycache__",
    ".venv",
    "venv",
    ".mypy_cache",
    ".pytest_cache",
    // Generic build/bundle output.
    "dist",
    "build",
    ".next",
    // Vendored third-party code and local tool state.
    "vendor",
    ".cargo",
];
38
/// File extensions (without the leading dot) whose changes are reported.
///
/// Covers source-code extensions plus a few configuration formats. Entries
/// are compared verbatim by `is_watchable`.
const WATCHABLE_EXTENSIONS: &[&str] = &[
    // Rust
    "rs",
    // TypeScript / JavaScript
    "ts", "tsx", "js", "jsx",
    // Python, Go
    "py", "go",
    // C / C++
    "c", "cpp", "cc", "cxx", "h", "hpp",
    // Java, Ruby, C#
    "java", "rb", "cs",
    // Kotlin, Swift, PHP
    "kt", "kts", "swift", "php",
    // Scala
    "scala", "sc",
    // Terraform / HCL
    "tf", "hcl", "tfvars",
    // Configuration formats
    "toml", "json", "yaml", "yml",
];
45
46/// Check if a file extension is watchable.
47pub fn is_watchable(path: &Path) -> bool {
48    path.extension()
49        .and_then(|ext| ext.to_str())
50        .map(|ext| WATCHABLE_EXTENSIONS.contains(&ext))
51        .unwrap_or(false)
52}
53
54/// Check if a path should be ignored.
55///
56/// Uses the provided `Gitignore` matcher first (checking the full path and
57/// each ancestor directory), then falls back to the hardcoded `IGNORE_DIRS`
58/// list for paths not covered by `.gitignore`.
59pub fn should_ignore(path: &Path, gitignore: Option<&Gitignore>) -> bool {
60    if let Some(gi) = gitignore {
61        // Check the file itself
62        if gi.matched(path, path.is_dir()).is_ignore() {
63            return true;
64        }
65        // Check each ancestor directory against the gitignore
66        let mut current = path.to_path_buf();
67        while current.pop() {
68            if gi.matched(&current, true).is_ignore() {
69                return true;
70            }
71        }
72    }
73    // Fallback to hardcoded dirs
74    for component in path.components() {
75        if let std::path::Component::Normal(name) = component {
76            if let Some(name_str) = name.to_str() {
77                if IGNORE_DIRS.contains(&name_str) {
78                    return true;
79                }
80            }
81        }
82    }
83    false
84}
85
86/// Build a `Gitignore` matcher from a project root.
87///
88/// Reads `.gitignore` if present, and also adds the hardcoded `IGNORE_DIRS`
89/// as fallback patterns.
90pub fn build_gitignore(root: &Path) -> Option<Gitignore> {
91    let mut builder = GitignoreBuilder::new(root);
92    // Add .gitignore if it exists
93    if let Some(err) = builder.add(root.join(".gitignore")) {
94        tracing::debug!("No .gitignore found: {err}");
95    }
96    // Add fallback patterns (use glob-style to match as directories anywhere)
97    for dir in IGNORE_DIRS {
98        let _ = builder.add_line(None, &format!("{dir}/"));
99    }
100    builder.build().ok()
101}
102
/// Map a file's extension to the name of its programming language.
///
/// Returns `None` when the path has no extension, the extension is not valid
/// UTF-8, or the extension is not one of the recognised ones (matching is
/// exact, so `MAIN.RS` is not recognised).
pub fn detect_language(path: &Path) -> Option<&'static str> {
    let ext = path.extension()?.to_str()?;
    let language = match ext {
        "rs" => "rust",
        "ts" | "tsx" => "typescript",
        "js" | "jsx" => "javascript",
        "py" => "python",
        "go" => "go",
        "c" | "h" => "c",
        "cpp" | "cc" | "cxx" | "hpp" => "cpp",
        "java" => "java",
        "rb" => "ruby",
        "cs" => "csharp",
        "kt" | "kts" => "kotlin",
        "swift" => "swift",
        "php" => "php",
        "scala" | "sc" => "scala",
        "tf" | "hcl" | "tfvars" => "hcl",
        _ => return None,
    };
    Some(language)
}
126
/// File watcher that monitors a directory for changes with 50ms debouncing.
///
/// Created with `FileWatcher::new`; the resulting `WatchEvent`s are read from
/// the channel returned by `FileWatcher::receiver`.
pub struct FileWatcher {
    // Never read after construction (hence the `_` prefix); stored so the
    // debouncer lives as long as the `FileWatcher`.
    // NOTE(review): presumably dropping it stops the watch — confirm against
    // the notify-debouncer-mini docs.
    _debouncer: notify_debouncer_mini::Debouncer<notify::RecommendedWatcher>,
    // Receiving end of the channel the event handler sends `WatchEvent`s into.
    receiver: Receiver<WatchEvent>,
    // Matcher built in `new`. The event handler works on its own `Arc` clone,
    // so this field itself is never read — hence the lint allowance.
    #[allow(dead_code)]
    gitignore: Arc<Option<Gitignore>>,
}
134
135impl FileWatcher {
136    /// Create a new file watcher for the given root directory.
137    pub fn new(root: &Path) -> Result<Self, codemem_core::CodememError> {
138        let (tx, rx) = crossbeam_channel::unbounded::<WatchEvent>();
139        let event_tx = tx;
140
141        let gitignore = Arc::new(build_gitignore(root));
142        let gi_clone = Arc::clone(&gitignore);
143
144        // Track files we've already seen so we can distinguish create vs modify.
145        let known_files = std::sync::Mutex::new(HashSet::<PathBuf>::new());
146
147        let mut debouncer = new_debouncer(
148            Duration::from_millis(50),
149            move |res: Result<Vec<notify_debouncer_mini::DebouncedEvent>, notify::Error>| match res
150            {
151                Ok(events) => {
152                    let mut seen = HashSet::new();
153                    for event in events {
154                        let path = event.path;
155                        if !seen.insert(path.clone()) {
156                            continue;
157                        }
158                        if should_ignore(&path, gi_clone.as_ref().as_ref()) || !is_watchable(&path)
159                        {
160                            continue;
161                        }
162                        let watch_event = match event.kind {
163                            DebouncedEventKind::Any => {
164                                if path.exists() {
165                                    if let Ok(mut known) = known_files.lock() {
166                                        if known.insert(path.clone()) {
167                                            WatchEvent::FileCreated(path)
168                                        } else {
169                                            WatchEvent::FileChanged(path)
170                                        }
171                                    } else {
172                                        WatchEvent::FileChanged(path)
173                                    }
174                                } else {
175                                    if let Ok(mut known) = known_files.lock() {
176                                        known.remove(&path);
177                                    }
178                                    WatchEvent::FileDeleted(path)
179                                }
180                            }
181                            DebouncedEventKind::AnyContinuous => WatchEvent::FileChanged(path),
182                            _ => WatchEvent::FileChanged(path),
183                        };
184                        let _ = event_tx.send(watch_event);
185                    }
186                }
187                Err(e) => {
188                    tracing::error!("Watch error: {e}");
189                }
190            },
191        )
192        .map_err(|e| {
193            codemem_core::CodememError::Io(std::io::Error::other(format!(
194                "Failed to create debouncer: {e}"
195            )))
196        })?;
197
198        debouncer
199            .watcher()
200            .watch(root, notify::RecursiveMode::Recursive)
201            .map_err(|e| {
202                codemem_core::CodememError::Io(std::io::Error::other(format!(
203                    "Failed to watch {}: {e}",
204                    root.display()
205                )))
206            })?;
207
208        tracing::info!("Watching {} for changes", root.display());
209
210        Ok(Self {
211            _debouncer: debouncer,
212            receiver: rx,
213            gitignore,
214        })
215    }
216
217    /// Get the receiver for watch events.
218    pub fn receiver(&self) -> &Receiver<WatchEvent> {
219        &self.receiver
220    }
221}
222
#[cfg(test)]
mod tests {
    use super::*;

    /// Known code extensions are watchable; binary/image extensions are not.
    #[test]
    fn test_is_watchable() {
        assert!(is_watchable(Path::new("src/main.rs")));
        assert!(is_watchable(Path::new("index.ts")));
        assert!(is_watchable(Path::new("app.py")));
        assert!(is_watchable(Path::new("main.go")));
        assert!(!is_watchable(Path::new("image.png")));
        assert!(!is_watchable(Path::new("binary.exe")));
    }

    /// Without a matcher, only the hardcoded `IGNORE_DIRS` component check applies.
    #[test]
    fn test_should_ignore_without_gitignore() {
        assert!(should_ignore(
            Path::new("project/node_modules/foo/bar.js"),
            None
        ));
        assert!(should_ignore(
            Path::new("project/target/debug/build.rs"),
            None
        ));
        assert!(should_ignore(Path::new(".git/config"), None));
        assert!(!should_ignore(Path::new("src/main.rs"), None));
        assert!(!should_ignore(Path::new("lib/utils.ts"), None));
    }

    /// Patterns from a real `.gitignore` file are honoured alongside the
    /// hardcoded directory names.
    #[test]
    fn test_should_ignore_with_gitignore() {
        let dir = tempfile::tempdir().unwrap();
        let gitignore_path = dir.path().join(".gitignore");
        std::fs::write(&gitignore_path, "*.log\nsecrets/\n").unwrap();

        let gi = build_gitignore(dir.path()).unwrap();

        // Matches .gitignore pattern (*.log)
        assert!(should_ignore(&dir.path().join("debug.log"), Some(&gi)));

        // Matches .gitignore pattern (secrets/). `secrets` is not one of the
        // hardcoded IGNORE_DIRS, so this relies on should_ignore testing each
        // ancestor of the file as a directory against the matcher.
        assert!(should_ignore(
            &dir.path().join("secrets/key.txt"),
            Some(&gi)
        ));

        // `node_modules` is a hardcoded IGNORE_DIRS entry: covered both by the
        // pattern build_gitignore adds and by the component fallback check.
        assert!(should_ignore(
            &dir.path().join("node_modules/foo.js"),
            Some(&gi)
        ));

        // Not ignored
        assert!(!should_ignore(&dir.path().join("src/main.rs"), Some(&gi)));
    }

    /// A matcher is still produced when the project has no `.gitignore`.
    #[test]
    fn test_build_gitignore_without_file() {
        let dir = tempfile::tempdir().unwrap();
        // No .gitignore file exists
        let gi = build_gitignore(dir.path());
        // Should still return Some since we add fallback patterns
        assert!(gi.is_some());

        // Hardcoded dirs are still ignored when going through should_ignore
        let gi = gi.unwrap();
        assert!(should_ignore(
            &dir.path().join("node_modules/foo.js"),
            Some(&gi)
        ));
    }

    /// Recognised extensions map to language names; unknown ones yield `None`.
    #[test]
    fn test_detect_language() {
        assert_eq!(detect_language(Path::new("main.rs")), Some("rust"));
        assert_eq!(detect_language(Path::new("app.tsx")), Some("typescript"));
        assert_eq!(detect_language(Path::new("script.py")), Some("python"));
        assert_eq!(detect_language(Path::new("main.go")), Some("go"));
        assert_eq!(detect_language(Path::new("readme.md")), None);
    }

    /// A file written after the watcher starts is reported as `FileCreated`.
    /// Depends on real filesystem notifications arriving within the timeout.
    #[test]
    fn test_new_file_emits_file_created() {
        let dir = tempfile::tempdir().unwrap();
        let watcher = FileWatcher::new(dir.path()).unwrap();
        let rx = watcher.receiver();

        // Create a new watchable file after the watcher starts
        let file_path = dir.path().join("hello.rs");
        std::fs::write(&file_path, "fn main() {}").unwrap();

        // Wait for the debounced event (50ms debounce + margin)
        let event = rx.recv_timeout(Duration::from_secs(2));
        assert!(event.is_ok(), "expected a watch event for new file");
        assert!(
            matches!(event.unwrap(), WatchEvent::FileCreated(_)),
            "new file should emit FileCreated"
        );
    }

    /// The watcher's first event for a path is `FileCreated` even if the file
    /// pre-dates the watcher; only later writes are reported as `FileChanged`.
    #[test]
    fn test_modified_file_emits_file_changed() {
        let dir = tempfile::tempdir().unwrap();

        // Create file before the watcher starts so it's not "new"
        let file_path = dir.path().join("existing.rs");
        std::fs::write(&file_path, "fn main() {}").unwrap();

        let watcher = FileWatcher::new(dir.path()).unwrap();
        let rx = watcher.receiver();

        // First touch: watcher hasn't seen it yet, so this is FileCreated
        std::fs::write(&file_path, "fn main() { println!(); }").unwrap();
        let first = rx.recv_timeout(Duration::from_secs(2));
        assert!(first.is_ok(), "expected event for first write");
        assert!(matches!(first.unwrap(), WatchEvent::FileCreated(_)));

        // Second touch: watcher has now seen it, so this should be FileChanged.
        // The sleep lets the first debounce window close before writing again.
        std::thread::sleep(Duration::from_millis(100));
        std::fs::write(&file_path, "fn main() { eprintln!(); }").unwrap();
        let second = rx.recv_timeout(Duration::from_secs(2));
        assert!(second.is_ok(), "expected event for second write");
        assert!(
            matches!(second.unwrap(), WatchEvent::FileChanged(_)),
            "subsequent modification should emit FileChanged"
        );
    }
}