// ygrep_core/watcher/mod.rs

//! File system watcher for incremental index updates
2
use std::path::{Component, Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;

use notify_debouncer_full::{new_debouncer, notify::RecursiveMode, DebounceEventResult};
use parking_lot::Mutex;
use tokio::sync::mpsc;

use crate::config::IndexerConfig;
use crate::error::{Result, YgrepError};
13
/// Events emitted by the file watcher.
///
/// Path-carrying variants hold the path exactly as reported by the underlying notify event.
/// The type is comparable and hashable so callers can deduplicate or assert on events.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum WatchEvent {
    /// File was created or modified
    Changed(PathBuf),
    /// File was deleted (also used for removed paths whose kind is no longer known)
    Deleted(PathBuf),
    /// Directory was created
    DirCreated(PathBuf),
    /// Directory was deleted
    DirDeleted(PathBuf),
    /// Error occurred while watching; carries the stringified watcher error
    Error(String),
}
28
29type PlatformDebouncer = notify_debouncer_full::Debouncer<
30    notify_debouncer_full::notify::RecommendedWatcher,
31    notify_debouncer_full::RecommendedCache,
32>;
33
34/// File system watcher with debouncing
35pub struct FileWatcher {
36    root: PathBuf,
37    #[allow(dead_code)]
38    config: IndexerConfig,
39    debouncer: PlatformDebouncer,
40    event_rx: mpsc::UnboundedReceiver<WatchEvent>,
41    /// All paths being watched (root + symlink targets)
42    watched_paths: Vec<PathBuf>,
43}
44
45impl FileWatcher {
46    /// Create a new file watcher for the given directory
47    pub fn new(root: PathBuf, config: IndexerConfig) -> Result<Self> {
48        let (event_tx, event_rx) = mpsc::unbounded_channel();
49        let event_tx = Arc::new(Mutex::new(event_tx));
50
51        // Find symlink targets upfront so we can pass them to the event handler
52        let symlink_targets = if config.follow_symlinks {
53            find_symlink_targets(&root)
54        } else {
55            vec![]
56        };
57
58        // Build list of all watched paths
59        let mut watched_paths = vec![root.clone()];
60        watched_paths.extend(symlink_targets.clone());
61        let watched_paths_for_closure = watched_paths.clone();
62
63        // Clone for the closure
64        let config_clone = config.clone();
65
66        // Create debouncer with 500ms delay
67        let debouncer = new_debouncer(
68            Duration::from_millis(500),
69            None,
70            move |result: DebounceEventResult| {
71                use std::collections::HashSet;
72
73                let tx = event_tx.lock();
74                match result {
75                    Ok(events) => {
76                        // Deduplicate events by path to avoid processing same file twice
77                        let mut seen_changed: HashSet<PathBuf> = HashSet::new();
78                        let mut seen_deleted: HashSet<PathBuf> = HashSet::new();
79
80                        for event in events {
81                            let watch_events = process_notify_event(
82                                &event,
83                                &watched_paths_for_closure,
84                                &config_clone,
85                            );
86                            for e in watch_events {
87                                match &e {
88                                    WatchEvent::Changed(p) => {
89                                        if seen_changed.insert(p.clone()) {
90                                            let _ = tx.send(e);
91                                        }
92                                    }
93                                    WatchEvent::Deleted(p) => {
94                                        if seen_deleted.insert(p.clone()) {
95                                            let _ = tx.send(e);
96                                        }
97                                    }
98                                    _ => {
99                                        let _ = tx.send(e);
100                                    }
101                                }
102                            }
103                        }
104                    }
105                    Err(errors) => {
106                        for e in errors {
107                            let _ = tx.send(WatchEvent::Error(e.to_string()));
108                        }
109                    }
110                }
111            },
112        )
113        .map_err(|e| YgrepError::WatchError(e.to_string()))?;
114
115        Ok(Self {
116            root,
117            config,
118            debouncer,
119            event_rx,
120            watched_paths,
121        })
122    }
123
124    /// Start watching the directory
125    pub fn start(&mut self) -> Result<()> {
126        // Watch all paths (root + symlink targets found during construction)
127        for path in &self.watched_paths {
128            match self.debouncer.watch(path, RecursiveMode::Recursive) {
129                Ok(()) => {
130                    if path == &self.root {
131                        tracing::info!("Started watching: {}", path.display());
132                    } else {
133                        tracing::info!("Also watching symlink target: {}", path.display());
134                    }
135                }
136                Err(e) => {
137                    if path == &self.root {
138                        return Err(YgrepError::WatchError(e.to_string()));
139                    } else {
140                        tracing::warn!("Failed to watch symlink target {}: {}", path.display(), e);
141                    }
142                }
143            }
144        }
145
146        Ok(())
147    }
148
149    /// Stop watching all paths (root + symlink targets)
150    pub fn stop(&mut self) -> Result<()> {
151        for path in &self.watched_paths {
152            match self.debouncer.unwatch(path) {
153                Ok(()) => {
154                    tracing::info!("Stopped watching: {}", path.display());
155                }
156                Err(e) => {
157                    tracing::warn!("Failed to unwatch {}: {}", path.display(), e);
158                }
159            }
160        }
161        Ok(())
162    }
163
164    /// Get the next watch event (async)
165    pub async fn next_event(&mut self) -> Option<WatchEvent> {
166        self.event_rx.recv().await
167    }
168
169    /// Try to get the next watch event without waiting (returns None if no event queued)
170    pub fn try_next_event(&mut self) -> Option<WatchEvent> {
171        self.event_rx.try_recv().ok()
172    }
173
174    /// Get the root directory being watched
175    pub fn root(&self) -> &Path {
176        &self.root
177    }
178}
179
180/// Process a notify event and convert to WatchEvent(s)
181fn process_notify_event(
182    event: &notify_debouncer_full::DebouncedEvent,
183    watched_paths: &[PathBuf],
184    config: &IndexerConfig,
185) -> Vec<WatchEvent> {
186    use notify::EventKind;
187
188    let mut events = Vec::new();
189
190    for path in &event.paths {
191        // Skip if path is not under any watched path
192        let is_under_watched = watched_paths.iter().any(|wp| path.starts_with(wp));
193        if !is_under_watched {
194            continue;
195        }
196
197        // Skip hidden files/directories
198        if is_hidden(path) {
199            continue;
200        }
201
202        // Skip ignored directories
203        if is_ignored_dir(path) {
204            continue;
205        }
206
207        // Skip files matching ignore patterns
208        if matches_ignore_pattern(path, config) {
209            continue;
210        }
211
212        match event.kind {
213            EventKind::Create(_) => {
214                if path.is_dir() {
215                    events.push(WatchEvent::DirCreated(path.clone()));
216                } else if path.is_file() {
217                    events.push(WatchEvent::Changed(path.clone()));
218                }
219            }
220            EventKind::Modify(_) => {
221                if path.is_file() {
222                    events.push(WatchEvent::Changed(path.clone()));
223                }
224            }
225            EventKind::Remove(_) => {
226                // Can't check if it was a file or dir since it's deleted
227                // We'll handle both cases in the indexer
228                events.push(WatchEvent::Deleted(path.clone()));
229            }
230            _ => {}
231        }
232    }
233
234    events
235}
236
/// Check if a path is hidden, i.e. any named component starts with `.`.
///
/// Only real names are inspected: the special `.` and `..` components, the root and
/// platform prefixes never make a path hidden, so `./src/main.rs` is not hidden while
/// `./.env` is. Names that are not valid UTF-8 are checked on their lossy conversion.
fn is_hidden(path: &Path) -> bool {
    path.components().any(|component| match component {
        Component::Normal(name) => name.to_string_lossy().starts_with('.'),
        _ => false,
    })
}
246
247/// Find all symlink targets in a directory tree
248/// Returns the canonical paths of directories that are symlinked
249fn find_symlink_targets(root: &Path) -> Vec<PathBuf> {
250    use std::collections::HashSet;
251    use walkdir::WalkDir;
252
253    let mut targets = HashSet::new();
254
255    for entry in WalkDir::new(root)
256        .follow_links(false) // Don't follow links during walk
257        .into_iter()
258        .filter_map(|e| e.ok())
259    {
260        let path = entry.path();
261
262        // Check if this is a symlink to a directory
263        if path.is_symlink() {
264            if let Ok(target) = std::fs::read_link(path) {
265                // Resolve to absolute path
266                let absolute_target = if target.is_absolute() {
267                    target
268                } else {
269                    path.parent().map(|p| p.join(&target)).unwrap_or(target)
270                };
271
272                // Canonicalize to resolve any .. or . components
273                if let Ok(canonical) = std::fs::canonicalize(&absolute_target) {
274                    if canonical.is_dir() && !is_ignored_dir(&canonical) {
275                        targets.insert(canonical);
276                    }
277                }
278            }
279        }
280    }
281
282    targets.into_iter().collect()
283}
284
/// Check if path is in an ignored directory.
///
/// Returns `true` when any component of `path` equals one of the built-in ignored names.
/// The comparison is exact and case-sensitive, and it applies to every component including
/// the last one. Components that are not valid UTF-8 never match.
fn is_ignored_dir(path: &Path) -> bool {
    const IGNORED_DIRS: &[&str] = &[
        "node_modules",
        "vendor",
        "target",
        "dist",
        "build",
        "cache",
        ".git",
        "__pycache__",
        "logs",
        "tmp",
    ];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|name| IGNORED_DIRS.contains(&name))
}
307
308/// Check if path matches custom ignore patterns
309fn matches_ignore_pattern(path: &Path, config: &IndexerConfig) -> bool {
310    let path_str = path.to_string_lossy();
311
312    for pattern in &config.ignore_patterns {
313        if glob_match(pattern, &path_str) {
314            return true;
315        }
316    }
317
318    false
319}
320
/// Simple glob matching (modelled on the matcher in walker.rs).
///
/// This is not a general glob engine. It recognises these pattern shapes, tried in order:
///
/// | Pattern      | Matches                                                      |
/// |--------------|--------------------------------------------------------------|
/// | `**/**`      | everything                                                   |
/// | `**/dir/**`  | `dir` appearing as whole component(s) anywhere in the path   |
/// | `**/*.ext`   | any path ending in `.ext`                                    |
/// | `**/name`    | the path `name` itself, or any path ending in `/name`        |
/// | `dir/**`     | `dir` appearing as whole component(s) anywhere in the path   |
/// | `*.ext`      | any path ending in `.ext`                                    |
/// | anything else| exact path, trailing `/pattern`, or interior `/pattern/`     |
///
/// Directory and name patterns respect component boundaries: `build/**` matches
/// `/p/build/x` but not `/p/builder.rs`. Paths are expected to use `/` as the separator.
/// The function never panics, whatever the pattern.
fn glob_match(pattern: &str, path: &str) -> bool {
    // True when `name` occurs in `path` as a complete run of components.
    let has_component = |name: &str| {
        path == name
            || path.starts_with(&format!("{}/", name))
            || path.ends_with(&format!("/{}", name))
            || path.contains(&format!("/{}/", name))
    };

    // The two wildcards share the `/` here. Without this case the `**/dir/**` branch would
    // have nothing left between prefix and suffix.
    if pattern == "**/**" {
        return true;
    }

    // Handle **/dir/** patterns (match dir anywhere in path)
    if let Some(dir_name) = pattern
        .strip_prefix("**/")
        .and_then(|rest| rest.strip_suffix("/**"))
    {
        return has_component(dir_name);
    }

    // Handle **/*.ext patterns (match extension anywhere)
    if let Some(ext) = pattern.strip_prefix("**/*.") {
        return path.ends_with(&format!(".{}", ext));
    }

    // Handle **/something patterns (match a whole trailing name)
    if let Some(suffix) = pattern.strip_prefix("**/") {
        // A bare `**/` has no name to compare and matches everything.
        return suffix.is_empty() || path == suffix || path.ends_with(&format!("/{}", suffix));
    }

    // Handle something/** patterns (match the directory as whole component(s))
    if let Some(prefix) = pattern.strip_suffix("/**") {
        // A bare `/**` has no directory to compare and matches everything.
        return prefix.is_empty() || has_component(prefix);
    }

    // Handle simple * patterns (*.ext)
    if let Some(ext) = pattern.strip_prefix("*.") {
        return path.ends_with(&format!(".{}", ext));
    }

    // Exact match or path component match
    path == pattern
        || path.ends_with(&format!("/{}", pattern))
        || path.contains(&format!("/{}/", pattern))
}
360
/// Unit tests for the path filters and the glob matcher.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_hidden() {
        assert!(is_hidden(Path::new("/foo/.git/config")));
        assert!(is_hidden(Path::new("/foo/.hidden")));
        assert!(!is_hidden(Path::new("/foo/bar/baz.rs")));
    }

    #[test]
    fn test_is_hidden_root_not_hidden() {
        // A path with no hidden components
        assert!(!is_hidden(Path::new("/usr/local/bin")));
    }

    #[test]
    fn test_is_hidden_nested_in_hidden() {
        // File nested inside a hidden directory
        assert!(is_hidden(Path::new("/project/.cache/data/file.txt")));
    }

    #[test]
    fn test_is_ignored_dir() {
        assert!(is_ignored_dir(Path::new("/foo/node_modules/bar")));
        assert!(is_ignored_dir(Path::new("/foo/vendor/package")));
        assert!(!is_ignored_dir(Path::new("/foo/src/main.rs")));
    }

    #[test]
    fn test_matches_ignore_pattern_with_config() {
        let mut config = IndexerConfig::default();
        config.ignore_patterns = vec!["**/*.log".to_string(), "**/temp/**".to_string()];

        assert!(matches_ignore_pattern(
            Path::new("/project/debug.log"),
            &config
        ));
        assert!(matches_ignore_pattern(
            Path::new("/project/temp/cache.txt"),
            &config
        ));
        assert!(!matches_ignore_pattern(
            Path::new("/project/src/main.rs"),
            &config
        ));
    }

    #[test]
    fn test_glob_match_patterns() {
        // **/dir/** patterns
        assert!(glob_match(
            "**/node_modules/**",
            "/project/node_modules/pkg/index.js"
        ));
        assert!(!glob_match("**/node_modules/**", "/project/src/main.rs"));

        // **/*.ext patterns
        assert!(glob_match("**/*.log", "/var/logs/app.log"));
        assert!(!glob_match("**/*.log", "/var/logs/app.txt"));

        // *.ext patterns
        assert!(glob_match("*.pyc", "module.pyc"));
        assert!(!glob_match("*.pyc", "module.py"));

        // Exact/component match
        assert!(glob_match("Cargo.lock", "/project/Cargo.lock"));
    }
}