ygrep_core/watcher/
mod.rs1use std::path::{Path, PathBuf};
4use std::sync::Arc;
5use std::time::Duration;
6
7use notify_debouncer_full::{new_debouncer, notify::RecursiveMode, DebounceEventResult};
8use parking_lot::Mutex;
9use tokio::sync::mpsc;
10
11use crate::config::IndexerConfig;
12use crate::error::{Result, YgrepError};
13
/// A filesystem change reported by [`FileWatcher`].
///
/// `PartialEq`/`Eq` are derived so events can be compared directly, for
/// example in tests or when a consumer wants to coalesce repeated events.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WatchEvent {
    /// A file at this path was created or its contents/metadata changed.
    Changed(PathBuf),
    /// The path no longer exists at this location. The watcher cannot tell
    /// whether it used to be a file or a directory.
    Deleted(PathBuf),
    /// A directory was created at this path.
    DirCreated(PathBuf),
    /// A directory was removed.
    ///
    /// NOTE(review): nothing in this module constructs this variant; removals
    /// are reported as [`WatchEvent::Deleted`]. Confirm whether consumers rely
    /// on it before removing it.
    DirDeleted(PathBuf),
    /// The underlying watcher reported an error; the payload is the error
    /// rendered as text.
    Error(String),
}
28
29type PlatformDebouncer = notify_debouncer_full::Debouncer<
30 notify_debouncer_full::notify::RecommendedWatcher,
31 notify_debouncer_full::RecommendedCache,
32>;
33
34pub struct FileWatcher {
36 root: PathBuf,
37 #[allow(dead_code)]
38 config: IndexerConfig,
39 debouncer: PlatformDebouncer,
40 event_rx: mpsc::UnboundedReceiver<WatchEvent>,
41 watched_paths: Vec<PathBuf>,
43}
44
45impl FileWatcher {
46 pub fn new(root: PathBuf, config: IndexerConfig) -> Result<Self> {
48 let (event_tx, event_rx) = mpsc::unbounded_channel();
49 let event_tx = Arc::new(Mutex::new(event_tx));
50
51 let symlink_targets = if config.follow_symlinks {
53 find_symlink_targets(&root)
54 } else {
55 vec![]
56 };
57
58 let mut watched_paths = vec![root.clone()];
60 watched_paths.extend(symlink_targets.clone());
61 let watched_paths_for_closure = watched_paths.clone();
62
63 let config_clone = config.clone();
65
66 let debouncer = new_debouncer(
68 Duration::from_millis(500),
69 None,
70 move |result: DebounceEventResult| {
71 use std::collections::HashSet;
72
73 let tx = event_tx.lock();
74 match result {
75 Ok(events) => {
76 let mut seen_changed: HashSet<PathBuf> = HashSet::new();
78 let mut seen_deleted: HashSet<PathBuf> = HashSet::new();
79
80 for event in events {
81 let watch_events = process_notify_event(
82 &event,
83 &watched_paths_for_closure,
84 &config_clone,
85 );
86 for e in watch_events {
87 match &e {
88 WatchEvent::Changed(p) => {
89 if seen_changed.insert(p.clone()) {
90 let _ = tx.send(e);
91 }
92 }
93 WatchEvent::Deleted(p) => {
94 if seen_deleted.insert(p.clone()) {
95 let _ = tx.send(e);
96 }
97 }
98 _ => {
99 let _ = tx.send(e);
100 }
101 }
102 }
103 }
104 }
105 Err(errors) => {
106 for e in errors {
107 let _ = tx.send(WatchEvent::Error(e.to_string()));
108 }
109 }
110 }
111 },
112 )
113 .map_err(|e| YgrepError::WatchError(e.to_string()))?;
114
115 Ok(Self {
116 root,
117 config,
118 debouncer,
119 event_rx,
120 watched_paths,
121 })
122 }
123
124 pub fn start(&mut self) -> Result<()> {
126 for path in &self.watched_paths {
128 match self.debouncer.watch(path, RecursiveMode::Recursive) {
129 Ok(()) => {
130 if path == &self.root {
131 tracing::info!("Started watching: {}", path.display());
132 } else {
133 tracing::info!("Also watching symlink target: {}", path.display());
134 }
135 }
136 Err(e) => {
137 if path == &self.root {
138 return Err(YgrepError::WatchError(e.to_string()));
139 } else {
140 tracing::warn!("Failed to watch symlink target {}: {}", path.display(), e);
141 }
142 }
143 }
144 }
145
146 Ok(())
147 }
148
149 pub fn stop(&mut self) -> Result<()> {
151 for path in &self.watched_paths {
152 match self.debouncer.unwatch(path) {
153 Ok(()) => {
154 tracing::info!("Stopped watching: {}", path.display());
155 }
156 Err(e) => {
157 tracing::warn!("Failed to unwatch {}: {}", path.display(), e);
158 }
159 }
160 }
161 Ok(())
162 }
163
164 pub async fn next_event(&mut self) -> Option<WatchEvent> {
166 self.event_rx.recv().await
167 }
168
169 pub fn root(&self) -> &Path {
171 &self.root
172 }
173}
174
175fn process_notify_event(
177 event: ¬ify_debouncer_full::DebouncedEvent,
178 watched_paths: &[PathBuf],
179 config: &IndexerConfig,
180) -> Vec<WatchEvent> {
181 use notify::EventKind;
182
183 let mut events = Vec::new();
184
185 for path in &event.paths {
186 let is_under_watched = watched_paths.iter().any(|wp| path.starts_with(wp));
188 if !is_under_watched {
189 continue;
190 }
191
192 if is_hidden(path) {
194 continue;
195 }
196
197 if is_ignored_dir(path) {
199 continue;
200 }
201
202 if matches_ignore_pattern(path, config) {
204 continue;
205 }
206
207 match event.kind {
208 EventKind::Create(_) => {
209 if path.is_dir() {
210 events.push(WatchEvent::DirCreated(path.clone()));
211 } else if path.is_file() {
212 events.push(WatchEvent::Changed(path.clone()));
213 }
214 }
215 EventKind::Modify(_) => {
216 if path.is_file() {
217 events.push(WatchEvent::Changed(path.clone()));
218 }
219 }
220 EventKind::Remove(_) => {
221 events.push(WatchEvent::Deleted(path.clone()));
224 }
225 _ => {}
226 }
227 }
228
229 events
230}
231
/// Returns `true` when any named component of `path` starts with a dot
/// (for example `.git` or `.env`).
///
/// Only real names are inspected: the root, a drive prefix and the special
/// `.` / `..` components are skipped, so relative paths such as
/// `./src/main.rs` or `../lib.rs` are not mistaken for hidden ones. Names
/// that are not valid UTF-8 are converted lossily, which keeps a leading dot
/// intact.
fn is_hidden(path: &Path) -> bool {
    path.components().any(|component| match component {
        std::path::Component::Normal(name) => name.to_string_lossy().starts_with('.'),
        _ => false,
    })
}
241
242fn find_symlink_targets(root: &Path) -> Vec<PathBuf> {
245 use std::collections::HashSet;
246 use walkdir::WalkDir;
247
248 let mut targets = HashSet::new();
249
250 for entry in WalkDir::new(root)
251 .follow_links(false) .into_iter()
253 .filter_map(|e| e.ok())
254 {
255 let path = entry.path();
256
257 if path.is_symlink() {
259 if let Ok(target) = std::fs::read_link(path) {
260 let absolute_target = if target.is_absolute() {
262 target
263 } else {
264 path.parent().map(|p| p.join(&target)).unwrap_or(target)
265 };
266
267 if let Ok(canonical) = std::fs::canonicalize(&absolute_target) {
269 if canonical.is_dir() && !is_ignored_dir(&canonical) {
270 targets.insert(canonical);
271 }
272 }
273 }
274 }
275 }
276
277 targets.into_iter().collect()
278}
279
/// Returns `true` when any component of `path` is exactly one of the
/// directory names that are never watched (dependency, build-output, cache
/// and log directories).
///
/// The comparison is case-sensitive, applies to every component including the
/// last one, and skips components that are not valid UTF-8.
fn is_ignored_dir(path: &Path) -> bool {
    const IGNORED_DIRS: &[&str] = &[
        "node_modules",
        "vendor",
        "target",
        "dist",
        "build",
        "cache",
        ".git",
        "__pycache__",
        "logs",
        "tmp",
    ];

    path.iter()
        .filter_map(|component| component.to_str())
        .any(|name| IGNORED_DIRS.contains(&name))
}
302
303fn matches_ignore_pattern(path: &Path, config: &IndexerConfig) -> bool {
305 let path_str = path.to_string_lossy();
306
307 for pattern in &config.ignore_patterns {
308 if glob_match(pattern, &path_str) {
309 return true;
310 }
311 }
312
313 false
314}
315
/// Minimal glob matcher for ignore patterns, working on `/`-separated text.
///
/// Supported forms, tried in this order:
/// * `**/**`        - matches every path
/// * `**/<dir>/**`  - `<dir>` appears as a directory anywhere in the path
/// * `**/*.<ext>`   - path ends with `.<ext>`
/// * `**/<name>`    - path is `<name>` or ends with `/<name>`
/// * `<dir>/**`     - path is `<dir>` itself or lies inside it
/// * `*.<ext>`      - path ends with `.<ext>`
/// * anything else  - path is the pattern, ends with `/<pattern>`, or
///   contains `/<pattern>/`
///
/// Wildcards in any other position are not interpreted; they are compared
/// literally. Names are matched on component boundaries, so `**/bar.rs` does
/// not match `foobar.rs` and `build/**` does not match `builder/x`.
fn glob_match(pattern: &str, path: &str) -> bool {
    // The leading `**/` and trailing `/**` overlap in this pattern; slicing it
    // by fixed offsets would panic, so it is handled up front.
    if pattern == "**/**" {
        return true;
    }

    if let Some(dir) = pattern
        .strip_prefix("**/")
        .and_then(|rest| rest.strip_suffix("/**"))
    {
        return path.contains(&format!("/{}/", dir))
            || path.starts_with(&format!("{}/", dir))
            || path.ends_with(&format!("/{}", dir));
    }

    if let Some(ext) = pattern.strip_prefix("**/*.") {
        return path.ends_with(&format!(".{}", ext));
    }

    if let Some(name) = pattern.strip_prefix("**/") {
        // Require a separator (or the whole path) in front of the name.
        return path == name || path.ends_with(&format!("/{}", name));
    }

    if let Some(dir) = pattern.strip_suffix("/**") {
        // The directory itself or anything beneath it, at any depth.
        return path == dir
            || path.starts_with(&format!("{}/", dir))
            || path.ends_with(&format!("/{}", dir))
            || path.contains(&format!("/{}/", dir));
    }

    if let Some(ext) = pattern.strip_prefix("*.") {
        return path.ends_with(&format!(".{}", ext));
    }

    path == pattern
        || path.ends_with(&format!("/{}", pattern))
        || path.contains(&format!("/{}/", pattern))
}
355
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `is_hidden` against a table of `(path, expected)` pairs.
    fn assert_hidden(cases: &[(&str, bool)]) {
        for &(raw, expected) in cases {
            assert_eq!(is_hidden(Path::new(raw)), expected, "is_hidden({:?})", raw);
        }
    }

    /// Checks `glob_match` against a table of `(pattern, path, expected)`.
    fn assert_glob(cases: &[(&str, &str, bool)]) {
        for &(pattern, path, expected) in cases {
            assert_eq!(
                glob_match(pattern, path),
                expected,
                "glob_match({:?}, {:?})",
                pattern,
                path
            );
        }
    }

    #[test]
    fn test_is_hidden() {
        assert_hidden(&[
            ("/foo/.git/config", true),
            ("/foo/.hidden", true),
            ("/foo/bar/baz.rs", false),
        ]);
    }

    #[test]
    fn test_is_hidden_root_not_hidden() {
        // An ordinary absolute path has no dot-prefixed component.
        assert_hidden(&[("/usr/local/bin", false)]);
    }

    #[test]
    fn test_is_hidden_nested_in_hidden() {
        // A hidden ancestor hides everything beneath it.
        assert_hidden(&[("/project/.cache/data/file.txt", true)]);
    }

    #[test]
    fn test_is_ignored_dir() {
        let cases = [
            ("/foo/node_modules/bar", true),
            ("/foo/vendor/package", true),
            ("/foo/src/main.rs", false),
        ];
        for (raw, expected) in cases {
            assert_eq!(is_ignored_dir(Path::new(raw)), expected, "is_ignored_dir({:?})", raw);
        }
    }

    #[test]
    fn test_matches_ignore_pattern_with_config() {
        let mut config = IndexerConfig::default();
        config.ignore_patterns = vec!["**/*.log".to_string(), "**/temp/**".to_string()];

        let cases = [
            ("/project/debug.log", true),
            ("/project/temp/cache.txt", true),
            ("/project/src/main.rs", false),
        ];
        for (raw, expected) in cases {
            assert_eq!(
                matches_ignore_pattern(Path::new(raw), &config),
                expected,
                "matches_ignore_pattern({:?})",
                raw
            );
        }
    }

    #[test]
    fn test_glob_match_patterns() {
        assert_glob(&[
            // Directory anywhere in the path.
            ("**/node_modules/**", "/project/node_modules/pkg/index.js", true),
            ("**/node_modules/**", "/project/src/main.rs", false),
            // Extension at any depth.
            ("**/*.log", "/var/logs/app.log", true),
            ("**/*.log", "/var/logs/app.txt", false),
            // Bare extension pattern.
            ("*.pyc", "module.pyc", true),
            ("*.pyc", "module.py", false),
            // Exact file name.
            ("Cargo.lock", "/project/Cargo.lock", true),
        ]);
    }
}