ygrep_core/watcher/
mod.rs1use std::path::{Path, PathBuf};
4use std::sync::Arc;
5use std::time::Duration;
6
7use notify_debouncer_full::{new_debouncer, notify::RecursiveMode, DebounceEventResult};
8use parking_lot::Mutex;
9use tokio::sync::mpsc;
10
11use crate::config::IndexerConfig;
12use crate::error::{Result, YgrepError};
13
/// Events delivered to consumers of the file watcher.
#[derive(Debug, Clone)]
pub enum WatchEvent {
    /// A file exists at this path and was reported as created or modified.
    Changed(PathBuf),
    /// The path was reported as gone. Emitted for files and directories alike;
    /// the watcher does not distinguish between the two here.
    Deleted(PathBuf),
    /// A directory was created at this path.
    DirCreated(PathBuf),
    /// A directory was removed.
    // NOTE(review): nothing in this file constructs this variant (removals are
    // reported as `Deleted`) — confirm whether other modules produce it.
    DirDeleted(PathBuf),
    /// An error reported by the underlying debouncer, rendered as a string.
    Error(String),
}
28
/// Concrete debouncer type used by [`FileWatcher`]: the debouncer specialised
/// to notify's `RecommendedWatcher` and the debouncer crate's `RecommendedCache`.
type PlatformDebouncer = notify_debouncer_full::Debouncer<
    notify_debouncer_full::notify::RecommendedWatcher,
    notify_debouncer_full::RecommendedCache,
>;
33
/// Watches a root directory (plus any extra paths chosen at construction time)
/// and exposes the resulting [`WatchEvent`]s through a channel.
pub struct FileWatcher {
    /// Root directory this watcher was created for.
    root: PathBuf,
    /// Indexer configuration the watcher was built with. A clone is moved into
    /// the debouncer callback at construction; this copy is not read afterwards
    /// in this file, hence the `dead_code` allowance.
    #[allow(dead_code)]
    config: IndexerConfig,
    /// Debouncer that owns the platform watcher and invokes the event callback.
    debouncer: PlatformDebouncer,
    /// Receiving half of the channel the debouncer callback sends events into.
    event_rx: mpsc::UnboundedReceiver<WatchEvent>,
    /// Every path registered on `start` and unregistered on `stop`:
    /// the root followed by any symlink targets discovered in `new`.
    watched_paths: Vec<PathBuf>,
}
44
45impl FileWatcher {
46 pub fn new(root: PathBuf, config: IndexerConfig) -> Result<Self> {
48 let (event_tx, event_rx) = mpsc::unbounded_channel();
49 let event_tx = Arc::new(Mutex::new(event_tx));
50
51 let symlink_targets = if config.follow_symlinks {
53 find_symlink_targets(&root)
54 } else {
55 vec![]
56 };
57
58 let mut watched_paths = vec![root.clone()];
60 watched_paths.extend(symlink_targets.clone());
61 let watched_paths_for_closure = watched_paths.clone();
62
63 let config_clone = config.clone();
65
66 let debouncer = new_debouncer(
68 Duration::from_millis(500),
69 None,
70 move |result: DebounceEventResult| {
71 use std::collections::HashSet;
72
73 let tx = event_tx.lock();
74 match result {
75 Ok(events) => {
76 let mut seen_changed: HashSet<PathBuf> = HashSet::new();
78 let mut seen_deleted: HashSet<PathBuf> = HashSet::new();
79
80 for event in events {
81 let watch_events = process_notify_event(
82 &event,
83 &watched_paths_for_closure,
84 &config_clone,
85 );
86 for e in watch_events {
87 match &e {
88 WatchEvent::Changed(p) => {
89 if seen_changed.insert(p.clone()) {
90 let _ = tx.send(e);
91 }
92 }
93 WatchEvent::Deleted(p) => {
94 if seen_deleted.insert(p.clone()) {
95 let _ = tx.send(e);
96 }
97 }
98 _ => {
99 let _ = tx.send(e);
100 }
101 }
102 }
103 }
104 }
105 Err(errors) => {
106 for e in errors {
107 let _ = tx.send(WatchEvent::Error(e.to_string()));
108 }
109 }
110 }
111 },
112 )
113 .map_err(|e| YgrepError::WatchError(e.to_string()))?;
114
115 Ok(Self {
116 root,
117 config,
118 debouncer,
119 event_rx,
120 watched_paths,
121 })
122 }
123
124 pub fn start(&mut self) -> Result<()> {
126 for path in &self.watched_paths {
128 match self.debouncer.watch(path, RecursiveMode::Recursive) {
129 Ok(()) => {
130 if path == &self.root {
131 tracing::info!("Started watching: {}", path.display());
132 } else {
133 tracing::info!("Also watching symlink target: {}", path.display());
134 }
135 }
136 Err(e) => {
137 if path == &self.root {
138 return Err(YgrepError::WatchError(e.to_string()));
139 } else {
140 tracing::warn!("Failed to watch symlink target {}: {}", path.display(), e);
141 }
142 }
143 }
144 }
145
146 Ok(())
147 }
148
149 pub fn stop(&mut self) -> Result<()> {
151 for path in &self.watched_paths {
152 match self.debouncer.unwatch(path) {
153 Ok(()) => {
154 tracing::info!("Stopped watching: {}", path.display());
155 }
156 Err(e) => {
157 tracing::warn!("Failed to unwatch {}: {}", path.display(), e);
158 }
159 }
160 }
161 Ok(())
162 }
163
164 pub async fn next_event(&mut self) -> Option<WatchEvent> {
166 self.event_rx.recv().await
167 }
168
169 pub fn try_next_event(&mut self) -> Option<WatchEvent> {
171 self.event_rx.try_recv().ok()
172 }
173
174 pub fn root(&self) -> &Path {
176 &self.root
177 }
178}
179
180fn process_notify_event(
182 event: ¬ify_debouncer_full::DebouncedEvent,
183 watched_paths: &[PathBuf],
184 config: &IndexerConfig,
185) -> Vec<WatchEvent> {
186 use notify::EventKind;
187
188 let mut events = Vec::new();
189
190 for path in &event.paths {
191 let is_under_watched = watched_paths.iter().any(|wp| path.starts_with(wp));
193 if !is_under_watched {
194 continue;
195 }
196
197 if is_hidden(path) {
199 continue;
200 }
201
202 if is_ignored_dir(path) {
204 continue;
205 }
206
207 if matches_ignore_pattern(path, config) {
209 continue;
210 }
211
212 match event.kind {
213 EventKind::Create(_) => {
214 if path.is_dir() {
215 events.push(WatchEvent::DirCreated(path.clone()));
216 } else if path.is_file() {
217 events.push(WatchEvent::Changed(path.clone()));
218 }
219 }
220 EventKind::Modify(_) => {
221 if path.is_file() {
222 events.push(WatchEvent::Changed(path.clone()));
223 }
224 }
225 EventKind::Remove(_) => {
226 events.push(WatchEvent::Deleted(path.clone()));
229 }
230 _ => {}
231 }
232 }
233
234 events
235}
236
/// Returns `true` when any named component of `path` starts with a dot.
///
/// Only ordinary (`Component::Normal`) names are inspected. The special `.`
/// and `..` components, the root and any Windows prefix are navigation
/// markers, not hidden entries, so a relative path such as `./src/main.rs`
/// is not reported as hidden. Names that are not valid UTF-8 count as not hidden.
fn is_hidden(path: &Path) -> bool {
    use std::path::Component;

    path.components().any(|component| match component {
        Component::Normal(name) => name.to_str().map_or(false, |s| s.starts_with('.')),
        Component::Prefix(_)
        | Component::RootDir
        | Component::CurDir
        | Component::ParentDir => false,
    })
}
246
247fn find_symlink_targets(root: &Path) -> Vec<PathBuf> {
250 use std::collections::HashSet;
251 use walkdir::WalkDir;
252
253 let mut targets = HashSet::new();
254
255 for entry in WalkDir::new(root)
256 .follow_links(false) .into_iter()
258 .filter_map(|e| e.ok())
259 {
260 let path = entry.path();
261
262 if path.is_symlink() {
264 if let Ok(target) = std::fs::read_link(path) {
265 let absolute_target = if target.is_absolute() {
267 target
268 } else {
269 path.parent().map(|p| p.join(&target)).unwrap_or(target)
270 };
271
272 if let Ok(canonical) = std::fs::canonicalize(&absolute_target) {
274 if canonical.is_dir() && !is_ignored_dir(&canonical) {
275 targets.insert(canonical);
276 }
277 }
278 }
279 }
280 }
281
282 targets.into_iter().collect()
283}
284
/// Returns `true` when any component of `path` is exactly one of the
/// built-in ignored directory names.
///
/// Every component is checked, including the final one. Components that are
/// not valid UTF-8 never match.
fn is_ignored_dir(path: &Path) -> bool {
    const SKIPPED_NAMES: [&str; 10] = [
        "node_modules",
        "vendor",
        "target",
        "dist",
        "build",
        "cache",
        ".git",
        "__pycache__",
        "logs",
        "tmp",
    ];

    for component in path.components() {
        if let Some(name) = component.as_os_str().to_str() {
            if SKIPPED_NAMES.contains(&name) {
                return true;
            }
        }
    }

    false
}
307
308fn matches_ignore_pattern(path: &Path, config: &IndexerConfig) -> bool {
310 let path_str = path.to_string_lossy();
311
312 for pattern in &config.ignore_patterns {
313 if glob_match(pattern, &path_str) {
314 return true;
315 }
316 }
317
318 false
319}
320
/// Minimal glob matcher for ignore patterns, operating on `/`-separated strings.
///
/// Supported pattern shapes, tried in this order:
///
/// * `**` or `**/**` — matches every path;
/// * `**/name/**` — `name` appears as a directory component (or is the last
///   component);
/// * `**/*.ext` — path ends with `.ext`;
/// * `**/name` — path is `name` or ends with the component(s) `/name`;
/// * `dir/**` — `dir` appears as a component sequence at the start, in the
///   middle, or at the end of the path;
/// * `*.ext` — path ends with `.ext`;
/// * anything else — treated as a literal: equal to the path, its trailing
///   component(s), or an inner component sequence.
///
/// All name matches are anchored on `/` boundaries, so `**/Cargo.lock` does
/// not match `MyCargo.lock` and `build/**` does not match `builder/x`.
fn glob_match(pattern: &str, path: &str) -> bool {
    // Bare recursive wildcards match everything. Handling `**/**` here also
    // keeps it away from the prefix/suffix logic below, where the shared `/`
    // would otherwise make the two markers overlap.
    if pattern == "**" || pattern == "**/**" {
        return true;
    }

    // `**/name/**`
    if let Some(dir_name) = pattern
        .strip_prefix("**/")
        .and_then(|rest| rest.strip_suffix("/**"))
    {
        return path.contains(&format!("/{}/", dir_name))
            || path.starts_with(&format!("{}/", dir_name))
            || path.ends_with(&format!("/{}", dir_name));
    }

    // `**/*.ext`
    if let Some(ext) = pattern.strip_prefix("**/*.") {
        return path.ends_with(&format!(".{}", ext));
    }

    // `**/name` — must match whole trailing component(s), not a raw suffix.
    if let Some(name) = pattern.strip_prefix("**/") {
        return path == name || path.ends_with(&format!("/{}", name));
    }

    // `dir/**` — `dir` must be delimited by `/` (or the string ends) on both sides.
    if let Some(dir) = pattern.strip_suffix("/**") {
        return path == dir
            || path.starts_with(&format!("{}/", dir))
            || path.contains(&format!("/{}/", dir))
            || path.ends_with(&format!("/{}", dir));
    }

    // `*.ext`
    if let Some(ext) = pattern.strip_prefix("*.") {
        return path.ends_with(&format!(".{}", ext));
    }

    // Literal name or relative path.
    path == pattern
        || path.ends_with(&format!("/{}", pattern))
        || path.contains(&format!("/{}/", pattern))
}
360
#[cfg(test)]
mod tests {
    use super::*;

    /// Dot-prefixed components mark a path as hidden; plain paths are visible.
    #[test]
    fn test_is_hidden() {
        for hidden in ["/foo/.git/config", "/foo/.hidden"] {
            assert!(is_hidden(Path::new(hidden)));
        }
        assert!(!is_hidden(Path::new("/foo/bar/baz.rs")));
    }

    /// An ordinary absolute path has no hidden component.
    #[test]
    fn test_is_hidden_root_not_hidden() {
        let visible = Path::new("/usr/local/bin");
        assert!(!is_hidden(visible));
    }

    /// A file nested below a hidden directory is itself hidden.
    #[test]
    fn test_is_hidden_nested_in_hidden() {
        let nested = Path::new("/project/.cache/data/file.txt");
        assert!(is_hidden(nested));
    }

    /// Built-in ignored directory names are detected anywhere in the path.
    #[test]
    fn test_is_ignored_dir() {
        for ignored in ["/foo/node_modules/bar", "/foo/vendor/package"] {
            assert!(is_ignored_dir(Path::new(ignored)));
        }
        assert!(!is_ignored_dir(Path::new("/foo/src/main.rs")));
    }

    /// Patterns supplied through the config are applied to full paths.
    #[test]
    fn test_matches_ignore_pattern_with_config() {
        let mut config = IndexerConfig::default();
        config.ignore_patterns = vec!["**/*.log".to_string(), "**/temp/**".to_string()];

        let expectations = [
            ("/project/debug.log", true),
            ("/project/temp/cache.txt", true),
            ("/project/src/main.rs", false),
        ];
        for (path, expected) in expectations {
            assert_eq!(matches_ignore_pattern(Path::new(path), &config), expected);
        }
    }

    /// One positive (and, where given, one negative) case per pattern shape.
    #[test]
    fn test_glob_match_patterns() {
        let cases = [
            // `**/dir/**`
            ("**/node_modules/**", "/project/node_modules/pkg/index.js", true),
            ("**/node_modules/**", "/project/src/main.rs", false),
            // `**/*.ext`
            ("**/*.log", "/var/logs/app.log", true),
            ("**/*.log", "/var/logs/app.txt", false),
            // `*.ext`
            ("*.pyc", "module.pyc", true),
            ("*.pyc", "module.py", false),
            // literal file name
            ("Cargo.lock", "/project/Cargo.lock", true),
        ];
        for (pattern, path, expected) in cases {
            assert_eq!(glob_match(pattern, path), expected);
        }
    }
}