Skip to main content

krait/index/
watcher.rs

//! File watcher for proactive cache invalidation.
//!
//! Watches the project directory for file changes and maintains an in-memory
//! set of dirty paths, eliminating per-query BLAKE3 hashing.
5
6use std::collections::HashSet;
7use std::path::Path;
8use std::sync::atomic::{AtomicBool, Ordering};
9use std::sync::{Arc, RwLock};
10use std::time::Duration;
11
12use notify_debouncer_full::notify::{EventKind, RecommendedWatcher, Watcher};
13use notify_debouncer_full::{new_debouncer, Debouncer, FileIdMap};
14use tracing::{debug, info, warn};
15
16use crate::lsp::diagnostics::DiagnosticStore;
17
/// Thread-safe set of file paths known to be dirty (changed since last index).
///
/// Both fields live behind `Arc`, so a clone is a second handle onto the same
/// shared state rather than an independent copy.
///
/// Two independent conditions make *every* path report as dirty:
/// the explicit poison flag (set on watcher errors), and a poisoned lock
/// (a thread panicked while holding the write guard). In both cases the set
/// can no longer be trusted to be complete, so the conservative answer is
/// "dirty".
#[derive(Clone)]
pub struct DirtyFiles {
    /// Relative paths marked dirty since the last successful `clear`.
    inner: Arc<RwLock<HashSet<String>>>,
    /// When true, all files are considered dirty (watcher overflow recovery).
    poisoned: Arc<AtomicBool>,
}

impl Default for DirtyFiles {
    fn default() -> Self {
        Self::new()
    }
}

impl DirtyFiles {
    /// Create an empty, un-poisoned dirty set.
    #[must_use]
    pub fn new() -> Self {
        Self {
            inner: Arc::new(RwLock::new(HashSet::new())),
            poisoned: Arc::new(AtomicBool::new(false)),
        }
    }

    /// Mark a relative path as dirty.
    ///
    /// If the lock is poisoned the path cannot be recorded, so the whole set
    /// is flagged as poisoned instead.
    pub fn mark_dirty(&self, rel_path: String) {
        match self.inner.write() {
            Ok(mut set) => {
                set.insert(rel_path);
            }
            Err(_) => {
                // Lock poisoned — poison the dirty set so all files are considered stale
                self.poison();
            }
        }
    }

    /// Check if a relative path is dirty.
    ///
    /// Returns `true` if the file is known to have changed, if the watcher is
    /// poisoned (overflow occurred), or if the set cannot be read because its
    /// lock is poisoned.
    #[must_use]
    pub fn is_dirty(&self, rel_path: &str) -> bool {
        if self.is_poisoned() {
            return true;
        }
        match self.inner.read() {
            Ok(set) => set.contains(rel_path),
            // The lock was poisoned between the check above and this read:
            // answering "clean" could serve a stale cache entry.
            Err(_) => true,
        }
    }

    /// Clear all dirty entries and reset poison flag.
    ///
    /// Called after re-indexing (`krait init`).
    ///
    /// If the lock itself is poisoned the entries cannot be cleared through
    /// the normal path; the poison flag is then left set so that callers keep
    /// treating every file as dirty instead of trusting a stale set.
    pub fn clear(&self) {
        match self.inner.write() {
            Ok(mut set) => {
                set.clear();
                self.poisoned.store(false, Ordering::Relaxed);
            }
            Err(_) => self.poison(),
        }
    }

    /// Number of dirty files.
    ///
    /// Counts the recorded entries only; it does not reflect the poison flag.
    /// A poisoned lock is read through, so the count is still reported.
    #[must_use]
    pub fn len(&self) -> usize {
        match self.inner.read() {
            Ok(set) => set.len(),
            Err(poisoned) => poisoned.into_inner().len(),
        }
    }

    /// Whether the dirty set is empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Whether the watcher is poisoned (overflow occurred) or the underlying
    /// lock is poisoned — either way every file must be considered dirty.
    #[must_use]
    pub fn is_poisoned(&self) -> bool {
        self.poisoned.load(Ordering::Relaxed) || self.inner.is_poisoned()
    }

    /// Mark the watcher as poisoned — all files considered dirty.
    pub(crate) fn poison(&self) {
        self.poisoned.store(true, Ordering::Relaxed);
    }
}
99
100/// Debounce window for file change events.
101const DEBOUNCE_MS: u64 = 500;
102
103/// Start watching the project root for file changes.
104///
105/// Returns the debouncer handle (must be kept alive). Dropping it stops the watcher.
106///
107/// # Errors
108/// Returns an error if the watcher can't be created or the project root can't be watched.
109pub fn start_watcher(
110    project_root: &Path,
111    extensions: &[String],
112    dirty_files: DirtyFiles,
113    diagnostic_store: Option<Arc<DiagnosticStore>>,
114) -> anyhow::Result<Debouncer<RecommendedWatcher, FileIdMap>> {
115    // Canonicalize to match FSEvents paths on macOS
116    let canonical_root = project_root
117        .canonicalize()
118        .unwrap_or_else(|_| project_root.to_path_buf());
119    let ext_set: HashSet<String> = extensions.iter().cloned().collect();
120    let df = dirty_files;
121
122    let mut debouncer = new_debouncer(
123        Duration::from_millis(DEBOUNCE_MS),
124        None,
125        move |result: notify_debouncer_full::DebounceEventResult| match result {
126            Ok(events) => {
127                for event in events {
128                    match event.kind {
129                        // For renames, mark both old and new paths dirty
130                        EventKind::Modify(
131                            notify_debouncer_full::notify::event::ModifyKind::Name(_),
132                        ) => {
133                            for path in &event.paths {
134                                if let Some(rel) = to_relative(path, &canonical_root, &ext_set) {
135                                    debug!("file renamed: {rel}");
136                                    df.mark_dirty(rel);
137                                    if let Some(store) = &diagnostic_store {
138                                        store.clear(path);
139                                    }
140                                }
141                            }
142                        }
143                        EventKind::Create(_) | EventKind::Modify(_) | EventKind::Remove(_) => {
144                            for path in &event.paths {
145                                if let Some(rel) = to_relative(path, &canonical_root, &ext_set) {
146                                    debug!("file changed: {rel}");
147                                    df.mark_dirty(rel);
148                                    if let Some(store) = &diagnostic_store {
149                                        store.clear(path);
150                                    }
151                                }
152                            }
153                        }
154                        _ => {}
155                    }
156                }
157            }
158            Err(errors) => {
159                warn!("watcher errors: {:?} — entering full re-check mode", errors);
160                df.poison();
161                // Schedule recovery: clear poison after 30s so BLAKE3 fallback
162                // eventually stops considering every file dirty.
163                let inner_clone = Arc::clone(&df.inner);
164                let poisoned_clone = Arc::clone(&df.poisoned);
165                std::thread::spawn(move || {
166                    std::thread::sleep(Duration::from_secs(30));
167                    poisoned_clone.store(false, Ordering::Relaxed);
168                    if let Ok(mut set) = inner_clone.write() {
169                        set.clear();
170                    }
171                    info!("watcher: poison cleared after 30s recovery window");
172                });
173            }
174        },
175    )?;
176
177    debouncer.watcher().watch(
178        project_root,
179        notify_debouncer_full::notify::RecursiveMode::Recursive,
180    )?;
181
182    info!("file watcher started on {}", project_root.display());
183    Ok(debouncer)
184}
185
/// Convert an absolute path to a relative path if it has an indexed extension.
///
/// Returns `None` when the path has no UTF-8 extension, when the extension is
/// not in `extensions` (case-sensitive match), or when the path does not lie
/// under `canonical_root`.
///
/// The path is first compared lexically against `canonical_root`. If that
/// fails, symlinks are resolved by canonicalizing the nearest ancestor that
/// still exists on disk and re-attaching the remaining components. Resolving
/// an ancestor rather than the path itself matters for remove/rename events:
/// the file is already gone, so canonicalizing the full path would fail and
/// the event would be lost.
fn to_relative(path: &Path, canonical_root: &Path, extensions: &HashSet<String>) -> Option<String> {
    // Check extension first (cheapest filter)
    let ext = path.extension()?.to_str()?;
    if !extensions.contains(ext) {
        return None;
    }

    // Try canonical root first
    if let Ok(rel) = path.strip_prefix(canonical_root) {
        return Some(rel.to_string_lossy().into_owned());
    }

    // Fallback: resolve symlinks via the nearest existing ancestor. The first
    // ancestor yielded is `path` itself, so existing files are canonicalized
    // directly.
    let (ancestor, resolved) = path
        .ancestors()
        .find_map(|ancestor| ancestor.canonicalize().ok().map(|resolved| (ancestor, resolved)))?;
    let remainder = path.strip_prefix(ancestor).ok()?;
    let candidate = if remainder.as_os_str().is_empty() {
        resolved
    } else {
        resolved.join(remainder)
    };

    candidate
        .strip_prefix(canonical_root)
        .ok()
        .map(|rel| rel.to_string_lossy().into_owned())
}
208
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned extension set from string literals.
    fn ext_set(exts: &[&str]) -> HashSet<String> {
        exts.iter().copied().map(String::from).collect()
    }

    /// A fresh set is empty; marking one path affects only that path.
    #[test]
    fn dirty_files_basic() {
        let dirty = DirtyFiles::new();
        assert!(!dirty.is_dirty("src/lib.rs"));
        assert_eq!(dirty.len(), 0);

        dirty.mark_dirty(String::from("src/lib.rs"));
        assert!(dirty.is_dirty("src/lib.rs"));
        assert!(!dirty.is_dirty("src/main.rs"));
        assert_eq!(dirty.len(), 1);
    }

    /// `clear` removes every recorded path.
    #[test]
    fn dirty_files_clear() {
        let dirty = DirtyFiles::new();
        for name in ["a.rs", "b.rs"].iter() {
            dirty.mark_dirty((*name).to_string());
        }
        assert_eq!(dirty.len(), 2);

        dirty.clear();
        assert_eq!(dirty.len(), 0);
        assert!(!dirty.is_dirty("a.rs"));
    }

    /// Once poisoned, every path reports dirty regardless of the set contents.
    #[test]
    fn dirty_files_poison() {
        let dirty = DirtyFiles::new();
        assert!(!dirty.is_poisoned());
        assert!(!dirty.is_dirty("any_file.rs"));

        dirty.poison();
        assert!(dirty.is_poisoned());
        assert!(dirty.is_dirty("any_file.rs"));
        assert!(dirty.is_dirty("literally_anything"));
    }

    /// `clear` also lifts the poison flag.
    #[test]
    fn dirty_files_clear_resets_poison() {
        let dirty = DirtyFiles::new();
        dirty.poison();
        assert!(dirty.is_poisoned());

        dirty.clear();
        assert!(!dirty.is_poisoned());
        assert!(!dirty.is_dirty("test.rs"));
    }

    /// Clones observe writes made through the original handle.
    #[test]
    fn dirty_files_clone_shares_state() {
        let original = DirtyFiles::new();
        let handle = original.clone();

        original.mark_dirty(String::from("shared.rs"));
        assert!(handle.is_dirty("shared.rs"));
    }

    /// Only paths with an indexed extension are accepted.
    #[test]
    fn to_relative_filters_extension() {
        let root = Path::new("/project");
        let exts = ext_set(&["rs", "ts"]);

        for accepted in ["/project/src/lib.rs", "/project/src/app.ts"].iter() {
            assert!(to_relative(Path::new(accepted), root, &exts).is_some());
        }
        for rejected in ["/project/README.md", "/project/Cargo.toml"].iter() {
            assert!(to_relative(Path::new(rejected), root, &exts).is_none());
        }
    }

    /// The root prefix is removed from accepted paths.
    #[test]
    fn to_relative_strips_prefix() {
        let root = Path::new("/project");
        let exts = ext_set(&["rs"]);

        let rel = to_relative(Path::new("/project/src/lib.rs"), root, &exts);
        assert_eq!(rel, Some("src/lib.rs".to_string()));
    }

    /// Paths outside the root are rejected even with a matching extension.
    #[test]
    fn to_relative_outside_root_returns_none() {
        let root = Path::new("/project");
        let exts = ext_set(&["rs"]);

        assert!(to_relative(Path::new("/other/src/lib.rs"), root, &exts).is_none());
    }
}
296}