Skip to main content

lean_ctx/core/
watcher.rs

1//! File watcher for automatic incremental re-indexing.
2//!
3//! Monitors the project directory for file changes and triggers
4//! incremental BM25 + embedding index updates. Uses debouncing
5//! to avoid thrashing during rapid edits (e.g., auto-save).
6//!
7//! Architecture:
8//! - Background task polls filesystem for changes (no native fs events dependency)
9//! - Debounces rapid changes (configurable, default 2s)
10//! - Only re-indexes changed files (via content hash comparison)
11//! - Notifies subscribers when index is updated
12
13use std::collections::HashMap;
14use std::path::{Path, PathBuf};
15use std::sync::atomic::AtomicBool;
16use std::sync::Arc;
17use std::time::{Duration, SystemTime};
18
19use md5::{Digest, Md5};
20
/// Default value for [`WatcherConfig::poll_interval`].
const DEFAULT_POLL_INTERVAL: Duration = Duration::from_secs(5);
/// Default value for [`WatcherConfig::debounce`].
const DEFAULT_DEBOUNCE: Duration = Duration::from_secs(2);
/// Maximum number of files a single scan will record before it stops walking.
const MAX_TRACKED_FILES: usize = 5_000;
24
/// Settings that control how the watcher observes a project directory.
///
/// The type derives `Debug`, `Clone`, `PartialEq` and `Eq` so that a
/// configuration can be logged, duplicated and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WatcherConfig {
    /// Time between two consecutive filesystem polls.
    pub poll_interval: Duration,
    /// Quiet period used to coalesce rapid successive edits.
    pub debounce: Duration,
    /// Directory whose contents are watched.
    pub root: PathBuf,
}
30
31impl Default for WatcherConfig {
32    fn default() -> Self {
33        Self {
34            poll_interval: DEFAULT_POLL_INTERVAL,
35            debounce: DEFAULT_DEBOUNCE,
36            root: PathBuf::from("."),
37        }
38    }
39}
40
41/// Tracks file modification state for change detection.
42#[derive(Debug)]
43pub struct FileTracker {
44    states: HashMap<PathBuf, FileState>,
45    root: PathBuf,
46}
47
/// What the tracker recorded about a single file during a scan.
#[derive(Debug, Clone)]
struct FileState {
    /// Last-modified timestamp reported by the filesystem.
    modified: SystemTime,
    /// File length in bytes.
    size: u64,
    /// Hex digest of the file contents, or `None` when no digest was recorded.
    content_hash: Option<String>,
}
54
/// Result of a file scan — lists which files changed since the previous scan.
///
/// `Default` yields a result with no changes, and `PartialEq`/`Eq` allow two
/// results to be compared directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ScanResult {
    /// Files seen in this scan that were not known before.
    pub added: Vec<PathBuf>,
    /// Previously known files that were reported as changed.
    pub modified: Vec<PathBuf>,
    /// Previously known files that were not seen in this scan.
    pub removed: Vec<PathBuf>,
}
62
63impl ScanResult {
64    pub fn has_changes(&self) -> bool {
65        !self.added.is_empty() || !self.modified.is_empty() || !self.removed.is_empty()
66    }
67
68    pub fn total_changes(&self) -> usize {
69        self.added.len() + self.modified.len() + self.removed.len()
70    }
71
72    pub fn changed_files(&self) -> Vec<&PathBuf> {
73        self.added
74            .iter()
75            .chain(self.modified.iter())
76            .collect()
77    }
78}
79
80impl FileTracker {
81    pub fn new(root: &Path) -> Self {
82        Self {
83            states: HashMap::new(),
84            root: root.to_path_buf(),
85        }
86    }
87
88    /// Scan the directory and detect changes since last scan.
89    pub fn scan(&mut self) -> ScanResult {
90        let mut current_files: HashMap<PathBuf, FileState> = HashMap::new();
91
92        let walker = ignore::WalkBuilder::new(&self.root)
93            .hidden(true)
94            .git_ignore(true)
95            .max_depth(Some(10))
96            .build();
97
98        let mut count = 0usize;
99        for entry in walker.flatten() {
100            if count >= MAX_TRACKED_FILES {
101                break;
102            }
103            let path = entry.path().to_path_buf();
104            if !path.is_file() || !is_indexable(&path) {
105                continue;
106            }
107
108            if let Ok(meta) = std::fs::metadata(&path) {
109                let modified = meta.modified().unwrap_or(SystemTime::UNIX_EPOCH);
110                let size = meta.len();
111                current_files.insert(
112                    path,
113                    FileState {
114                        modified,
115                        size,
116                        content_hash: None,
117                    },
118                );
119                count += 1;
120            }
121        }
122
123        let mut added = Vec::new();
124        let mut modified = Vec::new();
125        let mut removed = Vec::new();
126
127        for (path, state) in &current_files {
128            match self.states.get(path) {
129                None => added.push(path.clone()),
130                Some(old) => {
131                    if old.modified != state.modified || old.size != state.size {
132                        if has_content_changed(path, old) {
133                            modified.push(path.clone());
134                        }
135                    }
136                }
137            }
138        }
139
140        for path in self.states.keys() {
141            if !current_files.contains_key(path) {
142                removed.push(path.clone());
143            }
144        }
145
146        self.states = current_files;
147
148        ScanResult {
149            added,
150            modified,
151            removed,
152        }
153    }
154
155    pub fn tracked_count(&self) -> usize {
156        self.states.len()
157    }
158}
159
/// Creates a new shared stop flag, initially `false`.
///
/// Each call returns an independent flag; clone the returned `Arc` to share
/// one flag between owners.
pub fn create_stop_flag() -> Arc<AtomicBool> {
    let not_stopped = AtomicBool::default();
    Arc::new(not_stopped)
}
164
/// Reports whether `path` has one of the source-code extensions the indexer
/// handles.
///
/// The comparison is case-sensitive and looks only at the final extension.
/// Paths without an extension, or with a non-UTF-8 one, are not indexable.
fn is_indexable(path: &Path) -> bool {
    const INDEXABLE_EXTENSIONS: &[&str] = &[
        "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "c", "cpp", "h", "hpp", "rb", "cs",
        "kt", "swift", "php", "scala", "ex", "exs", "zig", "lua", "dart", "vue", "svelte",
    ];

    match path.extension().and_then(|raw| raw.to_str()) {
        Some(ext) => INDEXABLE_EXTENSIONS.contains(&ext),
        None => false,
    }
}
195
196fn has_content_changed(path: &Path, old_state: &FileState) -> bool {
197    if let Some(ref old_hash) = old_state.content_hash {
198        if let Ok(content) = std::fs::read(path) {
199            let new_hash = hash_bytes(&content);
200            return &new_hash != old_hash;
201        }
202    }
203    true
204}
205
206fn hash_bytes(data: &[u8]) -> String {
207    let mut hasher = Md5::new();
208    hasher.update(data);
209    format!("{:x}", hasher.finalize())
210}
211
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates an empty scratch directory for one test.
    ///
    /// The process id is part of the name so that concurrent test runs on the
    /// same machine do not delete each other's fixtures.
    fn scratch_dir(tag: &str) -> PathBuf {
        let dir = std::env::temp_dir().join(format!(
            "lean_ctx_watcher_test_{}_{}",
            tag,
            std::process::id()
        ));
        let _ = fs::remove_dir_all(&dir);
        fs::create_dir_all(&dir).unwrap();
        dir
    }

    /// Only known source-code extensions are indexable.
    #[test]
    fn is_indexable_code_files() {
        assert!(is_indexable(Path::new("main.rs")));
        assert!(is_indexable(Path::new("app.tsx")));
        assert!(is_indexable(Path::new("server.go")));
        assert!(!is_indexable(Path::new("readme.md")));
        assert!(!is_indexable(Path::new("image.png")));
        assert!(!is_indexable(Path::new("data.json")));
    }

    /// The first scan of a fresh tracker reports existing files as added.
    #[test]
    fn tracker_detects_new_files() {
        let dir = scratch_dir("new");
        fs::write(dir.join("test.rs"), "fn main() {}").unwrap();

        let mut tracker = FileTracker::new(&dir);
        let result = tracker.scan();
        assert!(!result.added.is_empty(), "should detect new file");
        assert!(result.modified.is_empty());
        assert!(result.removed.is_empty());
        assert!(result.has_changes());

        let _ = fs::remove_dir_all(&dir);
    }

    /// Scanning twice without touching the tree reports nothing.
    #[test]
    fn tracker_detects_no_changes_on_rescan() {
        let dir = scratch_dir("stable");
        fs::write(dir.join("stable.rs"), "fn main() {}").unwrap();

        let mut tracker = FileTracker::new(&dir);
        let _ = tracker.scan();

        let result = tracker.scan();
        assert!(result.added.is_empty());
        assert!(result.modified.is_empty());
        assert!(result.removed.is_empty());
        assert!(!result.has_changes());

        let _ = fs::remove_dir_all(&dir);
    }

    /// A file deleted between two scans shows up in `removed`.
    #[test]
    fn tracker_detects_removed_files() {
        let dir = scratch_dir("rm");
        let file = dir.join("temp.rs");
        fs::write(&file, "fn main() {}").unwrap();

        let mut tracker = FileTracker::new(&dir);
        let _ = tracker.scan();

        fs::remove_file(&file).unwrap();
        let result = tracker.scan();
        assert!(!result.removed.is_empty(), "should detect removed file");

        let _ = fs::remove_dir_all(&dir);
    }

    /// `changed_files` covers added and modified paths but not removed ones.
    #[test]
    fn scan_result_methods() {
        let result = ScanResult {
            added: vec![PathBuf::from("a.rs")],
            modified: vec![PathBuf::from("b.rs")],
            removed: vec![PathBuf::from("c.rs")],
        };
        assert!(result.has_changes());
        assert_eq!(result.total_changes(), 3);
        assert_eq!(result.changed_files().len(), 2);
    }

    /// A result with three empty lists reports no changes.
    #[test]
    fn empty_scan_result() {
        let result = ScanResult {
            added: vec![],
            modified: vec![],
            removed: vec![],
        };
        assert!(!result.has_changes());
        assert_eq!(result.total_changes(), 0);
    }
}