1use std::cmp::Reverse;
23use std::collections::BinaryHeap;
24use std::collections::HashMap;
25use std::path::{Path, PathBuf};
26use std::sync::Arc;
27
28use arc_swap::ArcSwap;
29use ignore::WalkBuilder;
30use ignore::gitignore::GitignoreBuilder;
31
32use nucleo_matcher::pattern::{CaseMatching, Normalization, Pattern};
33use nucleo_matcher::{Config, Matcher, Utf32String};
34use notify::Watcher;
35
36
37pub struct FileEntry {
40 pub path: PathBuf,
42 utf32: Utf32String,
44}
45
46pub struct FileIndex {
47 files: Vec<FileEntry>,
48 trigrams: HashMap<[u8; 3], Vec<usize>>,
51}
52
53impl std::fmt::Debug for FileIndex {
54 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
55 write!(f, "FileIndex({} files)", self.files.len())
56 }
57}
58
59impl FileIndex {
60 pub fn build_with_max_depth(root: &Path, max_depth: Option<usize>) -> Self {
64 let mut files = Vec::with_capacity(4096);
65 let mut trigrams: HashMap<[u8; 3], Vec<usize>> = HashMap::new();
66
67 let mut builder = WalkBuilder::new(root);
68 builder
69 .hidden(false) .git_ignore(true) .git_exclude(true)
72 .parents(true);
73
74 if let Some(d) = max_depth {
75 builder.max_depth(Some(d));
76 }
77
78 for result in builder.build() {
79 let Ok(entry) = result else { continue };
80 let Some(ft) = entry.file_type() else {
81 continue;
82 };
83 if !ft.is_file() {
84 continue;
85 }
86
87 let path = entry.path();
88 let rel = path.strip_prefix(root).unwrap_or(path);
89
90 if rel.components().any(|c| {
93 c.as_os_str()
94 .to_str()
95 .map(|s| matches!(s, "target" | "node_modules" | "__pycache__"))
96 .unwrap_or(false)
97 }) {
98 continue;
99 }
100
101 let s = rel.to_string_lossy().replace('\\', "/");
102 let idx = files.len();
103 index_trigrams(s.as_bytes(), idx, &mut trigrams);
104 files.push(FileEntry {
105 path: rel.to_path_buf(),
106 utf32: Utf32String::from(s.as_str()),
107 });
108 }
109
110 FileIndex { files, trigrams }
111 }
112
113 pub fn build(root: &Path) -> Self {
115 Self::build_with_max_depth(root, None)
116 }
117
118 #[allow(dead_code)]
121 pub fn from_paths(paths: Vec<PathBuf>) -> Self {
122 let mut files = Vec::with_capacity(paths.len());
123 let mut trigrams: HashMap<[u8; 3], Vec<usize>> = HashMap::new();
124
125 for path in paths {
126 let s = path.to_string_lossy().replace('\\', "/");
127 let idx = files.len();
128 index_trigrams(s.as_bytes(), idx, &mut trigrams);
129 files.push(FileEntry {
130 utf32: Utf32String::from(s.as_str()),
131 path,
132 });
133 }
134
135 FileIndex { files, trigrams }
136 }
137
138 #[allow(dead_code)]
139 pub fn files(&self) -> &[FileEntry] {
140 &self.files
141 }
142
143 fn trigram_candidate_indices(&self, query: &str) -> Option<Vec<usize>> {
149 let q: Vec<u8> = query.bytes().filter(|&b| b != b' ').collect();
153 if q.len() < 3 {
154 return None;
155 }
156
157 let first = [
158 q[0].to_ascii_lowercase(),
159 q[1].to_ascii_lowercase(),
160 q[2].to_ascii_lowercase(),
161 ];
162
163 let a = self.trigrams.get(&first)?;
164
165 if q.len() < 6 {
167 return Some(a.clone());
168 }
169
170 let last = [
171 q[q.len() - 3].to_ascii_lowercase(),
172 q[q.len() - 2].to_ascii_lowercase(),
173 q[q.len() - 1].to_ascii_lowercase(),
174 ];
175
176 let b = match self.trigrams.get(&last) {
177 Some(v) => v,
178 None => return Some(vec![]),
179 };
180
181 let mut out = Vec::with_capacity(a.len().min(b.len()));
183 let set: std::collections::HashSet<_> = b.iter().copied().collect();
184
185 for &idx in a {
186 if set.contains(&idx) {
187 out.push(idx);
188 }
189 }
190
191 Some(out)
192 }
193}
194
195fn index_trigrams(bytes: &[u8], idx: usize, map: &mut HashMap<[u8; 3], Vec<usize>>) {
197 use std::collections::HashSet;
198 let mut seen = HashSet::new();
199 for tri in bytes.windows(3) {
200 let key = [
201 tri[0].to_ascii_lowercase(),
202 tri[1].to_ascii_lowercase(),
203 tri[2].to_ascii_lowercase(),
204 ];
205 if seen.insert(key) {
206 map.entry(key).or_default().push(idx);
207 }
208 }
209}
210
211pub type SharedFileIndex = Arc<ArcSwap<Option<FileIndex>>>;
218
219pub fn spawn_indexer(root: PathBuf) -> SharedFileIndex {
224 let shared: SharedFileIndex = Arc::new(ArcSwap::from_pointee(None));
225 let out = shared.clone();
226 tokio::task::spawn_blocking(move || {
227 let idx = FileIndex::build(&root);
228 log::info!("file index built ({} files)", idx.files.len());
229 out.store(Arc::new(Some(idx)));
230 });
231 shared
232}
233
234fn should_trigger_notify_event(kind: ¬ify::EventKind) -> bool {
235 match kind {
236 notify::EventKind::Create(_) => true,
237 notify::EventKind::Remove(_) => true,
238 notify::EventKind::Modify(mod_kind) => {
239 matches!(mod_kind, notify::event::ModifyKind::Name(_))
241 }
242 _ => false,
243 }
244}
245
246fn is_path_relevant(root: &Path, path: &Path) -> bool {
253 if let Ok(_rel) = path.strip_prefix(root) {
256 let mut gbuilder = GitignoreBuilder::new(root);
261
262 let mut walker = WalkBuilder::new(root);
263 walker.hidden(false).git_ignore(false).git_exclude(false);
264
265 for result in walker.build() {
266 if let Ok(entry) = result
267 && let Some(name_os) = entry.path().file_name()
268 && let Some(name) = name_os.to_str()
269 && name == ".gitignore" {
270 let _ = gbuilder.add(entry.path());
271 }
272 }
273
274 let git_info_exclude = root.join(".git").join("info").join("exclude");
276 if git_info_exclude.is_file() {
277 let _ = gbuilder.add(git_info_exclude);
278 }
279
280 let gi = match gbuilder.build() {
281 Ok(g) => g,
282 Err(_) => ignore::gitignore::Gitignore::empty(),
283 };
284
285 let is_dir = match path.metadata() {
286 Ok(md) => md.is_dir(),
287 Err(_) => path.extension().is_none(),
288 };
289
290 !gi.matched(path, is_dir).is_ignore()
291 } else {
292 true
293 }
294}
295
296fn is_event_relevant(root: &Path, event: ¬ify::Event) -> bool {
298 event.paths.iter().any(|p| is_path_relevant(root, p))
299}
300
301pub fn spawn_watcher(root: PathBuf, index: SharedFileIndex) {
309 let (trigger_tx, mut trigger_rx) = tokio::sync::mpsc::unbounded_channel::<()>();
310
311 {
316 let root = root.clone();
317 let trigger_tx_clone = trigger_tx.clone();
318
319 std::thread::spawn(move || {
320 let watcher_root = root.clone();
325 let mut watcher = match notify::RecommendedWatcher::new(
326 move |res: Result<notify::Event, notify::Error>| {
327 match res {
328 Ok(event) => {
329 if !is_event_relevant(&watcher_root, &event) {
333 return;
334 }
335
336 if should_trigger_notify_event(&event.kind) {
337 let _ = trigger_tx_clone.send(());
339 }
340 }
341 Err(e) => log::warn!("file watcher error: {e}"),
342 }
343 },
344 notify::Config::default(),
345 ) {
346 Ok(w) => w,
347 Err(e) => {
348 log::warn!("file watcher: failed to create watcher: {e}");
349 return;
350 }
351 };
352
353 if let Err(e) = watcher.watch(&root, notify::RecursiveMode::Recursive) {
354 log::warn!("file watcher: failed to watch {root:?}: {e}");
355 return;
356 }
357
358 let (_tx_keepalive, rx_keepalive) = std::sync::mpsc::channel::<()>();
361 let _ = rx_keepalive.recv();
362 });
363 }
364
365 tokio::spawn(async move {
370 let mut rebuild: Option<tokio::task::JoinHandle<()>> = None;
371 while trigger_rx.recv().await.is_some() {
372 while trigger_rx.try_recv().is_ok() {}
374
375 tokio::time::sleep(std::time::Duration::from_millis(200)).await;
378
379 while trigger_rx.try_recv().is_ok() {}
381
382 if let Some(h) = rebuild.take() {
385 h.abort();
386 }
387 let idx = index.clone();
388 let r = root.clone();
389 rebuild = Some(tokio::task::spawn_blocking(move || {
390 idx.store(Arc::new(Some(FileIndex::build(&r))));
391 }));
392 }
393 });
394}
395
396pub struct NucleoSearch {
403 matcher: Matcher,
404}
405
406impl std::fmt::Debug for NucleoSearch {
407 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
408 f.write_str("NucleoSearch")
409 }
410}
411
412impl Default for NucleoSearch {
413 fn default() -> Self {
414 Self::new()
415 }
416}
417
418impl NucleoSearch {
419 pub fn new() -> Self {
420 Self {
421 matcher: Matcher::new(Config::DEFAULT),
422 }
423 }
424
425 pub fn search_top<'a>(
432 &mut self,
433 index: &'a FileIndex,
434 query: &str,
435 max: usize,
436 ) -> Vec<&'a FileEntry> {
437 if query.is_empty() {
438 return index.files.iter().take(max).collect();
439 }
440
441 let pattern = Pattern::parse(query, CaseMatching::Ignore, Normalization::Smart);
442
443 let mut heap: BinaryHeap<(Reverse<u32>, usize)> = BinaryHeap::with_capacity(max);
444
445 let mut process_candidate = |fi: usize| {
447 let entry = &index.files[fi];
448
449 let Some(score) = pattern.score(entry.utf32.slice(..), &mut self.matcher) else {
450 return;
451 };
452
453 if heap.len() < max {
454 heap.push((Reverse(score), fi));
455 } else if let Some(&(Reverse(min_score), _)) = heap.peek()
456 && score > min_score {
457 heap.pop();
458 heap.push((Reverse(score), fi));
459 }
460 };
461
462 if let Some(indices) = index.trigram_candidate_indices(query) {
464 for fi in indices {
465 process_candidate(fi);
466 }
467 } else {
468 for fi in 0..index.files.len() {
469 process_candidate(fi);
470 }
471 }
472
473 let mut results: Vec<(u32, usize)> = heap
475 .into_iter()
476 .map(|(Reverse(score), idx)| (score, idx))
477 .collect();
478
479 results.sort_unstable_by(|a, b| b.0.cmp(&a.0));
480
481 results
482 .into_iter()
483 .map(|(_, idx)| &index.files[idx])
484 .collect()
485 }
486
487 #[allow(dead_code)]
489 pub fn search<'a>(&mut self, index: &'a FileIndex, query: &str) -> Vec<&'a FileEntry> {
490 self.search_top(index, query, usize::MAX)
491 }
492}
493
494#[cfg(test)]
495mod tests {
496 use super::*;
497 use notify::event::{CreateKind, DataChange, ModifyKind, RenameMode};
498 use notify::Event;
499 use std::path::PathBuf;
500 use tempfile::tempdir;
501 use std::fs::{create_dir_all, write};
502
503 #[test]
504 fn should_trigger_on_create() {
505 let ev = Event { kind: notify::EventKind::Create(CreateKind::File), paths: vec![PathBuf::from("a")], attrs: Default::default() };
506 assert!(should_trigger_notify_event(&ev.kind));
507 }
508
509 #[test]
510 fn should_ignore_modify_data() {
511 let ev = Event { kind: notify::EventKind::Modify(ModifyKind::Data(DataChange::Content)), paths: vec![PathBuf::from("a")], attrs: Default::default() };
512 assert!(!should_trigger_notify_event(&ev.kind));
513 }
514
515 #[test]
516 fn should_trigger_rename() {
517 let ev = Event { kind: notify::EventKind::Modify(ModifyKind::Name(RenameMode::Both)), paths: vec![PathBuf::from("a")], attrs: Default::default() };
518 assert!(should_trigger_notify_event(&ev.kind));
519 }
520
521 #[test]
522 fn shallow_build_limits_depth() {
523 let td = tempdir().unwrap();
524 let root = td.path();
525 create_dir_all(root.join("a/b")).unwrap();
526 write(root.join("file1.txt"), b"").unwrap();
527 write(root.join("a").join("file2.txt"), b"").unwrap();
528 write(root.join("a").join("b").join("file3.txt"), b"").unwrap();
529
530 let idx_full = FileIndex::build_with_max_depth(root, None);
531 let paths_full: Vec<String> = idx_full.files.iter().map(|e| e.path.to_string_lossy().replace('\\', "/").to_string()).collect();
532 assert!(paths_full.iter().any(|p| p == "file1.txt"));
533 assert!(paths_full.iter().any(|p| p == "a/file2.txt"));
534 assert!(paths_full.iter().any(|p| p == "a/b/file3.txt"));
535
536 let idx_shallow = FileIndex::build_with_max_depth(root, Some(1));
537 let paths_shallow: Vec<String> = idx_shallow.files.iter().map(|e| e.path.to_string_lossy().replace('\\', "/").to_string()).collect();
538 assert!(paths_shallow.iter().any(|p| p == "file1.txt"));
539 assert!(!paths_shallow.iter().any(|p| p == "a/file2.txt"));
540 assert!(!paths_shallow.iter().any(|p| p == "a/b/file3.txt"));
541 }
542
543 #[test]
544 fn walkbuilder_respects_gitignore() {
545 let td = tempdir().unwrap();
546 let root = td.path();
547 write(root.join(".gitignore"), b"ignored.txt\n").unwrap();
549 write(root.join("ignored.txt"), b"").unwrap();
550 write(root.join("not_ignored.txt"), b"").unwrap();
551
552 let ev_ignored = Event { kind: notify::EventKind::Create(CreateKind::File), paths: vec![root.join("ignored.txt")], attrs: Default::default() };
554 assert!(!is_event_relevant(root, &ev_ignored));
555
556 let ev_not = Event { kind: notify::EventKind::Create(CreateKind::File), paths: vec![root.join("not_ignored.txt")], attrs: Default::default() };
558 assert!(is_event_relevant(root, &ev_not));
559 }
560}