1use crate::FFFStringStorage;
34use crate::background_watcher::BackgroundWatcher;
35use crate::bigram_filter::{BigramFilter, BigramIndexBuilder, BigramOverlay};
36use crate::error::Error;
37use crate::frecency::FrecencyTracker;
38use crate::git::GitStatusCache;
39use crate::grep::{GrepResult, GrepSearchOptions, grep_search, multi_grep_search};
40use crate::ignore::non_git_repo_overrides;
41use crate::query_tracker::QueryTracker;
42use crate::score::fuzzy_match_and_score_files;
43use crate::shared::{SharedFrecency, SharedPicker};
44use crate::simd_path::ArenaPtr;
45use crate::types::{
46 ContentCacheBudget, DirItem, DirSearchResult, FileItem, MixedItemRef, MixedSearchResult,
47 PaginationArgs, Score, ScoringContext, SearchResult,
48};
49use fff_query_parser::FFFQuery;
50use git2::{Repository, Status, StatusOptions};
51use rayon::prelude::*;
52use std::fmt::Debug;
53use std::path::{Path, PathBuf};
54use std::sync::{
55 Arc, LazyLock,
56 atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
57};
58use std::time::SystemTime;
59use tracing::{Level, debug, error, info, warn};
60
61static BACKGROUND_THREAD_POOL: LazyLock<rayon::ThreadPool> = LazyLock::new(|| {
65 let total = std::thread::available_parallelism()
66 .map(|p| p.get())
67 .unwrap_or(4);
68 let bg_threads = total.saturating_sub(2).max(1);
69 info!(
70 "Background pool: {} threads (system has {})",
71 bg_threads, total
72 );
73 rayon::ThreadPoolBuilder::new()
74 .num_threads(bg_threads)
75 .thread_name(|i| format!("fff-bg-{i}"))
76 .build()
77 .expect("failed to create background rayon pool")
78});
79
/// Operating mode of the picker: interactive Neovim UI (default) or
/// AI/agent-driven usage.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum FFFMode {
    #[default]
    Neovim,
    Ai,
}

impl FFFMode {
    /// Returns `true` when running in AI/agent mode.
    pub fn is_ai(self) -> bool {
        matches!(self, FFFMode::Ai)
    }
}
92
/// Per-call tuning knobs for the fuzzy-search entry points.
#[derive(Debug, Clone, Copy, Default)]
pub struct FuzzySearchOptions<'a> {
    /// Thread cap for scoring; 0 means "use all available parallelism".
    pub max_threads: usize,
    /// File currently open in the editor; forwarded to the scorer as context.
    pub current_file: Option<&'a str>,
    /// Project root used for query-tracker ("combo") lookups.
    pub project_path: Option<&'a Path>,
    /// Multiplier applied when combo boosting kicks in — semantics live in
    /// the scorer; forwarded verbatim via `ScoringContext`.
    pub combo_boost_score_multiplier: i32,
    /// Minimum repeat count before a query is considered a "combo".
    pub min_combo_count: u32,
    /// Page offset/limit for the returned results.
    pub pagination: PaginationArgs,
}
106
/// Snapshot of the scanned file/dir index plus the string arenas backing it.
#[derive(Debug, Clone)]
struct FileSync {
    /// Workdir of the enclosing git repository, if any.
    git_workdir: Option<PathBuf>,
    /// All files: the sorted base region first, then overflow entries
    /// appended after the initial scan.
    files: Vec<FileItem>,
    /// Length of the sorted base region (`files[..base_count]`), which is
    /// binary-searched by (parent dir index, file name).
    base_count: usize,
    /// Directory table, sorted by relative path (binary-searched in lookups).
    dirs: Vec<DirItem>,
    /// Arena builder for path strings of files added after the initial scan.
    overflow_builder: Option<crate::simd_path::ChunkedPathStoreBuilder>,
    /// Content bigram index used to pre-filter grep, once built.
    bigram_index: Option<Arc<BigramFilter>>,
    /// Mutable overlay recording per-file content changes since indexing.
    bigram_overlay: Option<Arc<parking_lot::RwLock<BigramOverlay>>>,
    /// Arena holding the relative-path strings from the initial scan.
    chunked_paths: Option<crate::simd_path::ChunkedPathStore>,
}
138
139impl FileSync {
140 fn new() -> Self {
141 Self {
142 files: Vec::new(),
143 base_count: 0,
144 dirs: Vec::new(),
145 overflow_builder: None,
146 git_workdir: None,
147 bigram_index: None,
148 bigram_overlay: None,
149 chunked_paths: None,
150 }
151 }
152
153 #[inline]
155 fn arena_base_ptr(&self) -> ArenaPtr {
156 self.chunked_paths
157 .as_ref()
158 .map(|s| s.as_arena_ptr())
159 .unwrap_or(ArenaPtr::null())
160 }
161
162 #[inline]
164 fn overflow_arena_ptr(&self) -> ArenaPtr {
165 self.overflow_builder
166 .as_ref()
167 .map(|b| b.as_arena_ptr())
168 .unwrap_or(self.arena_base_ptr())
169 }
170
171 #[inline]
173 fn arena_for_file(&self, file: &FileItem) -> ArenaPtr {
174 if file.is_overflow() {
175 self.overflow_arena_ptr()
176 } else {
177 self.arena_base_ptr()
178 }
179 }
180
181 #[inline]
184 fn files(&self) -> &[FileItem] {
185 &self.files
186 }
187
188 #[inline]
190 fn overflow_files(&self) -> &[FileItem] {
191 &self.files[self.base_count..]
192 }
193
    /// Mutable access to a file by its index into the combined `files` vec
    /// (base or overflow region).
    #[inline]
    fn get_file_mut(&mut self, index: usize) -> Option<&mut FileItem> {
        self.files.get_mut(index)
    }
199
    /// Locate `path` in the sorted base region.
    ///
    /// Returns `Ok(index)` on a hit, or `Err(insertion_position)` per the
    /// `binary_search` contract. `Err(0)` is also returned when `path` is
    /// not under `base_path` or its parent directory is unknown — callers
    /// cannot distinguish that from "insert at front" (NOTE(review):
    /// confirm callers tolerate this).
    ///
    /// Lookup is two binary searches: first the sorted dir table by
    /// relative dir path, then the base files by (dir index, file name).
    #[inline]
    fn find_file_index(&self, path: &Path, base_path: &Path) -> Result<usize, usize> {
        let arena = self.arena_base_ptr();

        let rel_path = match path.strip_prefix(base_path) {
            Ok(r) => r.to_string_lossy(),
            Err(_) => return Err(0),
        };

        // Split "dir/part/name" into its directory prefix (incl. trailing
        // separator) and bare file name.
        let parent_end = rel_path
            .rfind(std::path::is_separator)
            .map(|i| i + 1)
            .unwrap_or(0);
        let dir_rel = &rel_path[..parent_end];
        let filename = &rel_path[parent_end..];

        // Stack buffer avoids allocating while reading dir paths out of the arena.
        let mut dir_buf = [0u8; crate::simd_path::PATH_BUF_SIZE];
        let dir_idx = match self
            .dirs
            .binary_search_by(|d| d.read_relative_path(arena, &mut dir_buf).cmp(dir_rel))
        {
            Ok(idx) => idx as u32,
            Err(_) => return Err(0),
        };

        // Base files are ordered by (parent dir index, file name).
        self.files[..self.base_count].binary_search_by(|f| {
            f.parent_dir_index().cmp(&dir_idx).then_with(|| {
                let fname = f.file_name(arena);
                fname.as_str().cmp(filename)
            })
        })
    }
239
240 fn find_overflow_index(&self, rel_path: &str) -> Option<usize> {
243 let overflow_arena = self.overflow_arena_ptr();
244 self.files[self.base_count..]
245 .iter()
246 .position(|f| f.relative_path_eq(overflow_arena, rel_path))
247 .map(|pos| self.base_count + pos)
248 }
249
    /// Insert `file` at `position`, shifting later entries right.
    /// NOTE(review): does not adjust `base_count`; the base/overflow split
    /// must be maintained by the caller — confirm all call sites do so.
    fn insert_file(&mut self, position: usize, file: FileItem) {
        self.files.insert(position, file);
    }
254
    /// Retain only files for which `predicate(file, arena)` is true, passing
    /// each file the arena that backs its path. Returns how many files were
    /// removed, and updates `base_count` to the new base-region length.
    ///
    /// NOTE(review): the predicate is invoked twice for base-region files —
    /// once in the counting pass, once in `retain` — so it must be pure
    /// (no side effects, stable answer); confirm at call sites.
    fn retain_files_with_arena<F>(&mut self, mut predicate: F) -> usize
    where
        F: FnMut(&FileItem, ArenaPtr) -> bool,
    {
        let base_arena = self.arena_base_ptr();
        let overflow_arena = self.overflow_arena_ptr();

        // First pass: count surviving base files so base_count stays correct.
        let base_count = self.base_count;
        let initial_len = self.files.len();
        let base_retained = self.files[..base_count]
            .iter()
            .filter(|f| predicate(f, base_arena))
            .count();

        // Second pass: actually drop the rejected files (base + overflow).
        self.files.retain(|f| {
            predicate(
                f,
                if f.is_overflow() {
                    overflow_arena
                } else {
                    base_arena
                },
            )
        });

        self.base_count = base_retained;
        initial_len - self.files.len()
    }
283
284 fn insert_file_sorted(&mut self, file: FileItem, base_path: &Path) -> bool {
287 let arena = self.arena_base_ptr();
288 let abs_path = file.absolute_path(arena, base_path);
289 match self.find_file_index(&abs_path, base_path) {
290 Ok(_) => false, Err(position) => {
292 self.insert_file(position, file);
293 true
294 }
295 }
296 }
297}
298
299impl FileItem {
    /// Build a `FileItem` for `path`, reading size/mtime from disk
    /// (best-effort; missing metadata yields zeros). Returns the item and
    /// its relative path string.
    pub fn new(path: PathBuf, base_path: &Path, git_status: Option<Status>) -> (Self, String) {
        let metadata = std::fs::metadata(&path).ok();
        Self::new_with_metadata(path, base_path, git_status, metadata.as_ref())
    }
304
305 fn new_with_metadata(
309 path: PathBuf,
310 base_path: &Path,
311 git_status: Option<Status>,
312 metadata: Option<&std::fs::Metadata>,
313 ) -> (Self, String) {
314 let path_buf = pathdiff::diff_paths(&path, base_path).unwrap_or_else(|| path.clone());
315 let relative_path = path_buf.to_string_lossy().into_owned();
316
317 let (size, modified) = match metadata {
318 Some(metadata) => {
319 let size = metadata.len();
320 let modified = metadata
321 .modified()
322 .ok()
323 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
324 .map_or(0, |d| d.as_secs());
325
326 (size, modified)
327 }
328 None => (0, 0),
329 };
330
331 let is_binary = is_known_binary_extension(&path);
332
333 let filename_start = relative_path
334 .rfind(std::path::is_separator)
335 .map(|i| i + 1)
336 .unwrap_or(0) as u16;
337
338 let item = Self::new_raw(filename_start, size, modified, git_status, is_binary);
339 (item, relative_path)
340 }
341
342 pub fn new_from_walk(
348 path: &Path,
349 base_path: &Path,
350 git_status: Option<Status>,
351 metadata: Option<&std::fs::Metadata>,
352 ) -> (Self, String) {
353 let (size, modified) = match metadata {
354 Some(metadata) => {
355 let size = metadata.len();
356 let modified = metadata
357 .modified()
358 .ok()
359 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
360 .map_or(0, |d| d.as_secs());
361 (size, modified)
362 }
363 None => (0, 0),
364 };
365
366 let is_binary = is_known_binary_extension(path);
367
368 let rel = pathdiff::diff_paths(path, base_path).unwrap_or_else(|| path.to_path_buf());
369 let rel_str = rel.to_string_lossy().into_owned();
370 let fname_offset = rel_str
371 .rfind(std::path::is_separator)
372 .map(|i| i + 1)
373 .unwrap_or(0) as u16;
374
375 let item = Self::new_raw(fname_offset, size, modified, git_status, is_binary);
376 (item, rel_str)
377 }
378
    /// Refresh this file's access/modification frecency fields from `tracker`.
    ///
    /// Writes the absolute path into a stack buffer to avoid allocating.
    /// Currently infallible — the `Result` is kept for interface stability.
    pub(crate) fn update_frecency_scores(
        &mut self,
        tracker: &FrecencyTracker,
        arena: ArenaPtr,
        base_path: &Path,
        mode: FFFMode,
    ) -> Result<(), Error> {
        let mut abs_buf = [0u8; crate::simd_path::PATH_BUF_SIZE];
        let abs = self.write_absolute_path(arena, base_path, &mut abs_buf);
        // NOTE(review): `as i16` truncates if tracker scores exceed the i16
        // range — confirm scores are bounded.
        self.access_frecency_score = tracker.get_access_score(abs, mode) as i16;
        self.modification_frecency_score =
            tracker.get_modification_score(self.modified, self.git_status, mode) as i16;

        Ok(())
    }
394}
395
/// Construction options for [`FilePicker`].
pub struct FilePickerOptions {
    /// Directory to index.
    pub base_path: String,
    /// Enable the mmap-backed content cache warm-up — presumably used by
    /// grep; confirm against the background pipeline.
    pub enable_mmap_cache: bool,
    /// Build the content bigram index in the background.
    pub enable_content_indexing: bool,
    /// Interactive vs. AI-driven mode (affects frecency scoring).
    pub mode: FFFMode,
    /// Explicit content-cache budget; `None` auto-sizes from the repo's
    /// file count after scanning.
    pub cache_budget: Option<ContentCacheBudget>,
    /// Spawn a filesystem watcher after the initial scan.
    pub watch: bool,
}
411
412impl Default for FilePickerOptions {
413 fn default() -> Self {
414 Self {
415 base_path: ".".into(),
416 enable_mmap_cache: false,
417 enable_content_indexing: false,
418 mode: FFFMode::default(),
419 cache_budget: None,
420 watch: true,
421 }
422 }
423}
424
/// The main file-picker state: the scanned index plus the shared flags and
/// handles used to coordinate with background scan/watcher threads.
pub struct FilePicker {
    /// Interactive vs. AI-driven mode.
    pub mode: FFFMode,
    /// Root directory being indexed.
    pub base_path: PathBuf,
    /// True while a scan is in progress (shared with background threads).
    pub is_scanning: Arc<AtomicBool>,
    /// The file/dir index and its backing arenas.
    sync_data: FileSync,
    /// Budget governing the mmap'd content cache.
    cache_budget: Arc<ContentCacheBudget>,
    /// Whether the budget came from options (true) or is auto-sized (false).
    has_explicit_cache_budget: bool,
    /// Set once the background watcher has been installed.
    watcher_ready: Arc<AtomicBool>,
    /// Running count of files seen by the current scan (for progress UI).
    scanned_files_count: Arc<AtomicUsize>,
    /// Keeps the watcher thread alive while the picker exists.
    background_watcher: Option<BackgroundWatcher>,
    enable_mmap_cache: bool,
    enable_content_indexing: bool,
    watch: bool,
    /// Default abort signal for grep when the caller supplies none.
    cancelled: Arc<AtomicBool>,
    /// True while post-scan background work (warmup/indexing) runs.
    post_scan_busy: Arc<AtomicBool>,
}
449
// Manual Debug: atomics are loaded with Relaxed ordering, so the printed
// snapshot is advisory only; remaining fields are elided.
impl std::fmt::Debug for FilePicker {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("FilePicker")
            .field("base_path", &self.base_path)
            .field("sync_data", &self.sync_data)
            .field("is_scanning", &self.is_scanning.load(Ordering::Relaxed))
            .field(
                "scanned_files_count",
                &self.scanned_files_count.load(Ordering::Relaxed),
            )
            .finish_non_exhaustive()
    }
}
463
// Expose the picker's arenas through the crate-wide string-storage trait so
// generic code can resolve file path strings without knowing about FileSync.
impl FFFStringStorage for &FilePicker {
    #[inline]
    fn arena_for(&self, file: &FileItem) -> crate::simd_path::ArenaPtr {
        self.sync_data.arena_for_file(file)
    }

    #[inline]
    fn base_arena(&self) -> crate::simd_path::ArenaPtr {
        self.sync_data.arena_base_ptr()
    }

    #[inline]
    fn overflow_arena(&self) -> crate::simd_path::ArenaPtr {
        self.sync_data.overflow_arena_ptr()
    }
}
480
481impl FilePicker {
    /// Root directory being indexed.
    pub fn base_path(&self) -> &Path {
        &self.base_path
    }

    /// Strip `base_path` from `path`; `None` if outside or non-UTF-8.
    fn to_relative_path<'a>(&self, path: &'a Path) -> Option<&'a str> {
        path.strip_prefix(&self.base_path)
            .ok()
            .and_then(|p| p.to_str())
    }

    /// Whether the mmap content cache warm-up was requested.
    pub fn need_enable_mmap_cache(&self) -> bool {
        self.enable_mmap_cache
    }

    /// Whether background content (bigram) indexing was requested.
    pub fn need_enable_content_indexing(&self) -> bool {
        self.enable_content_indexing
    }

    /// Whether a filesystem watcher should be spawned after scanning.
    pub fn need_watch(&self) -> bool {
        self.watch
    }

    /// Current operating mode.
    pub fn mode(&self) -> FFFMode {
        self.mode
    }

    /// Budget governing the mmap'd content cache.
    pub fn cache_budget(&self) -> &ContentCacheBudget {
        &self.cache_budget
    }

    /// Content bigram index, once warm-up has built it.
    pub fn bigram_index(&self) -> Option<&BigramFilter> {
        self.sync_data.bigram_index.as_deref()
    }

    /// Mutable overlay tracking content changes since indexing.
    pub fn bigram_overlay(&self) -> Option<&parking_lot::RwLock<BigramOverlay>> {
        self.sync_data.bigram_overlay.as_deref()
    }

    /// Mutable access to a file by index into the combined file list.
    pub fn get_file_mut(&mut self, index: usize) -> Option<&mut FileItem> {
        self.sync_data.get_file_mut(index)
    }

    /// Install a freshly built bigram index and its change overlay.
    pub fn set_bigram_index(&mut self, index: BigramFilter, overlay: BigramOverlay) {
        self.sync_data.bigram_index = Some(Arc::new(index));
        self.sync_data.bigram_overlay = Some(Arc::new(parking_lot::RwLock::new(overlay)));
    }

    /// Workdir of the enclosing git repository, if one was detected.
    pub fn git_root(&self) -> Option<&Path> {
        self.sync_data.git_workdir.as_deref()
    }

    /// All indexed files (sorted base region, then overflow entries).
    pub fn get_files(&self) -> &[FileItem] {
        self.sync_data.files()
    }

    /// Files appended after the initial scan (the overflow region).
    pub fn get_overflow_files(&self) -> &[FileItem] {
        self.sync_data.overflow_files()
    }

    /// The directory table, sorted by relative path.
    pub fn get_dirs(&self) -> &[DirItem] {
        &self.sync_data.dirs
    }

    /// Heap bytes held by the path arena. The second and third slots are
    /// always 0 — presumably kept for a legacy multi-arena layout; confirm
    /// with callers before collapsing the tuple.
    pub fn arena_bytes(&self) -> (usize, usize, usize) {
        let chunked = self
            .sync_data
            .chunked_paths
            .as_ref()
            .map_or(0, |s| s.heap_bytes());
        (chunked, 0, 0)
    }
561
562 #[tracing::instrument(level = "debug", skip(self))]
566 pub fn extract_watch_dirs(&self) -> Vec<PathBuf> {
567 let dir_table = &self.sync_data.dirs;
568
569 if !dir_table.is_empty() {
570 let base = self.base_path.as_path();
575 let arena = self.arena_base_ptr();
576 let mut all_dirs = Vec::with_capacity(dir_table.len() * 2);
577 let mut seen = std::collections::HashSet::with_capacity(dir_table.len() * 2);
578
579 for dir_item in dir_table {
580 let mut current = dir_item.absolute_path(arena, base);
581 while current.as_path() != base {
582 if !seen.insert(current.clone()) {
583 break; }
585 all_dirs.push(current.clone());
586 if !current.pop() {
587 break;
588 }
589 }
590 }
591
592 return all_dirs;
593 }
594
595 let files = self.sync_data.files();
597 let base = self.base_path.as_path();
598 let arena = self.arena_base_ptr();
599 let mut dirs = Vec::with_capacity(files.len() / 4);
600 let mut current = self.base_path.clone();
601
602 for file in files {
603 let abs = file.absolute_path(arena, base);
604 let Some(parent) = abs.parent() else {
605 continue;
606 };
607 if parent == current.as_path() {
608 continue;
609 }
610
611 while current.as_path() != base && !parent.starts_with(¤t) {
612 current.pop();
613 }
614
615 let Ok(remainder) = parent.strip_prefix(¤t) else {
616 continue;
617 };
618 for component in remainder.components() {
619 current.push(component);
620 dirs.push(current.clone());
621 }
622 }
623
624 dirs
625 }
626
    /// Validate `options.base_path` and build an idle picker (no scan is
    /// started; call `collect_files` or `new_with_shared_state`).
    ///
    /// # Errors
    /// - `Error::InvalidPath` when the path does not exist.
    /// - `Error::FilesystemRoot` when the path has no parent (refuses to
    ///   index a filesystem root).
    pub fn new(options: FilePickerOptions) -> Result<Self, Error> {
        let path = PathBuf::from(&options.base_path);
        if !path.exists() {
            error!("Base path does not exist: {}", options.base_path);
            return Err(Error::InvalidPath(path));
        }
        if path.parent().is_none() {
            error!("Refusing to index filesystem root: {}", path.display());
            return Err(Error::FilesystemRoot(path));
        }

        // Remember whether the budget was explicit so collect_files knows
        // whether to auto-size it from the repo's file count.
        let has_explicit_budget = options.cache_budget.is_some();
        let initial_budget = options.cache_budget.unwrap_or_default();

        Ok(FilePicker {
            background_watcher: None,
            base_path: path,
            cache_budget: Arc::new(initial_budget),
            cancelled: Arc::new(AtomicBool::new(false)),
            has_explicit_cache_budget: has_explicit_budget,
            is_scanning: Arc::new(AtomicBool::new(false)),
            mode: options.mode,
            post_scan_busy: Arc::new(AtomicBool::new(false)),
            scanned_files_count: Arc::new(AtomicUsize::new(0)),
            sync_data: FileSync::new(),
            enable_mmap_cache: options.enable_mmap_cache,
            enable_content_indexing: options.enable_content_indexing,
            watch: options.watch,
            watcher_ready: Arc::new(AtomicBool::new(false)),
        })
    }
661
    /// Create a picker, move it into `shared_picker`, and kick off the
    /// background scan/watcher pipeline.
    ///
    /// Ordering matters: `is_scanning` is raised and all shared handles are
    /// cloned *before* the picker is moved into the shared slot, and the
    /// background work is spawned only *after* the slot is populated, so
    /// worker threads always observe a picker behind the lock.
    pub fn new_with_shared_state(
        shared_picker: SharedPicker,
        shared_frecency: SharedFrecency,
        options: FilePickerOptions,
    ) -> Result<(), Error> {
        let picker = Self::new(options)?;

        info!(
            "Spawning background threads: base_path={}, warmup={}, content_indexing={}, mode={:?}",
            picker.base_path.display(),
            picker.enable_mmap_cache,
            picker.enable_content_indexing,
            picker.mode,
        );

        let warmup = picker.enable_mmap_cache;
        let content_indexing = picker.enable_content_indexing;
        let watch = picker.watch;
        let mode = picker.mode;

        picker.is_scanning.store(true, Ordering::Release);

        // Clone every handle the background pipeline needs before the
        // picker itself is given away.
        let scan_signal = Arc::clone(&picker.is_scanning);
        let watcher_ready = Arc::clone(&picker.watcher_ready);
        let synced_files_count = Arc::clone(&picker.scanned_files_count);
        let cancelled = Arc::clone(&picker.cancelled);
        let post_scan_busy = Arc::clone(&picker.post_scan_busy);
        let path = picker.base_path.clone();

        {
            let mut guard = shared_picker.write()?;
            *guard = Some(picker);
        }

        spawn_scan_and_watcher(
            path,
            scan_signal,
            watcher_ready,
            synced_files_count,
            warmup,
            content_indexing,
            watch,
            mode,
            shared_picker,
            shared_frecency,
            cancelled,
            post_scan_busy,
        );

        Ok(())
    }
715
    /// Synchronously scan `base_path` and replace the in-memory index.
    ///
    /// Blocking alternative to the background pipeline: walks the
    /// filesystem, swaps in the fresh `FileSync`, sizes the content-cache
    /// budget from the file count (unless an explicit budget was supplied),
    /// and back-fills git statuses once the parallel git scan joins.
    pub fn collect_files(&mut self) -> Result<(), Error> {
        self.is_scanning.store(true, Ordering::Relaxed);
        self.scanned_files_count.store(0, Ordering::Relaxed);

        // Frecency is not consulted during a plain synchronous scan.
        let empty_frecency = SharedFrecency::default();
        let walk = walk_filesystem(
            &self.base_path,
            &self.scanned_files_count,
            &empty_frecency,
            self.mode,
        )?;

        self.sync_data = walk.sync;

        if !self.has_explicit_cache_budget {
            // Auto-size the budget to the repository size.
            let file_count = self.sync_data.files().len();
            self.cache_budget = Arc::new(ContentCacheBudget::new_for_repo(file_count));
        } else {
            self.cache_budget.reset();
        }

        // Git status was computed on a side thread during the walk; join it
        // and stamp the results onto the freshly collected files.
        if let Ok(Some(git_cache)) = walk.git_handle.join() {
            let arena = self.arena_base_ptr();
            for file in self.sync_data.files.iter_mut() {
                file.git_status =
                    git_cache.lookup_status(&file.absolute_path(arena, &self.base_path));
            }
        }

        self.is_scanning.store(false, Ordering::Relaxed);
        Ok(())
    }
759
    /// Start the filesystem watcher over every directory currently in the
    /// index, then flip `watcher_ready` so progress reporting reflects it.
    ///
    /// The watcher handle is stored on `self`; dropping the picker stops it.
    pub fn spawn_background_watcher(
        &mut self,
        shared_picker: &SharedPicker,
        shared_frecency: &SharedFrecency,
    ) -> Result<(), Error> {
        let git_workdir = self.sync_data.git_workdir.clone();
        let watch_dirs = self.extract_watch_dirs();
        let watcher = BackgroundWatcher::new(
            self.base_path.clone(),
            git_workdir,
            shared_picker.clone(),
            shared_frecency.clone(),
            self.mode,
            watch_dirs,
        )?;
        self.background_watcher = Some(watcher);
        self.watcher_ready.store(true, Ordering::Release);
        Ok(())
    }
784
    /// Fuzzy-search all indexed files for `query`.
    ///
    /// When `query_tracker` and `options.project_path` are both present,
    /// the last entry for this exact raw query (seen at least
    /// `min_combo_count` times) is fed to the scorer as a "combo" boost hint.
    pub fn fuzzy_search<'q>(
        &self,
        query: &'q FFFQuery<'q>,
        query_tracker: Option<&QueryTracker>,
        options: FuzzySearchOptions<'q>,
    ) -> SearchResult<'_> {
        let files = self.get_files();
        // 0 means "no cap": use all available parallelism (fallback 4).
        let max_threads = if options.max_threads == 0 {
            std::thread::available_parallelism()
                .map(|n| n.get())
                .unwrap_or(4)
        } else {
            options.max_threads
        };

        debug!(
            raw_query = ?query.raw_query,
            pagination = ?options.pagination,
            ?max_threads,
            current_file = ?options.current_file,
            "Fuzzy search",
        );

        let total_files = files.len();
        let location = query.location;

        // The typo budget is derived from the first fuzzy part of the query.
        let effective_query = match &query.fuzzy_query {
            fff_query_parser::FuzzyQuery::Text(t) => *t,
            fff_query_parser::FuzzyQuery::Parts(parts) if !parts.is_empty() => parts[0],
            _ => query.raw_query.trim(),
        };

        // Roughly one typo per 4 chars, clamped to [2, 6].
        let max_typos = (effective_query.len() as u16 / 4).clamp(2, 6);

        let last_same_query_entry =
            query_tracker
                .zip(options.project_path)
                .and_then(|(tracker, project_path)| {
                    tracker
                        .get_last_query_entry(
                            query.raw_query,
                            project_path,
                            options.min_combo_count,
                        )
                        .ok()
                        .flatten()
                });

        let context = ScoringContext {
            query,
            max_typos,
            max_threads,
            project_path: options.project_path,
            current_file: options.current_file,
            last_same_query_match: last_same_query_entry,
            combo_boost_score_multiplier: options.combo_boost_score_multiplier,
            min_combo_count: options.min_combo_count,
            pagination: options.pagination,
        };

        let time = std::time::Instant::now();

        // Overflow files live in their own arena once one has been created;
        // before that, both regions share the base arena.
        let base_arena = self.sync_data.arena_base_ptr();
        let overflow_arena = self
            .sync_data
            .overflow_builder
            .as_ref()
            .map(|b| b.as_arena_ptr())
            .unwrap_or(base_arena);

        let (items, scores, total_matched) = fuzzy_match_and_score_files(
            files,
            &context,
            self.sync_data.base_count,
            base_arena,
            overflow_arena,
        );

        info!(
            ?query,
            completed_in = ?time.elapsed(),
            total_matched,
            returned_count = items.len(),
            pagination = ?options.pagination,
            "Fuzzy search completed",
        );

        SearchResult {
            items,
            scores,
            total_matched,
            total_files,
            location,
        }
    }
889
    /// Fuzzy-search the directory table only.
    ///
    /// Uses the same typo-budget heuristic as `fuzzy_search`; combo
    /// boosting does not apply to directories (the combo fields in the
    /// scoring context are zeroed).
    pub fn fuzzy_search_directories<'q>(
        &self,
        query: &'q FFFQuery<'q>,
        options: FuzzySearchOptions<'q>,
    ) -> DirSearchResult<'_> {
        let dirs = self.get_dirs();
        // 0 means "no cap": use all available parallelism (fallback 4).
        let max_threads = if options.max_threads == 0 {
            std::thread::available_parallelism()
                .map(|n| n.get())
                .unwrap_or(4)
        } else {
            options.max_threads
        };

        let total_dirs = dirs.len();

        // Typo budget from the first fuzzy part of the query, as in
        // `fuzzy_search`.
        let effective_query = match &query.fuzzy_query {
            fff_query_parser::FuzzyQuery::Text(t) => *t,
            fff_query_parser::FuzzyQuery::Parts(parts) if !parts.is_empty() => parts[0],
            _ => query.raw_query.trim(),
        };

        let max_typos = (effective_query.len() as u16 / 4).clamp(2, 6);

        let context = ScoringContext {
            query,
            max_typos,
            max_threads,
            project_path: options.project_path,
            current_file: options.current_file,
            last_same_query_match: None,
            combo_boost_score_multiplier: 0,
            min_combo_count: 0,
            pagination: options.pagination,
        };

        let arena = self.sync_data.arena_base_ptr();
        let time = std::time::Instant::now();

        let (items, scores, total_matched) =
            crate::score::fuzzy_match_and_score_dirs(dirs, &context, arena);

        info!(
            ?query,
            completed_in = ?time.elapsed(),
            total_matched,
            returned_count = items.len(),
            "Directory search completed",
        );

        DirSearchResult {
            items,
            scores,
            total_matched,
            total_dirs,
        }
    }
950
951 pub fn fuzzy_search_mixed<'q>(
961 &self,
962 query: &'q FFFQuery<'q>,
963 query_tracker: Option<&QueryTracker>,
964 options: FuzzySearchOptions<'q>,
965 ) -> MixedSearchResult<'_> {
966 let location = query.location;
967 let page_offset = options.pagination.offset;
968 let page_limit = if options.pagination.limit > 0 {
969 options.pagination.limit
970 } else {
971 100
972 };
973
974 let dirs_only =
975 query.raw_query.ends_with(std::path::MAIN_SEPARATOR) || query.raw_query.ends_with('/');
976
977 let internal_limit = page_offset.saturating_add(page_limit).saturating_mul(2);
979
980 let dir_options = FuzzySearchOptions {
981 pagination: PaginationArgs {
982 offset: 0,
983 limit: internal_limit,
984 },
985 ..options
986 };
987 let dir_results = self.fuzzy_search_directories(query, dir_options);
988
989 if dirs_only {
990 let total_matched = dir_results.total_matched;
991 let total_dirs = dir_results.total_dirs;
992
993 let mut merged: Vec<(MixedItemRef<'_>, Score)> =
994 Vec::with_capacity(dir_results.items.len());
995 for (dir, score) in dir_results.items.into_iter().zip(dir_results.scores) {
996 merged.push((MixedItemRef::Dir(dir), score));
997 }
998
999 if page_offset >= merged.len() {
1000 return MixedSearchResult {
1001 items: vec![],
1002 scores: vec![],
1003 total_matched,
1004 total_files: self.sync_data.files().len(),
1005 total_dirs,
1006 location,
1007 };
1008 }
1009
1010 let end = (page_offset + page_limit).min(merged.len());
1011 let page = merged.drain(page_offset..end);
1012 let (items, scores): (Vec<_>, Vec<_>) = page.unzip();
1013
1014 return MixedSearchResult {
1015 items,
1016 scores,
1017 total_matched,
1018 total_files: self.sync_data.files().len(),
1019 total_dirs,
1020 location,
1021 };
1022 }
1023
1024 let file_options = FuzzySearchOptions {
1025 pagination: PaginationArgs {
1026 offset: 0,
1027 limit: internal_limit,
1028 },
1029 ..options
1030 };
1031 let file_results = self.fuzzy_search(query, query_tracker, file_options);
1032
1033 let total_matched = file_results.total_matched + dir_results.total_matched;
1035 let total_files = file_results.total_files;
1036 let total_dirs = dir_results.total_dirs;
1037
1038 let mut merged: Vec<(MixedItemRef<'_>, Score)> =
1039 Vec::with_capacity(file_results.items.len() + dir_results.items.len());
1040
1041 for (file, score) in file_results.items.into_iter().zip(file_results.scores) {
1042 merged.push((MixedItemRef::File(file), score));
1043 }
1044 for (dir, score) in dir_results.items.into_iter().zip(dir_results.scores) {
1045 merged.push((MixedItemRef::Dir(dir), score));
1046 }
1047
1048 merged.sort_unstable_by_key(|b| std::cmp::Reverse(b.1.total));
1050
1051 if page_offset >= merged.len() {
1053 return MixedSearchResult {
1054 items: vec![],
1055 scores: vec![],
1056 total_matched,
1057 total_files,
1058 total_dirs,
1059 location,
1060 };
1061 }
1062
1063 let end = (page_offset + page_limit).min(merged.len());
1064 let page = merged.drain(page_offset..end);
1065 let (items, scores): (Vec<_>, Vec<_>) = page.unzip();
1066
1067 MixedSearchResult {
1068 items,
1069 scores,
1070 total_matched,
1071 total_files,
1072 total_dirs,
1073 location,
1074 }
1075 }
1076
    /// Content search across indexed files.
    ///
    /// Takes a read guard on the bigram overlay for the duration of the
    /// search so the pre-filter sees a consistent view; falls back to the
    /// picker's own `cancelled` flag when the caller supplies no abort signal.
    pub fn grep(&self, query: &FFFQuery<'_>, options: &GrepSearchOptions) -> GrepResult<'_> {
        let overlay_guard = self.sync_data.bigram_overlay.as_ref().map(|o| o.read());
        let arena = self.arena_base_ptr();
        let overflow_arena = self.sync_data.overflow_arena_ptr();
        let cancel = options.abort_signal.as_deref().unwrap_or(&self.cancelled);

        grep_search(
            self.get_files(),
            query,
            options,
            self.cache_budget(),
            self.sync_data.bigram_index.as_deref(),
            overlay_guard.as_deref(),
            cancel,
            &self.base_path,
            arena,
            overflow_arena,
        )
    }
1100
    /// Content search for several patterns at once, subject to extra query
    /// constraints. Same overlay/cancellation setup as [`Self::grep`].
    pub fn multi_grep(
        &self,
        patterns: &[&str],
        constraints: &[fff_query_parser::Constraint<'_>],
        options: &GrepSearchOptions,
    ) -> GrepResult<'_> {
        let overlay_guard = self.sync_data.bigram_overlay.as_ref().map(|o| o.read());
        let arena = self.arena_base_ptr();
        let overflow_arena = self.sync_data.overflow_arena_ptr();
        let cancel = options.abort_signal.as_deref().unwrap_or(&self.cancelled);

        multi_grep_search(
            self.get_files(),
            patterns,
            constraints,
            options,
            self.cache_budget(),
            self.sync_data.bigram_index.as_deref(),
            overlay_guard.as_deref(),
            cancel,
            &self.base_path,
            arena,
            overflow_arena,
        )
    }
1127
    /// Like [`Self::grep`] but without consulting the bigram overlay —
    /// avoids taking its read lock, at the cost of the pre-filter not
    /// seeing post-index content changes.
    pub fn grep_without_overlay(
        &self,
        query: &FFFQuery<'_>,
        options: &GrepSearchOptions,
    ) -> GrepResult<'_> {
        let arena = self.arena_base_ptr();
        let overflow_arena = self.sync_data.overflow_arena_ptr();
        let cancel = options.abort_signal.as_deref().unwrap_or(&self.cancelled);

        grep_search(
            self.get_files(),
            query,
            options,
            self.cache_budget(),
            self.sync_data.bigram_index.as_deref(),
            None,
            cancel,
            &self.base_path,
            arena,
            overflow_arena,
        )
    }
1151
1152 pub fn get_scan_progress(&self) -> ScanProgress {
1154 let scanned_count = self.scanned_files_count.load(Ordering::Relaxed);
1155 let is_scanning = self.is_scanning.load(Ordering::Relaxed);
1156 ScanProgress {
1157 scanned_files_count: scanned_count,
1158 is_scanning,
1159 is_watcher_ready: self.watcher_ready.load(Ordering::Relaxed),
1160 is_warmup_complete: self.sync_data.bigram_index.is_some(),
1161 }
1162 }
1163
    /// Apply a batch of git statuses to the matching files, refreshing each
    /// touched file's frecency scores and propagating its access score to
    /// its parent directory (best-score-wins).
    ///
    /// Paths that cannot be resolved to an indexed file are logged and
    /// skipped; the batch continues.
    pub fn update_git_statuses(
        &mut self,
        status_cache: GitStatusCache,
        shared_frecency: &SharedFrecency,
    ) -> Result<(), Error> {
        debug!(
            statuses_count = status_cache.statuses_len(),
            "Updating git status",
        );

        let mode = self.mode;
        // Cloned up front so the closure below can borrow `self` mutably.
        let bp = self.base_path.clone();
        let arena = self.arena_base_ptr();
        let frecency = shared_frecency.read()?;
        status_cache
            .into_iter()
            .try_for_each(|(path, status)| -> Result<(), Error> {
                if let Some(file) = self.get_mut_file_by_path(&path) {
                    file.git_status = Some(status);
                    if let Some(ref f) = *frecency {
                        file.update_frecency_scores(f, arena, &bp, mode)?;
                    }
                    // Bubble the file's access score up to its parent dir.
                    let score = file.access_frecency_score as i32;
                    let dir_idx = file.parent_dir_index() as usize;
                    if let Some(dir) = self.sync_data.dirs.get_mut(dir_idx) {
                        dir.update_frecency_if_larger(score);
                    }
                } else {
                    error!(?path, "Couldn't update the git status for path");
                }
                Ok(())
            })?;

        Ok(())
    }
1201
    /// Refresh frecency scores for one file (looked up first in the sorted
    /// base region, then in the overflow region) and propagate its access
    /// score to the parent directory. Silently a no-op when the file is
    /// not indexed.
    pub fn update_single_file_frecency(
        &mut self,
        file_path: impl AsRef<Path>,
        frecency_tracker: &FrecencyTracker,
    ) -> Result<(), Error> {
        let path = file_path.as_ref();
        let arena = self.arena_base_ptr();
        let rel = self.to_relative_path(path).unwrap_or("");
        let index = self
            .sync_data
            .find_file_index(path, &self.base_path)
            .ok()
            .or_else(|| self.sync_data.find_overflow_index(rel));
        if let Some(index) = index
            && let Some(file) = self.sync_data.get_file_mut(index)
        {
            file.update_frecency_scores(frecency_tracker, arena, &self.base_path, self.mode)?;

            // Bubble the file's access score up to its parent dir.
            let score = file.access_frecency_score as i32;
            let dir_idx = file.parent_dir_index() as usize;
            if let Some(dir) = self.sync_data.dirs.get_mut(dir_idx) {
                dir.update_frecency_if_larger(score);
            }
        }

        Ok(())
    }
1230
1231 pub fn get_file_by_path(&self, path: impl AsRef<Path>) -> Option<&FileItem> {
1232 self.sync_data
1233 .find_file_index(path.as_ref(), &self.base_path)
1234 .ok()
1235 .and_then(|index| self.sync_data.files().get(index))
1236 }
1237
1238 pub fn get_mut_file_by_path(&mut self, path: impl AsRef<Path>) -> Option<&mut FileItem> {
1239 let path = path.as_ref();
1240 let rel = self.to_relative_path(path).unwrap_or("");
1241 let index = self
1242 .sync_data
1243 .find_file_index(path, &self.base_path)
1244 .ok()
1245 .or_else(|| self.sync_data.find_overflow_index(rel));
1246 index.and_then(|i| self.sync_data.get_file_mut(i))
1247 }
1248
1249 pub fn add_file_sorted(&mut self, file: FileItem) -> Option<&FileItem> {
1251 let arena = self.arena_base_ptr();
1252 let path = file.absolute_path(arena, &self.base_path);
1253
1254 if self.sync_data.insert_file_sorted(file, &self.base_path) {
1255 self.sync_data
1257 .find_file_index(&path, &self.base_path)
1258 .ok()
1259 .and_then(|idx| self.sync_data.get_file_mut(idx))
1260 .map(|file_mut| &*file_mut) } else {
1262 warn!(
1264 "Trying to insert a file that already exists: {}",
1265 path.display()
1266 );
1267 self.sync_data
1268 .find_file_index(&path, &self.base_path)
1269 .ok()
1270 .and_then(|idx| self.sync_data.get_file_mut(idx))
1271 .map(|file_mut| &*file_mut) }
1273 }
1274
    /// Watcher callback for a created or modified file.
    ///
    /// Three cases, in order:
    /// 1. Known base-region file: clear any delete tombstone, bump mtime
    ///    (invalidating its mmap if newer), and feed new content to the
    ///    bigram overlay.
    /// 2. Known overflow file: bump mtime / invalidate mmap only.
    ///    NOTE(review): the overlay is *not* updated here — confirm overflow
    ///    files are intentionally excluded from the bigram overlay.
    /// 3. Unknown file: append it to the overflow region, storing its path
    ///    in the (lazily created) overflow arena.
    #[tracing::instrument(skip(self), name = "timing_update", level = Level::DEBUG)]
    pub fn on_create_or_modify(&mut self, path: impl AsRef<Path> + Debug) -> Option<&FileItem> {
        let path = path.as_ref();
        // Cloned handle so the overlay can be written while `file` is borrowed.
        let overlay = self.sync_data.bigram_overlay.as_ref().map(Arc::clone);

        if let Ok(pos) = self.sync_data.find_file_index(path, &self.base_path) {
            let file = self.sync_data.get_file_mut(pos)?;

            if file.is_deleted() {
                // A create event for a tombstoned entry resurrects it.
                file.set_deleted(false);
                debug!(
                    "on_create_or_modify: resurrected tombstoned file at index {}",
                    pos
                );
            }

            debug!(
                "on_create_or_modify: file EXISTS at index {}, updating metadata",
                pos
            );

            let modified = match std::fs::metadata(path) {
                Ok(metadata) => metadata
                    .modified()
                    .ok()
                    .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok()),
                Err(e) => {
                    error!("Failed to get metadata for {}: {}", path.display(), e);
                    None
                }
            };

            if let Some(modified) = modified {
                let modified = modified.as_secs();
                // Only a strictly newer mtime invalidates the cached content.
                if file.modified < modified {
                    file.modified = modified;
                    file.invalidate_mmap(&self.cache_budget);
                }
            }

            // Push the fresh content into the bigram overlay (best-effort).
            if let Some(ref overlay) = overlay
                && let Ok(content) = std::fs::read(path)
            {
                overlay.write().modify_file(pos, &content);
            }

            return Some(&*file);
        }

        let rel_path = self.to_relative_path(path).unwrap_or("");
        if let Some(abs_idx) = self.sync_data.find_overflow_index(rel_path) {
            let file = self.sync_data.get_file_mut(abs_idx)?;
            let modified = std::fs::metadata(path)
                .ok()
                .and_then(|m| m.modified().ok())
                .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok());
            if let Some(modified) = modified {
                let modified = modified.as_secs();
                if file.modified < modified {
                    file.modified = modified;
                    file.invalidate_mmap(&self.cache_budget);
                }
            }
            return Some(&*file);
        }

        debug!(
            "on_create_or_modify: file NEW, appending to overflow (base: {}, overflow: {})",
            self.sync_data.base_count,
            self.sync_data.overflow_files().len(),
        );

        let (mut file_item, rel_path) = FileItem::new(path.to_path_buf(), &self.base_path, None);

        // Lazily create the overflow arena on first post-scan addition.
        let builder = self
            .sync_data
            .overflow_builder
            .get_or_insert_with(|| crate::simd_path::ChunkedPathStoreBuilder::new(64));

        let cs = builder.add_file_immediate(&rel_path, file_item.path.filename_offset);
        file_item.set_path(cs);
        file_item.set_overflow(true);
        self.sync_data.files.push(file_item);
        self.sync_data.files.last()
    }
1365
1366 pub fn remove_file_by_path(&mut self, path: impl AsRef<Path>) -> bool {
1368 let path = path.as_ref();
1369 match self.sync_data.find_file_index(path, &self.base_path) {
1370 Ok(index) => {
1371 let file = &mut self.sync_data.files[index];
1372 file.set_deleted(true);
1373 file.invalidate_mmap(&self.cache_budget);
1374 if let Some(ref overlay) = self.sync_data.bigram_overlay {
1375 overlay.write().delete_file(index);
1376 }
1377 true
1378 }
1379 Err(_) => {
1380 let rel = self.to_relative_path(path).unwrap_or("");
1383 if let Some(abs_pos) = self.sync_data.find_overflow_index(rel) {
1384 self.sync_data.files.remove(abs_pos);
1385 true
1386 } else {
1387 false
1388 }
1389 }
1390 }
1391 }
1392
1393 pub fn remove_all_files_in_dir(&mut self, dir: impl AsRef<Path>) -> usize {
1395 let dir_path = dir.as_ref();
1396 let relative_dir = self.to_relative_path(dir_path).unwrap_or("").to_string();
1397
1398 let dir_prefix = if relative_dir.is_empty() {
1399 String::new()
1400 } else {
1401 format!("{}{}", relative_dir, std::path::MAIN_SEPARATOR)
1402 };
1403
1404 self.sync_data.retain_files_with_arena(|file, arena| {
1405 !file.relative_path_starts_with(arena, &dir_prefix)
1406 })
1407 }
1408
    /// Signals cooperative cancellation to background workers (scan thread,
    /// post-scan rebuild). Release ordering publishes the flag to threads
    /// that load it with Acquire.
    pub fn cancel(&self) {
        self.cancelled.store(true, Ordering::Release);
    }
1413
1414 pub fn stop_background_monitor(&mut self) {
1415 if let Some(mut watcher) = self.background_watcher.take() {
1416 watcher.stop();
1417 }
1418 }
1419
    /// Base pointer of the chunked path arena; required to resolve the
    /// offset-encoded paths stored inside each [`FileItem`].
    #[inline]
    pub(crate) fn arena_base_ptr(&self) -> ArenaPtr {
        self.sync_data.arena_base_ptr()
    }
1424
    /// Spawns a detached thread that redoes the post-scan work — cache
    /// budget sizing, mmap warmup, and bigram index rebuild — after a rescan
    /// replaced `sync_data`. Returns `false` (without spawning) when the
    /// picker has already been cancelled.
    pub(crate) fn spawn_post_rescan_rebuild(&self, shared_picker: SharedPicker) -> bool {
        if self.cancelled.load(Ordering::Relaxed) {
            return false;
        }

        let post_scan_busy = Arc::clone(&self.post_scan_busy);
        let cancelled = Arc::clone(&self.cancelled);
        let auto_budget = !self.has_explicit_cache_budget;
        let do_warmup = self.enable_mmap_cache;
        let do_content_indexing = self.enable_content_indexing;

        // Mark busy before spawning so trigger_rescan() refuses to replace
        // the file table while this phase is in flight.
        post_scan_busy.store(true, Ordering::Release);

        std::thread::spawn(move || {
            let phase_start = std::time::Instant::now();

            // Re-size the content cache budget to the new file count unless
            // the user pinned an explicit budget.
            if auto_budget
                && !cancelled.load(Ordering::Acquire)
                && let Ok(mut guard) = shared_picker.write()
                && let Some(ref mut picker) = *guard
                && !picker.has_explicit_cache_budget
            {
                let file_count = picker.sync_data.files().len();
                picker.cache_budget = Arc::new(ContentCacheBudget::new_for_repo(file_count));
            }

            // Snapshot the file slice as raw pointer + len so the heavy
            // warmup/indexing below can run without holding the read lock.
            // SAFETY(review): the raw-parts slice outlives the read guard;
            // soundness presumably relies on `post_scan_busy` preventing any
            // reallocation of `sync_data.files` until this phase ends —
            // TODO confirm every mutation path honors the flag.
            let files_snapshot = if !cancelled.load(Ordering::Acquire) {
                shared_picker.read().ok().and_then(|guard| {
                    guard.as_ref().map(|picker| {
                        let files = picker.sync_data.files();
                        let ptr = files.as_ptr();
                        let len = files.len();
                        let base_count = picker.sync_data.base_count;
                        let budget = Arc::clone(&picker.cache_budget);
                        let static_files: &[FileItem] =
                            unsafe { std::slice::from_raw_parts(ptr, len) };
                        (
                            static_files,
                            base_count,
                            budget,
                            picker.base_path().to_path_buf(),
                            picker.arena_base_ptr(),
                        )
                    })
                })
            } else {
                None
            };

            if let Some((files, base_count, budget, bp, arena)) = files_snapshot {
                // Phase 1: fault mmap content for hot files into the cache.
                if do_warmup && !cancelled.load(Ordering::Acquire) {
                    let t = std::time::Instant::now();
                    warmup_mmaps(files, &budget, &bp, arena);
                    info!(
                        "Rescan warmup completed in {:.2}s (cached {} files, {} bytes)",
                        t.elapsed().as_secs_f64(),
                        budget.cached_count.load(Ordering::Relaxed),
                        budget.cached_bytes.load(Ordering::Relaxed),
                    );
                }

                // Phase 2: rebuild the content bigram index over base-scan
                // files only (overflow files are handled by the overlay).
                if do_content_indexing && !cancelled.load(Ordering::Acquire) {
                    let t = std::time::Instant::now();
                    let base_files = &files[..base_count.min(files.len())];
                    info!(
                        "Rescan: starting bigram index build for {} files...",
                        base_files.len()
                    );
                    let (index, content_binary) =
                        build_bigram_index(base_files, &budget, &bp, arena);
                    info!(
                        "Rescan: bigram index ready in {:.2}s",
                        t.elapsed().as_secs_f64()
                    );

                    // Publish the fresh index and a new empty overlay under
                    // the write lock; also mark files whose content (not
                    // extension) revealed them as binary.
                    if let Ok(mut guard) = shared_picker.write()
                        && let Some(ref mut picker) = *guard
                    {
                        for &idx in &content_binary {
                            if let Some(file) = picker.sync_data.get_file_mut(idx) {
                                file.set_binary(true);
                            }
                        }

                        picker.sync_data.bigram_index = Some(Arc::new(index));
                        picker.sync_data.bigram_overlay = Some(Arc::new(parking_lot::RwLock::new(
                            BigramOverlay::new(base_count),
                        )));
                    }
                }
            }

            post_scan_busy.store(false, Ordering::Release);
            info!(
                "Rescan post-scan phase total: {:.2}s (warmup={}, content_indexing={})",
                phase_start.elapsed().as_secs_f64(),
                do_warmup,
                do_content_indexing,
            );
        });

        true
    }
1543
    /// Synchronously re-walks the filesystem, replaces `sync_data`, and
    /// re-applies git status plus frecency scores in parallel.
    ///
    /// Returns `Ok(())` without doing anything while a scan or a post-scan
    /// bigram build is already in flight. Note this does NOT rebuild the
    /// bigram index itself — see `spawn_post_rescan_rebuild`.
    pub fn trigger_rescan(&mut self, shared_frecency: &SharedFrecency) -> Result<(), Error> {
        if self.is_scanning.load(Ordering::Relaxed) {
            debug!("Scan already in progress, skipping trigger_rescan");
            return Ok(());
        }

        // The post-scan thread snapshots raw pointers into the current file
        // table (see spawn_post_rescan_rebuild); replacing sync_data while
        // it runs would be unsound, so bail out.
        if self.post_scan_busy.load(Ordering::Acquire) {
            debug!("Post-scan bigram build in progress, skipping rescan");
            return Ok(());
        }

        self.is_scanning.store(true, Ordering::Relaxed);
        self.scanned_files_count.store(0, Ordering::Relaxed);

        let walk_result = walk_filesystem(
            &self.base_path,
            &self.scanned_files_count,
            shared_frecency,
            self.mode,
        );

        match walk_result {
            Ok(walk) => {
                info!(
                    "Filesystem rescan completed: found {} files",
                    walk.sync.files.len()
                );

                // Swap in the fresh table; cached content offsets are no
                // longer valid, so reset the cache accounting too.
                self.sync_data = walk.sync;
                self.cache_budget.reset();

                // Git status ran concurrently on a helper thread during the
                // walk; join it and enrich the fresh file table.
                if let Ok(Some(git_cache)) = walk.git_handle.join() {
                    let frecency = shared_frecency.read().ok();
                    let frecency_ref = frecency.as_ref().and_then(|f| f.as_ref());
                    let mode = self.mode;
                    let bp = &self.base_path;
                    let arena = self.arena_base_ptr();

                    // Directory frecency is an aggregate over files; clear
                    // it before re-accumulating below.
                    for dir in self.sync_data.dirs.iter() {
                        dir.reset_frecency();
                    }

                    let files = &mut self.sync_data.files;
                    let dirs = &self.sync_data.dirs;
                    BACKGROUND_THREAD_POOL.install(|| {
                        files.par_iter_mut().for_each(|file| {
                            file.git_status =
                                git_cache.lookup_status(&file.absolute_path(arena, bp));
                            if let Some(frecency) = frecency_ref {
                                let _ = file.update_frecency_scores(frecency, arena, bp, mode);
                            }
                            // Propagate the hottest file score up to the
                            // parent directory.
                            let score = file.access_frecency_score as i32;
                            if score > 0 {
                                let dir_idx = file.parent_dir_index() as usize;
                                if let Some(dir) = dirs.get(dir_idx) {
                                    dir.update_frecency_if_larger(score);
                                }
                            }
                        });
                    });
                }

            }
            Err(error) => error!(?error, "Failed to scan file system"),
        }

        self.is_scanning.store(false, Ordering::Relaxed);
        Ok(())
    }
1621
    /// Whether a filesystem walk is currently running.
    pub fn is_scan_active(&self) -> bool {
        self.is_scanning.load(Ordering::Relaxed)
    }
1626
    /// Shared handle to the scanning flag, for observers that may outlive
    /// this picker instance.
    pub fn scan_signal(&self) -> Arc<AtomicBool> {
        Arc::clone(&self.is_scanning)
    }
1632
    /// Shared handle to the watcher-ready flag (set once the background
    /// watcher is installed, or its setup has terminally failed).
    pub fn watcher_signal(&self) -> Arc<AtomicBool> {
        Arc::clone(&self.watcher_ready)
    }
1638}
1639
/// Snapshot of background-scan state, suitable for polling from a UI.
#[derive(Debug, Clone)]
pub struct ScanProgress {
    // Files discovered so far by the current (or last) walk.
    pub scanned_files_count: usize,
    // True while a filesystem walk is in flight.
    pub is_scanning: bool,
    // True once the background watcher is installed or declared failed.
    pub is_watcher_ready: bool,
    // Presumably set once the post-scan warmup/indexing phase finished;
    // populated outside this part of the file — TODO confirm.
    pub is_warmup_complete: bool,
}
1651
/// Runs the initial scan pipeline on a detached thread:
/// 1. walk the filesystem and publish the file tables into `shared_picker`
///    (files become searchable at this point, before any enrichment),
/// 2. join the git-status thread and apply status + frecency scores,
/// 3. optionally install the background filesystem watcher,
/// 4. optionally run the post-scan phase (mmap warmup, bigram indexing).
///
/// `cancelled` is checked between phases so a replaced picker abandons its
/// work early; `scan_signal` and `watcher_ready` are flipped as the phases
/// complete so callers can poll progress.
#[allow(clippy::too_many_arguments)]
fn spawn_scan_and_watcher(
    base_path: PathBuf,
    scan_signal: Arc<AtomicBool>,
    watcher_ready: Arc<AtomicBool>,
    synced_files_count: Arc<AtomicUsize>,
    enable_mmap_cache: bool,
    enable_content_indexing: bool,
    watch: bool,
    mode: FFFMode,
    shared_picker: SharedPicker,
    shared_frecency: SharedFrecency,
    cancelled: Arc<AtomicBool>,
    post_scan_busy: Arc<AtomicBool>,
) {
    std::thread::spawn(move || {
        info!("Starting initial file scan");

        let git_workdir;

        // Phase 1: walk + publish.
        match walk_filesystem(&base_path, &synced_files_count, &shared_frecency, mode) {
            Ok(walk) => {
                if cancelled.load(Ordering::Acquire) {
                    info!("Walk completed but picker was replaced, discarding results");
                    scan_signal.store(false, Ordering::Relaxed);
                    return;
                }

                info!(
                    "Initial filesystem walk completed: found {} files",
                    walk.sync.files.len()
                );

                git_workdir = walk.sync.git_workdir.clone();
                let git_handle = walk.git_handle;

                // Publish the file tables; searches work from here on,
                // before git/frecency enrichment lands.
                let write_result = shared_picker.write().ok().map(|mut guard| {
                    if let Some(ref mut picker) = *guard {
                        picker.sync_data = walk.sync;
                        picker.cache_budget.reset();
                    }
                });

                if write_result.is_none() {
                    error!("Failed to write scan results into picker");
                }

                scan_signal.store(false, Ordering::Relaxed);
                info!("Files indexed and searchable");

                // Phase 2: join git status and enrich files.
                if !cancelled.load(Ordering::Acquire) {
                    apply_git_status_and_frecency(
                        &shared_picker,
                        &shared_frecency,
                        git_handle,
                        mode,
                    );
                }
            }
            Err(e) => {
                error!("Initial scan failed: {:?}", e);
                scan_signal.store(false, Ordering::Relaxed);
                watcher_ready.store(true, Ordering::Release);
                return;
            }
        }

        // Phase 3: install the filesystem watcher (optional).
        if watch && !cancelled.load(Ordering::Acquire) {
            let watch_dirs = shared_picker
                .read()
                .ok()
                .and_then(|guard| guard.as_ref().map(|picker| picker.extract_watch_dirs()))
                .unwrap_or_default();

            match BackgroundWatcher::new(
                base_path.clone(),
                git_workdir,
                shared_picker.clone(),
                shared_frecency.clone(),
                mode,
                watch_dirs,
            ) {
                Ok(watcher) => {
                    info!("Background file watcher initialized successfully");

                    // The picker may have been replaced while the watcher
                    // was starting; don't attach to the new one.
                    if cancelled.load(Ordering::Acquire) {
                        info!("Picker was replaced, dropping orphaned watcher");
                        drop(watcher);
                        watcher_ready.store(true, Ordering::Release);
                        return;
                    }

                    let write_result = shared_picker.write().ok().map(|mut guard| {
                        if let Some(ref mut picker) = *guard {
                            picker.background_watcher = Some(watcher);
                        }
                    });

                    if write_result.is_none() {
                        error!("Failed to store background watcher in picker");
                    }
                }
                Err(e) => {
                    error!("Failed to initialize background file watcher: {:?}", e);
                }
            }
        }

        watcher_ready.store(true, Ordering::Release);

        // Phase 4: mmap warmup + content indexing (optional).
        let need_post_scan =
            (enable_mmap_cache || enable_content_indexing) && !cancelled.load(Ordering::Acquire);

        if need_post_scan {
            post_scan_busy.store(true, Ordering::Release);
            let phase_start = std::time::Instant::now();

            // Auto-size the content cache budget from the repo size unless
            // the user pinned one explicitly.
            if let Ok(mut guard) = shared_picker.write()
                && let Some(ref mut picker) = *guard
                && !picker.has_explicit_cache_budget
            {
                let file_count = picker.sync_data.files().len();
                picker.cache_budget = Arc::new(ContentCacheBudget::new_for_repo(file_count));
                info!(
                    "Cache budget configured for {} files: max_files={}, max_bytes={}",
                    file_count, picker.cache_budget.max_files, picker.cache_budget.max_bytes,
                );
            }

            // Snapshot the file slice outside the lock so warmup/indexing
            // do not block searches.
            // SAFETY(review): the raw-parts slice outlives the read guard;
            // soundness presumably relies on `post_scan_busy` preventing any
            // reallocation of `sync_data.files` until this phase ends —
            // TODO confirm every mutation path honors the flag.
            let files_snapshot: Option<(&[FileItem], usize, Arc<ContentCacheBudget>, ArenaPtr)> =
                if !cancelled.load(Ordering::Acquire) {
                    let guard = shared_picker.read().ok();
                    guard.and_then(|guard| {
                        guard.as_ref().map(|picker| {
                            let files = picker.sync_data.files();
                            let ptr = files.as_ptr();
                            let len = files.len();
                            let base_count = picker.sync_data.base_count;
                            let budget = Arc::clone(&picker.cache_budget);
                            let arena = picker.arena_base_ptr();
                            let static_files: &[FileItem] =
                                unsafe { std::slice::from_raw_parts(ptr, len) };
                            (static_files, base_count, budget, arena)
                        })
                    })
                } else {
                    None
                };

            if let Some((files, base_count, budget, arena)) = files_snapshot {
                if enable_mmap_cache && !cancelled.load(Ordering::Acquire) {
                    let warmup_start = std::time::Instant::now();
                    warmup_mmaps(files, &budget, &base_path, arena);
                    info!(
                        "Warmup completed in {:.2}s (cached {} files, {} bytes)",
                        warmup_start.elapsed().as_secs_f64(),
                        budget.cached_count.load(Ordering::Relaxed),
                        budget.cached_bytes.load(Ordering::Relaxed),
                    );
                }

                if enable_content_indexing && !cancelled.load(Ordering::Acquire) {
                    // Only base-scan files are indexed; later additions go
                    // through the bigram overlay instead.
                    let base_files = &files[..base_count.min(files.len())];
                    let (index, content_binary) =
                        build_bigram_index(base_files, &budget, &base_path, arena);

                    if let Ok(mut guard) = shared_picker.write()
                        && let Some(ref mut picker) = *guard
                    {
                        // Mark files whose content (rather than extension)
                        // showed them to be binary.
                        for &idx in &content_binary {
                            if let Some(file) = picker.sync_data.get_file_mut(idx) {
                                file.set_binary(true);
                            }
                        }

                        picker.sync_data.bigram_index = Some(Arc::new(index));
                        picker.sync_data.bigram_overlay = Some(Arc::new(parking_lot::RwLock::new(
                            BigramOverlay::new(base_count),
                        )));
                    }
                }
            }

            post_scan_busy.store(false, Ordering::Release);

            info!(
                "Post-scan phase total: {:.2}s (warmup={}, content_indexing={})",
                phase_start.elapsed().as_secs_f64(),
                enable_mmap_cache,
                enable_content_indexing,
            );
        }

    });
}
1871
/// Pre-faults mmap content for the highest-frecency files so first searches
/// hit a warm cache, bounded by the [`ContentCacheBudget`] limits
/// (`max_files`, `max_bytes`, `max_file_size`).
#[tracing::instrument(skip(files), name = "warmup_mmaps", level = Level::DEBUG)]
pub(crate) fn warmup_mmaps(
    files: &[FileItem],
    budget: &ContentCacheBudget,
    base_path: &Path,
    arena: ArenaPtr,
) {
    let max_files = budget.max_files;
    let max_bytes = budget.max_bytes;
    let max_file_size = budget.max_file_size;

    let mut all: Vec<&FileItem> = files.iter().collect();

    // Partial selection (O(n)): move the `max_files` best candidates —
    // non-binary, non-empty, highest total frecency — to the front; full
    // sorted order within the selection is irrelevant for warmup.
    if all.len() > max_files {
        all.select_nth_unstable_by(max_files, |a, b| {
            let a_ok = !a.is_binary() && a.size > 0;
            let b_ok = !b.is_binary() && b.size > 0;
            match (a_ok, b_ok) {
                (true, false) => std::cmp::Ordering::Less,
                (false, true) => std::cmp::Ordering::Greater,
                (false, false) => std::cmp::Ordering::Equal,
                (true, true) => b.total_frecency_score().cmp(&a.total_frecency_score()),
            }
        });
    }

    let to_warm = &all[..all.len().min(max_files)];

    let warmed_bytes = AtomicU64::new(0);
    let budget_exhausted = AtomicBool::new(false);

    BACKGROUND_THREAD_POOL.install(|| {
        to_warm.par_iter().for_each(|file| {
            // Cheap early-out once any worker observed the byte budget hit.
            if budget_exhausted.load(Ordering::Relaxed) {
                return;
            }

            // Ineligible entries can still be present when fewer than
            // `max_files` files qualified, so re-check per file.
            if file.is_binary() || file.size == 0 || file.size > max_file_size {
                return;
            }

            // Optimistically reserve the bytes. This soft budget may
            // overshoot slightly under contention, which is acceptable
            // for a best-effort cache warmup.
            let prev_bytes = warmed_bytes.fetch_add(file.size, Ordering::Relaxed);
            if prev_bytes + file.size > max_bytes {
                budget_exhausted.store(true, Ordering::Relaxed);
                return;
            }

            // Touch the first byte so the mapping is actually created;
            // black_box keeps the read from being optimized away.
            if let Some(content) = file.get_content(arena, base_path, budget) {
                let _ = std::hint::black_box(content.first());
            }
        });
    });
}
1941
/// Per-file cap (64 KiB) on how many leading bytes feed the bigram index;
/// bounds index build time and memory on very large files.
pub const BIGRAM_CONTENT_CAP: usize = 64 * 1024;
1946
1947#[tracing::instrument(skip_all, name = "Building Bigram Index", level = Level::DEBUG)]
1948pub(crate) fn build_bigram_index(
1949 files: &[FileItem],
1950 budget: &ContentCacheBudget,
1951 base_path: &Path,
1952 arena: ArenaPtr,
1953) -> (BigramFilter, Vec<usize>) {
1954 let start = std::time::Instant::now();
1955 info!("Building bigram index for {} files...", files.len());
1956 let builder = BigramIndexBuilder::new(files.len());
1957 let skip_builder = BigramIndexBuilder::new(files.len());
1958 let max_file_size = budget.max_file_size;
1959
1960 let content_binary: std::sync::Mutex<Vec<usize>> = std::sync::Mutex::new(Vec::new());
1964
1965 BACKGROUND_THREAD_POOL.install(|| {
1966 files.par_iter().enumerate().for_each(|(i, file)| {
1967 if file.is_binary() || file.size == 0 || file.size > max_file_size {
1968 return;
1969 }
1970 let data: Option<&[u8]>;
1973 let owned;
1974 if let Some(cached) = file.get_content(arena, base_path, budget) {
1975 if detect_binary_content(cached) {
1976 content_binary.lock().unwrap().push(i);
1977 return;
1978 }
1979 data = Some(cached);
1980 owned = None;
1981 } else if let Ok(read_data) = std::fs::read(file.absolute_path(arena, base_path)) {
1982 if detect_binary_content(&read_data) {
1983 content_binary.lock().unwrap().push(i);
1984 return;
1985 }
1986 data = None;
1987 owned = Some(read_data);
1988 } else {
1989 return;
1990 }
1991
1992 let content = data.unwrap_or_else(|| owned.as_ref().unwrap());
1993 let capped = &content[..content.len().min(BIGRAM_CONTENT_CAP)];
1994 builder.add_file_content(&skip_builder, i, capped);
1995 });
1996 });
1997
1998 let cols = builder.columns_used();
1999 let mut index = builder.compress(None);
2000
2001 let skip_index = skip_builder.compress(Some(12));
2007 index.set_skip_index(skip_index);
2008
2009 hint_allocator_collect();
2013
2014 info!(
2015 "Bigram index built in {:.2}s — {} dense columns for {} files",
2016 start.elapsed().as_secs_f64(),
2017 cols,
2018 files.len(),
2019 );
2020
2021 let binary_indices = content_binary.into_inner().unwrap();
2022 if !binary_indices.is_empty() {
2023 info!(
2024 "Bigram build detected {} content-binary files (not caught by extension)",
2025 binary_indices.len(),
2026 );
2027 }
2028
2029 (index, binary_indices)
2030}
2031
/// Output of [`walk_filesystem`]: the populated file tables plus a handle to
/// the still-running git-status thread, joined later by the caller so the
/// walk and git status run concurrently.
struct WalkResult {
    sync: FileSync,
    git_handle: std::thread::JoinHandle<Option<GitStatusCache>>,
}
2038
/// Walks `base_path` (respecting git/ignore rules), builds the chunked path
/// store, the directory table, and initial frecency scores. Git status is
/// computed concurrently on a helper thread whose handle is returned inside
/// [`WalkResult`] for the caller to join.
fn walk_filesystem(
    base_path: &Path,
    synced_files_count: &Arc<AtomicUsize>,
    shared_frecency: &SharedFrecency,
    mode: FFFMode,
) -> Result<WalkResult, Error> {
    use ignore::WalkBuilder;

    let scan_start = std::time::Instant::now();
    info!("SCAN: Starting filesystem walk and git status (async)");

    // `discover` walks up from base_path, so a nested working dir still
    // finds its enclosing repository.
    let git_workdir = Repository::discover(base_path)
        .ok()
        .and_then(|repo| repo.workdir().map(Path::to_path_buf));

    if let Some(ref git_dir) = git_workdir {
        debug!("Git repository found at: {}", git_dir.display());
    } else {
        debug!("No git repository found for path: {}", base_path.display());
    }

    // Kick off `git status` on its own thread; it runs concurrently with
    // the walk below and is joined by the caller.
    let git_workdir_for_status = git_workdir.clone();
    let git_handle = std::thread::spawn(move || {
        GitStatusCache::read_git_status(
            git_workdir_for_status.as_deref(),
            StatusOptions::new()
                .include_untracked(true)
                .recurse_untracked_dirs(true)
                .exclude_submodules(true),
        )
    });

    let is_git_repo = git_workdir.is_some();
    let bg_threads = BACKGROUND_THREAD_POOL.current_num_threads();
    let mut walk_builder = WalkBuilder::new(base_path);
    walk_builder
        // Inside a git repo, ignore rules govern hidden files; outside
        // one, skip hidden entries wholesale.
        .hidden(!is_git_repo)
        .git_ignore(true)
        .git_exclude(true)
        .git_global(true)
        .ignore(true)
        .follow_links(false)
        .threads(bg_threads);

    if !is_git_repo && let Some(overrides) = non_git_repo_overrides(base_path) {
        walk_builder.overrides(overrides);
    }

    let walker = walk_builder.build_parallel();

    let walker_start = std::time::Instant::now();
    debug!("SCAN: Starting file walker");

    // (FileItem, relative_path) pairs accumulated by all walker threads.
    let pairs = parking_lot::Mutex::new(Vec::<(FileItem, String)>::new());

    walker.run(|| {
        let pairs = &pairs;
        let counter = Arc::clone(synced_files_count);
        let base_path = base_path.to_path_buf();

        Box::new(move |result| {
            let Ok(entry) = result else {
                return ignore::WalkState::Continue;
            };

            if entry.file_type().is_some_and(|ft| ft.is_file()) {
                let path = entry.path();

                // Never index the contents of a .git directory.
                if is_git_file(path) {
                    return ignore::WalkState::Continue;
                }

                // Outside a git repo there is no gitignore to weed out
                // assets, so skip well-known binary extensions up front.
                if !is_git_repo && is_known_binary_extension(path) {
                    return ignore::WalkState::Continue;
                }

                let metadata = entry.metadata().ok();
                let (file_item, rel_path) =
                    FileItem::new_from_walk(path, &base_path, None, metadata.as_ref());

                pairs.lock().push((file_item, rel_path));
                // Progress counter polled by the UI while the scan runs.
                counter.fetch_add(1, Ordering::Relaxed);
            }
            ignore::WalkState::Continue
        })
    });

    let mut pairs = pairs.into_inner();

    info!(
        "SCAN: File walking completed in {:?} for {} files",
        walker_start.elapsed(),
        pairs.len(),
    );

    // Sort by relative path so files sharing a directory become adjacent,
    // letting the loop below emit each directory entry exactly once.
    BACKGROUND_THREAD_POOL.install(|| {
        pairs.par_sort_unstable_by(|(_, a), (_, b)| a.cmp(b));
    });

    let mut files: Vec<FileItem> = Vec::with_capacity(pairs.len());
    let mut dirs: Vec<DirItem> = Vec::new();
    let mut builder = crate::simd_path::ChunkedPathStoreBuilder::new(pairs.len());
    // Run-length dedup state: directory prefix of the previous file.
    let mut prev_dir: Option<String> = None;
    let mut current_dir_idx: u32 = 0;

    for (mut file, rel) in pairs {
        let fname_offset = file.path.filename_offset as usize;
        let dir_part = &rel[..fname_offset];

        if prev_dir.as_deref() != Some(dir_part) {
            let dir_cs = builder.add_dir_immediate(dir_part);
            // Byte offset of the directory's last path segment.
            // NOTE(review): the `as u16` cast truncates for directory
            // prefixes longer than u16::MAX bytes — TODO confirm acceptable.
            let last_seg = if dir_part.is_empty() {
                0
            } else {
                let trimmed = dir_part.trim_end_matches(std::path::is_separator);
                trimmed
                    .rfind(std::path::is_separator)
                    .map(|i| i + 1)
                    .unwrap_or(0) as u16
            };
            dirs.push(DirItem::new(dir_cs, last_seg));
            current_dir_idx = (dirs.len() - 1) as u32;
            prev_dir = Some(dir_part.to_string());
        }

        let cs = builder.add_file_immediate(&rel, file.path.filename_offset);
        file.set_path(cs);
        file.set_parent_dir(current_dir_idx);
        files.push(file);
    }
    let chunked_paths = builder.finish();
    let arena = chunked_paths.as_arena_ptr();

    // Apply stored frecency scores (when a tracker exists) and aggregate
    // each file's access score into its parent directory.
    let frecency = shared_frecency
        .read()
        .map_err(|_| Error::AcquireFrecencyLock)?;
    if let Some(frecency) = frecency.as_ref() {
        let dirs_ref = &dirs;
        BACKGROUND_THREAD_POOL.install(|| {
            files.par_iter_mut().for_each(|file| {
                let _ = file.update_frecency_scores(frecency, arena, base_path, mode);
                let score = file.access_frecency_score as i32;
                if score > 0 {
                    let dir_idx = file.parent_dir_index() as usize;
                    if let Some(dir) = dirs_ref.get(dir_idx) {
                        dir.update_frecency_if_larger(score);
                    }
                }
            });
        });
    }
    drop(frecency);

    // Final order: grouped by parent directory, then by file name.
    BACKGROUND_THREAD_POOL.install(|| {
        files.par_sort_unstable_by(|a, b| {
            a.parent_dir_index()
                .cmp(&b.parent_dir_index())
                .then_with(|| a.file_name(arena).cmp(&b.file_name(arena)))
        });
    });

    // Large transient walker allocations were just dropped.
    hint_allocator_collect();

    // Memory accounting for the summary log line below.
    let file_item_size = std::mem::size_of::<FileItem>();
    let files_vec_bytes = files.len() * file_item_size;
    let dir_table_bytes = dirs.len() * std::mem::size_of::<DirItem>()
        + dirs
            .iter()
            .map(|d| d.relative_path(arena).len())
            .sum::<usize>();

    let total_time = scan_start.elapsed();
    info!(
        "SCAN: Walk completed in {:?} ({} files, {} dirs, \
        chunked_store={:.2}MB, files_vec={:.2}MB, dirs={:.2}MB, FileItem={}B)",
        total_time,
        files.len(),
        dirs.len(),
        chunked_paths.heap_bytes() as f64 / 1_048_576.0,
        files_vec_bytes as f64 / 1_048_576.0,
        dir_table_bytes as f64 / 1_048_576.0,
        file_item_size,
    );

    let base_count = files.len();

    Ok(WalkResult {
        sync: FileSync {
            files,
            base_count,
            dirs,
            overflow_builder: None,
            git_workdir,
            bigram_index: None,
            bigram_overlay: None,
            chunked_paths: Some(chunked_paths),
        },
        git_handle,
    })
}
2261
/// Joins the git-status thread spawned during the walk and applies its
/// results — plus frecency scores — to every file in the picker, in
/// parallel on the background pool; also re-aggregates per-directory
/// frecency. No-op when the status thread returned `None` (no repo) or
/// panicked.
fn apply_git_status_and_frecency(
    shared_picker: &SharedPicker,
    shared_frecency: &SharedFrecency,
    git_handle: std::thread::JoinHandle<Option<GitStatusCache>>,
    mode: FFFMode,
) {
    let join_start = std::time::Instant::now();
    let git_cache = match git_handle.join() {
        Ok(cache) => cache,
        Err(_) => {
            error!("Git status thread panicked");
            return;
        }
    };
    info!("SCAN: Git status ready in {:?}", join_start.elapsed());

    let Some(git_cache) = git_cache else { return };

    if let Ok(mut guard) = shared_picker.write()
        && let Some(ref mut picker) = *guard
    {
        let frecency = shared_frecency.read().ok();
        let frecency_ref = frecency.as_ref().and_then(|f| f.as_ref());

        let bp = &picker.base_path;
        let arena = picker.arena_base_ptr();

        // Directory frecency is an aggregate over files; reset before
        // re-accumulating below.
        for dir in picker.sync_data.dirs.iter() {
            dir.reset_frecency();
        }

        let files = &mut picker.sync_data.files;
        let dirs = &picker.sync_data.dirs;

        BACKGROUND_THREAD_POOL.install(|| {
            files.par_iter_mut().for_each(|file| {
                // Stack buffer keeps absolute-path construction
                // allocation-free inside this hot loop.
                let mut buf = [0u8; crate::simd_path::PATH_BUF_SIZE];
                let absolute_path = file.write_absolute_path(arena, bp, &mut buf);

                file.git_status = git_cache.lookup_status(absolute_path);
                if let Some(frecency) = frecency_ref {
                    let _ = file.update_frecency_scores(frecency, arena, bp, mode);
                }

                // Propagate the best file score to the parent directory.
                let score = file.access_frecency_score as i32;
                if score > 0 {
                    let dir_idx = file.parent_dir_index() as usize;
                    if let Some(dir) = dirs.get(dir_idx) {
                        dir.update_frecency_if_larger(score);
                    }
                }
            });
        });

        info!(
            "SCAN: Applied git status to {} files ({} dirty)",
            picker.sync_data.files.len(),
            git_cache.statuses_len(),
        );
    }
}
2325
/// Returns `true` when `path` lies inside a `.git` directory, i.e. contains
/// `.git` as a full path component (so `.gitignore` does not match).
/// Non-UTF-8 paths yield `false`.
#[inline]
fn is_git_file(path: &Path) -> bool {
    let marker = if cfg!(target_family = "windows") {
        "\\.git\\"
    } else {
        "/.git/"
    };
    path.to_str().is_some_and(|s| s.contains(marker))
}
2336
/// Returns `true` for files whose extension is known to denote binary
/// content, letting the walker skip them up front (only consulted outside
/// git repos, where no ignore file narrows the walk).
///
/// Fixes over the naive extension match:
/// - ASCII case-insensitive (`PHOTO.PNG` is binary too).
/// - `.DS_Store` is matched by file name, because [`Path::extension`]
///   returns `None` for a leading-dot name with no further dot, so an
///   extension arm alone can never catch it.
#[inline]
fn is_known_binary_extension(path: &Path) -> bool {
    // `.DS_Store` never reaches the extension match (see doc comment).
    if path.file_name().and_then(|n| n.to_str()) == Some(".DS_Store") {
        return true;
    }
    let Some(ext) = path.extension().and_then(|e| e.to_str()) else {
        return false;
    };
    // Lowercase only when needed, keeping the common all-lowercase case
    // allocation-free.
    let lowered;
    let ext = if ext.bytes().any(|b| b.is_ascii_uppercase()) {
        lowered = ext.to_ascii_lowercase();
        lowered.as_str()
    } else {
        ext
    };
    matches!(
        ext,
        // images
        "png" | "jpg" | "jpeg" | "gif" | "bmp" | "ico" | "webp" | "tiff" | "tif" | "avif" |
        "heic" | "psd" | "icns" | "cur" | "raw" | "cr2" | "nef" | "dng" |
        // audio / video
        "mp4" | "avi" | "mov" | "wmv" | "mkv" | "mp3" | "wav" | "flac" | "ogg" | "m4a" |
        "aac" | "webm" | "flv" | "mpg" | "mpeg" | "wma" | "opus" |
        // archives
        "zip" | "tar" | "gz" | "bz2" | "xz" | "7z" | "rar" | "zst" | "lz4" | "lzma" |
        "cab" | "cpio" |
        // packages / disk images
        "deb" | "rpm" | "apk" | "dmg" | "msi" | "iso" | "nupkg" | "whl" | "egg" |
        "snap" | "appimage" | "flatpak" |
        // native binaries / libraries
        "exe" | "dll" | "so" | "dylib" | "o" | "a" | "lib" | "bin" | "elf" |
        // office documents
        "pdf" | "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx" |
        // databases
        "db" | "sqlite" | "sqlite3" | "mdb" |
        // fonts
        "ttf" | "otf" | "woff" | "woff2" | "eot" |
        // compiled / packaged code
        "class" | "pyc" | "pyo" | "wasm" | "dex" | "jar" | "war" |
        // ML / scientific data
        "npy" | "npz" | "pkl" | "pickle" | "h5" | "hdf5" | "pt" | "pth" | "onnx" |
        "safetensors" | "tfrecord" |
        // 3D assets
        "glb" | "fbx" | "blend" |
        // columnar / protobuf data
        "parquet" | "arrow" | "pb" |
        // misc (`x.DS_Store` suffix form, Visual Studio user options)
        "ds_store" | "suo"
    )
}
2379
/// Heuristic binary sniff: content is considered binary when a NUL byte
/// occurs within its first 512 bytes.
#[inline]
pub(crate) fn detect_binary_content(content: &[u8]) -> bool {
    content.iter().take(512).any(|&byte| byte == 0)
}
2387
/// Nudges the allocator to return freed memory to the OS after large
/// transient allocations (walker pair buffers, index builders). No-op
/// unless the `mimalloc-collect` feature is enabled.
fn hint_allocator_collect() {
    #[cfg(feature = "mimalloc-collect")]
    {
        // Broadcast runs the collection on every pool worker, since
        // mimalloc keeps per-thread heaps.
        // SAFETY(review): mi_collect(force) takes no pointers and is
        // presumably callable at any time — confirm against mimalloc docs.
        BACKGROUND_THREAD_POOL.broadcast(|_| unsafe { libmimalloc_sys::mi_collect(true) });

        // ...and once more on the calling thread's own heap.
        unsafe { libmimalloc_sys::mi_collect(true) };
    }
}