1use crate::FfsStringStorage;
34use crate::background_watcher::{BackgroundWatcher, is_git_file};
35use crate::bigram_filter::{BigramFilter, BigramOverlay};
36use crate::error::Error;
37use crate::frecency::FrecencyTracker;
38use crate::git::GitStatusCache;
39use crate::grep::{GrepResult, GrepSearchOptions, grep_search, multi_grep_search};
40use crate::ignore::non_git_repo_overrides;
41use crate::query_tracker::QueryTracker;
42use crate::scan::{ScanConfig, ScanJob, ScanSignals};
43use crate::score::fuzzy_match_and_score_files;
44use crate::shared::{SharedFilePicker, SharedFrecency};
45use crate::simd_path::{ArenaPtr, PATH_BUF_SIZE};
46use crate::stable_vec::StableVec;
47use crate::types::{
48 ContentCacheBudget, DirItem, DirSearchResult, FileItem, MixedItemRef, MixedSearchResult,
49 PaginationArgs, Score, ScoringContext, SearchResult,
50};
51use ffs_query_parser::FfsQuery;
52use git2::{Repository, Status};
53use rayon::prelude::*;
54use std::fmt::Debug;
55use std::ops::ControlFlow;
56use std::path::{Path, PathBuf};
57use std::sync::{
58 Arc, LazyLock,
59 atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
60};
61use std::thread::JoinHandle;
62use std::time::SystemTime;
63use tracing::{Level, debug, error, info, warn};
64
/// Reserve size handed to `StableVec::from_vec_with_reserve` when an empty
/// `FileSync` is created.
///
/// NOTE(review): the name suggests this caps how many files can be tracked
/// outside the base index between rescans — confirm against `StableVec`.
pub(crate) const MAX_OVERFLOW_FILES: usize = 1024;
69
70pub(crate) static BACKGROUND_THREAD_POOL: LazyLock<rayon::ThreadPool> = LazyLock::new(|| {
74 let total = std::thread::available_parallelism()
75 .map(|p| p.get())
76 .unwrap_or(4);
77 let bg_threads = total.saturating_sub(2).max(1);
78 info!(
79 "Background pool: {} threads (system has {})",
80 bg_threads, total
81 );
82 rayon::ThreadPoolBuilder::new()
83 .num_threads(bg_threads)
84 .thread_name(|i| format!("ffs-bg-{i}"))
85 .start_handler(|_| {
86 #[cfg(target_os = "macos")]
91 unsafe {
92 let _ = libc::pthread_set_qos_class_self_np(
93 libc::qos_class_t::QOS_CLASS_USER_INITIATED,
94 0,
95 );
96 }
97 })
98 .build()
99 .expect("failed to create background rayon pool")
100});
101
/// Operating mode of the picker. The value is stored on the picker and passed
/// along to frecency scoring.
///
/// NOTE(review): the variant names suggest which kind of client drives the
/// picker — confirm the exact behavioral differences with the scoring code.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum FfsMode {
    /// The default mode.
    #[default]
    Neovim,
    /// The non-default mode; detected with [`FfsMode::is_ai`].
    Ai,
}

impl FfsMode {
    /// Returns `true` only when the mode is [`FfsMode::Ai`].
    pub fn is_ai(self) -> bool {
        matches!(self, Self::Ai)
    }
}
114
/// Per-call knobs for the fuzzy search entry points on `FilePicker`.
#[derive(Debug, Clone, Copy, Default)]
pub struct FuzzySearchOptions<'a> {
    /// Worker thread count; `0` makes the search use the detected parallelism
    /// (falling back to 4 when that cannot be queried).
    pub max_threads: usize,
    /// Forwarded to the scoring context as `current_file`.
    pub current_file: Option<&'a str>,
    /// Forwarded to the scoring context; together with a query tracker it is
    /// also needed to look up the last entry for the same query.
    pub project_path: Option<&'a Path>,
    /// Forwarded to the scoring context by the file search; the directory
    /// search ignores it and uses `0`.
    pub combo_boost_score_multiplier: i32,
    /// Passed to the query-history lookup and the scoring context by the file
    /// search; the directory search ignores it and uses `0`.
    pub min_combo_count: u32,
    /// Offset/limit of the requested result page.
    pub pagination: PaginationArgs,
}
128
/// Indexed state of one scan: the file table, directory table, path arenas
/// and optional bigram data.
#[derive(Debug, Clone)]
pub(crate) struct FileSync {
    /// Git working directory, if any; exposed through `FilePicker::git_root`.
    pub(crate) git_workdir: Option<PathBuf>,
    /// All file entries. `files[..base_count]` are base entries and
    /// `files[base_count..]` are overflow entries.
    files: StableVec<FileItem>,
    /// `files[..indexable_count]` is binary-searched first during lookups;
    /// `files[indexable_count..base_count]` is binary-searched second.
    /// NOTE(review): presumably both ranges are sorted by (parent dir, file name) — confirm.
    indexable_count: usize,
    /// Number of base entries; everything at or after this index is overflow.
    base_count: usize,
    /// Directory table; binary-searched by relative path during file lookups.
    dirs: Vec<DirItem>,
    /// When present, supplies the arena used to read paths of overflow files.
    overflow_builder: Option<crate::simd_path::ChunkedPathStoreBuilder>,
    /// Bigram filter handed to grep searches, if it has been built.
    bigram_index: Option<Arc<BigramFilter>>,
    /// Overlay for the bigram filter, read-locked for the duration of a grep.
    bigram_overlay: Option<Arc<parking_lot::RwLock<BigramOverlay>>>,
    /// When present, supplies the arena used to read paths of base files and
    /// directories; without it a null arena pointer is used.
    chunked_paths: Option<crate::simd_path::ChunkedPathStore>,
}
157
158impl FileSync {
159 fn new() -> Self {
160 Self {
161 files: StableVec::from_vec_with_reserve(Vec::new(), MAX_OVERFLOW_FILES),
162 indexable_count: 0,
163 base_count: 0,
164 dirs: Vec::new(),
165 overflow_builder: None,
166 git_workdir: None,
167 bigram_index: None,
168 bigram_overlay: None,
169 chunked_paths: None,
170 }
171 }
172
173 #[inline]
175 fn arena_base_ptr(&self) -> ArenaPtr {
176 self.chunked_paths
177 .as_ref()
178 .map(|s| s.as_arena_ptr())
179 .unwrap_or(ArenaPtr::null())
180 }
181
182 #[inline]
184 fn overflow_arena_ptr(&self) -> ArenaPtr {
185 self.overflow_builder
186 .as_ref()
187 .map(|b| b.as_arena_ptr())
188 .unwrap_or(self.arena_base_ptr())
189 }
190
191 #[inline]
193 fn arena_for_file(&self, file: &FileItem) -> ArenaPtr {
194 if file.is_overflow() {
195 self.overflow_arena_ptr()
196 } else {
197 self.arena_base_ptr()
198 }
199 }
200
    /// Returns every file entry as one slice, base entries first and overflow
    /// entries after them.
    #[inline]
    fn files(&self) -> &[FileItem] {
        &self.files
    }
207
208 #[inline]
210 fn overflow_files(&self) -> &[FileItem] {
211 &self.files[self.base_count..]
212 }
213
    /// Mutable access to the entry at `index` (base or overflow), or `None`
    /// when the index is out of range.
    #[inline]
    fn get_file_mut(&mut self, index: usize) -> Option<&mut FileItem> {
        self.files.get_mut(index)
    }
219
220 #[inline]
223 fn find_file_index(&self, path: &Path, base_path: &Path) -> Result<usize, usize> {
224 let arena = self.arena_base_ptr();
225
226 let rel_path_owned: String = match path.strip_prefix(base_path) {
231 Ok(r) => normalize_relative_path(&r.to_string_lossy()).into_owned(),
232 Err(_) => {
233 #[cfg(windows)]
234 {
235 canonical_relative_path(path, base_path).ok_or(0usize)?
236 }
237 #[cfg(not(windows))]
238 {
239 return Err(0);
240 }
241 }
242 };
243 let rel_path: &str = &rel_path_owned;
244
245 let parent_end = rel_path
247 .rfind(std::path::is_separator)
248 .map(|i| i + 1)
249 .unwrap_or(0);
250 let dir_rel = &rel_path[..parent_end];
251 let filename = &rel_path[parent_end..];
252
253 let mut dir_buf = [0u8; crate::simd_path::PATH_BUF_SIZE];
256 let dir_idx = match self
257 .dirs
258 .binary_search_by(|d| d.read_relative_path(arena, &mut dir_buf).cmp(dir_rel))
259 {
260 Ok(idx) => idx as u32,
261 Err(_) => return Err(0), };
263
264 let cmp_key = |f: &FileItem| {
269 f.parent_dir_index().cmp(&dir_idx).then_with(|| {
270 let fname = f.file_name(arena);
271 fname.as_str().cmp(filename)
272 })
273 };
274
275 if self.indexable_count > 0
276 && let Ok(pos) = self.files[..self.indexable_count].binary_search_by(cmp_key)
277 {
278 return Ok(pos);
279 }
280
281 if self.indexable_count < self.base_count
282 && let Ok(rel_pos) =
283 self.files[self.indexable_count..self.base_count].binary_search_by(cmp_key)
284 {
285 return Ok(self.indexable_count + rel_pos);
286 }
287
288 Err(0)
289 }
290
291 fn find_overflow_index(&self, relative_path: &str) -> Option<usize> {
294 let overflow_arena = self.overflow_arena_ptr();
295 self.files[self.base_count..]
296 .iter()
297 .position(|f| f.relative_path_eq(overflow_arena, relative_path))
298 .map(|pos| self.base_count + pos)
299 }
300
301 fn retain_files_with_arena<F>(&mut self, mut predicate: F) -> usize
302 where
303 F: FnMut(&FileItem, ArenaPtr) -> bool,
304 {
305 let base_arena = self.arena_base_ptr();
306 let overflow_arena = self.overflow_arena_ptr();
307
308 let indexable_count = self.indexable_count;
309 let base_count = self.base_count;
310 let initial_len = self.files.len();
311
312 let indexable_retained = self.files[..indexable_count]
313 .iter()
314 .filter(|f| predicate(f, base_arena))
315 .count();
316 let base_retained = self.files[indexable_count..base_count]
317 .iter()
318 .filter(|f| predicate(f, base_arena))
319 .count()
320 + indexable_retained;
321
322 self.files.retain(|f| {
323 predicate(
324 f,
325 if f.is_overflow() {
326 overflow_arena
327 } else {
328 base_arena
329 },
330 )
331 });
332
333 self.indexable_count = indexable_retained;
334 self.base_count = base_retained;
335 initial_len - self.files.len()
336 }
337}
338
339impl FileItem {
340 pub fn new(path: PathBuf, base_path: &Path, git_status: Option<Status>) -> (Self, String) {
341 let metadata = std::fs::metadata(&path).ok();
342 Self::new_with_metadata(path, base_path, git_status, metadata.as_ref())
343 }
344
345 fn new_with_metadata(
349 path: PathBuf,
350 base_path: &Path,
351 git_status: Option<Status>,
352 metadata: Option<&std::fs::Metadata>,
353 ) -> (Self, String) {
354 let path_buf = pathdiff::diff_paths(&path, base_path).unwrap_or_else(|| path.clone());
355 let relative_path = path_buf.to_string_lossy().into_owned();
356
357 let (size, modified) = match metadata {
358 Some(metadata) => {
359 let size = metadata.len();
360 let modified = metadata
361 .modified()
362 .ok()
363 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
364 .map_or(0, |d| d.as_secs());
365
366 (size, modified)
367 }
368 None => (0, 0),
369 };
370
371 let is_binary = is_known_binary_extension(&path);
372
373 let filename_start = relative_path
374 .rfind(std::path::is_separator)
375 .map(|i| i + 1)
376 .unwrap_or(0) as u16;
377
378 let item = Self::new_raw(filename_start, size, modified, git_status, is_binary);
379 (item, relative_path)
380 }
381
382 pub fn new_from_walk(
388 path: &Path,
389 base_path: &Path,
390 git_status: Option<Status>,
391 metadata: Option<&std::fs::Metadata>,
392 ) -> (Self, String) {
393 let (size, modified) = match metadata {
394 Some(metadata) => {
395 let size = metadata.len();
396 let modified = metadata
397 .modified()
398 .ok()
399 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
400 .map_or(0, |d| d.as_secs());
401 (size, modified)
402 }
403 None => (0, 0),
404 };
405
406 let is_binary = is_known_binary_extension(path);
407
408 let rel = pathdiff::diff_paths(path, base_path).unwrap_or_else(|| path.to_path_buf());
409 let rel_str = rel.to_string_lossy().into_owned();
410 let fname_offset = rel_str
411 .rfind(std::path::is_separator)
412 .map(|i| i + 1)
413 .unwrap_or(0) as u16;
414
415 let item = Self::new_raw(fname_offset, size, modified, git_status, is_binary);
416 (item, rel_str)
417 }
418
419 pub(crate) fn update_frecency_scores(
420 &mut self,
421 tracker: &FrecencyTracker,
422 arena: ArenaPtr,
423 base_path: &Path,
424 mode: FfsMode,
425 ) -> Result<(), Error> {
426 let mut abs_buf = [0u8; crate::simd_path::PATH_BUF_SIZE];
427 let abs = self.write_absolute_path(arena, base_path, &mut abs_buf);
428 self.access_frecency_score = tracker.get_access_score(abs, mode) as i16;
429 self.modification_frecency_score =
430 tracker.get_modification_score(self.modified, self.git_status, mode) as i16;
431
432 Ok(())
433 }
434}
435
/// Construction options for `FilePicker`.
pub struct FilePickerOptions {
    /// Directory to index. It must exist and must have a parent; a filesystem
    /// root is rejected by `FilePicker::new`.
    pub base_path: String,
    /// Reported by `FilePicker::has_mmap_cache`; passed to the initial scan as
    /// its `warmup` setting.
    pub enable_mmap_cache: bool,
    /// Reported by `FilePicker::has_content_indexing`; passed to the initial
    /// scan as its `content_indexing` setting.
    pub enable_content_indexing: bool,
    /// Operating mode stored on the picker.
    pub mode: FfsMode,
    /// Explicit content-cache budget. `None` starts with the default budget
    /// and lets `collect_files` replace it with one sized for the repository.
    pub cache_budget: Option<ContentCacheBudget>,
    /// Reported by `FilePicker::has_watcher`; passed to the initial scan as
    /// its `watch` setting.
    pub watch: bool,
}
451
452impl Default for FilePickerOptions {
453 fn default() -> Self {
454 Self {
455 base_path: ".".into(),
456 enable_mmap_cache: false,
457 enable_content_indexing: false,
458 mode: FfsMode::default(),
459 cache_budget: None,
460 watch: true,
461 }
462 }
463}
464
/// File index rooted at `base_path`, with fuzzy search and grep entry points.
pub struct FilePicker {
    /// Operating mode, passed on to scoring, scanning and the watcher.
    pub mode: FfsMode,
    /// Root directory of the index.
    pub base_path: PathBuf,
    /// Current indexed state (files, directories, arenas, bigram data).
    sync_data: FileSync,
    /// Shared flags: `scanning`, `watcher_ready`, `cancelled`,
    /// `post_scan_indexing_active`.
    pub(crate) signals: ScanSignals,
    /// Watcher installed by `spawn_background_watcher`, if any.
    pub(crate) background_watcher: Option<BackgroundWatcher>,
    /// Content-cache budget handed to grep searches.
    cache_budget: Arc<ContentCacheBudget>,
    /// `true` when the budget came from `FilePickerOptions::cache_budget`.
    has_explicit_cache_budget: bool,
    /// Progress counter, reset to zero at the start of `collect_files`.
    scanned_files_count: Arc<AtomicUsize>,
    /// Copied from `FilePickerOptions::enable_mmap_cache`.
    enable_mmap_cache: bool,
    /// Copied from `FilePickerOptions::enable_content_indexing`.
    enable_content_indexing: bool,
    /// Copied from `FilePickerOptions::watch`.
    watch: bool,
}
478
479impl std::fmt::Debug for FilePicker {
480 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
481 f.debug_struct("FilePicker")
482 .field("base_path", &self.base_path)
483 .field("sync_data", &self.sync_data)
484 .field(
485 "is_scanning",
486 &self.signals.scanning.load(Ordering::Relaxed),
487 )
488 .field(
489 "scanned_files_count",
490 &self.scanned_files_count.load(Ordering::Relaxed),
491 )
492 .finish_non_exhaustive()
493 }
494}
495
496impl FfsStringStorage for &FilePicker {
497 #[inline]
498 fn arena_for(&self, file: &FileItem) -> crate::simd_path::ArenaPtr {
499 self.sync_data.arena_for_file(file)
500 }
501
502 #[inline]
503 fn base_arena(&self) -> crate::simd_path::ArenaPtr {
504 self.sync_data.arena_base_ptr()
505 }
506
507 #[inline]
508 fn overflow_arena(&self) -> crate::simd_path::ArenaPtr {
509 self.sync_data.overflow_arena_ptr()
510 }
511}
512
513impl FilePicker {
514 pub fn base_path(&self) -> &Path {
515 &self.base_path
516 }
517
    /// Whether the picker was created with `enable_mmap_cache` set.
    pub fn has_mmap_cache(&self) -> bool {
        self.enable_mmap_cache
    }
521
    /// Whether the picker was created with `enable_content_indexing` set.
    pub fn has_content_indexing(&self) -> bool {
        self.enable_content_indexing
    }
525
    /// Whether the picker was created with `watch` set. This reports the
    /// option only, not whether a watcher is currently installed.
    pub fn has_watcher(&self) -> bool {
        self.watch
    }
529
    /// Operating mode the picker was created with.
    pub fn mode(&self) -> FfsMode {
        self.mode
    }
533
534 pub fn cache_budget(&self) -> &ContentCacheBudget {
535 &self.cache_budget
536 }
537
538 pub fn bigram_index(&self) -> Option<&BigramFilter> {
539 self.sync_data.bigram_index.as_deref()
540 }
541
542 pub fn bigram_overlay(&self) -> Option<&parking_lot::RwLock<BigramOverlay>> {
543 self.sync_data.bigram_overlay.as_deref()
544 }
545
    /// Mutable access to the file entry at `index`, or `None` when the index
    /// is out of range.
    pub fn get_file_mut(&mut self, index: usize) -> Option<&mut FileItem> {
        self.sync_data.get_file_mut(index)
    }
549
550 pub fn git_root(&self) -> Option<&Path> {
553 self.sync_data.git_workdir.as_deref()
554 }
555
    /// Whether a cache budget was supplied in the construction options.
    pub fn has_explicit_cache_budget(&self) -> bool {
        self.has_explicit_cache_budget
    }
559
560 pub fn set_cache_budget(&mut self, budget: ContentCacheBudget) {
561 self.cache_budget = Arc::new(budget);
562 }
563
    /// All indexed files: base entries followed by overflow entries.
    pub fn get_files(&self) -> &[FileItem] {
        self.sync_data.files()
    }
570
    /// Only the overflow entries of the file table.
    pub fn get_overflow_files(&self) -> &[FileItem] {
        self.sync_data.overflow_files()
    }
574
575 pub fn get_dirs(&self) -> &[DirItem] {
577 &self.sync_data.dirs
578 }
579
580 pub fn arena_bytes(&self) -> (usize, usize, usize) {
583 let chunked = self
584 .sync_data
585 .chunked_paths
586 .as_ref()
587 .map_or(0, |s| s.heap_bytes());
588
589 (chunked, 0, 0)
590 }
591
592 #[tracing::instrument(level = "debug", skip_all)]
593 pub(crate) fn for_each_dir(&self, mut f: impl FnMut(&Path) -> ControlFlow<()>) {
594 let dir_table = &self.sync_data.dirs;
595 let base = self.base_path.as_path();
596
597 if !dir_table.is_empty() {
598 let arena = self.arena_base_ptr();
599 let mut path_buf = PathBuf::with_capacity(crate::simd_path::PATH_BUF_SIZE);
600 let mut prev_relative_path = String::new();
601
602 let mut scratch_buf = [0u8; crate::simd_path::PATH_BUF_SIZE];
603 for dir_item in dir_table {
604 let full_relative_path = dir_item.read_relative_path(arena, &mut scratch_buf);
605 let relative_path = full_relative_path.trim_end_matches(std::path::is_separator);
606
607 if relative_path.is_empty() {
608 prev_relative_path.clear();
610 continue;
611 }
612
613 let mut i = common_dir_prefix_len(&prev_relative_path, relative_path);
614 if i < relative_path.len()
618 && std::path::is_separator(relative_path.as_bytes()[i] as char)
619 {
620 i += 1;
621 }
622
623 while i < relative_path.len() {
626 let next_sep = relative_path[i..]
627 .find(std::path::is_separator)
628 .map(|off| i + off)
629 .unwrap_or(relative_path.len());
630 let ancestor_rel = &relative_path[..next_sep];
631
632 path_buf.clear();
633 path_buf.push(base);
634 path_buf.push(ancestor_rel);
635
636 if matches!(f(path_buf.as_path()), ControlFlow::Break(())) {
638 return;
639 }
640
641 i = next_sep + 1;
642 }
643
644 prev_relative_path.clear();
645 prev_relative_path.push_str(relative_path);
646 }
647 return;
648 }
649
650 let files = self.sync_data.files();
653 let arena = self.arena_base_ptr();
654 let mut current = self.base_path.clone();
655 let mut path_buf = [0u8; PATH_BUF_SIZE];
656
657 for file in files {
658 let abs = file.write_absolute_path(arena, base, &mut path_buf);
659 let Some(parent) = abs.parent() else {
660 continue;
661 };
662 if parent == current.as_path() {
663 continue;
664 }
665
666 while current.as_path() != base && !parent.starts_with(¤t) {
667 current.pop();
668 }
669
670 let Ok(remainder) = parent.strip_prefix(¤t) else {
671 continue;
672 };
673 for component in remainder.components() {
674 current.push(component);
675 if matches!(f(current.as_path()), ControlFlow::Break(())) {
676 return;
677 }
678 }
679 }
680 }
681
682 pub fn new(options: FilePickerOptions) -> Result<Self, Error> {
686 let path = PathBuf::from(&options.base_path);
687 if !path.exists() {
688 error!("Base path does not exist: {}", options.base_path);
689 return Err(Error::InvalidPath(path));
690 }
691 if path.parent().is_none() {
692 error!("Refusing to index filesystem root: {}", path.display());
693 return Err(Error::FilesystemRoot(path));
694 }
695
696 #[cfg(windows)]
700 let path = crate::path_utils::canonicalize(&path).unwrap_or(path);
701
702 let has_explicit_budget = options.cache_budget.is_some();
703 let initial_budget = options.cache_budget.unwrap_or_default();
704
705 Ok(FilePicker {
706 background_watcher: None,
707 base_path: path,
708 cache_budget: Arc::new(initial_budget),
709 has_explicit_cache_budget: has_explicit_budget,
710 signals: crate::scan::ScanSignals::default(),
711 mode: options.mode,
712 scanned_files_count: Arc::new(AtomicUsize::new(0)),
713 sync_data: FileSync::new(),
714 enable_mmap_cache: options.enable_mmap_cache,
715 enable_content_indexing: options.enable_content_indexing,
716 watch: options.watch,
717 })
718 }
719
720 pub fn new_with_shared_state(
723 shared_picker: SharedFilePicker,
724 shared_frecency: SharedFrecency,
725 options: FilePickerOptions,
726 ) -> Result<(), Error> {
727 let picker = Self::new(options)?;
728
729 info!(
730 "Spawning background threads: base_path={}, warmup={}, content_indexing={}, mode={:?}",
731 picker.base_path.display(),
732 picker.enable_mmap_cache,
733 picker.enable_content_indexing,
734 picker.mode,
735 );
736
737 let warmup = picker.enable_mmap_cache;
738 let content_indexing = picker.enable_content_indexing;
739 let watch = picker.watch;
740 let mode = picker.mode;
741
742 let signals = picker.scan_signals();
743 let scanned_files_counter = picker.scanned_files_counter();
744 let path = picker.base_path.clone();
745
746 {
747 let mut guard = shared_picker.write()?;
748 *guard = Some(picker);
749 }
750
751 ScanJob::new_initial(
756 shared_picker,
757 shared_frecency,
758 path,
759 mode,
760 signals,
761 scanned_files_counter,
762 ScanConfig {
763 warmup,
764 content_indexing,
765 watch,
766 auto_cache_budget: true,
767 install_watcher: true,
768 },
769 )
770 .spawn();
771
772 Ok(())
773 }
774
775 pub fn collect_files(&mut self) -> Result<(), Error> {
784 self.signals.scanning.store(true, Ordering::Relaxed);
785 self.scanned_files_count.store(0, Ordering::Relaxed);
786
787 let git_workdir = FileSync::discover_git_workdir(&self.base_path);
788 let git_handle = git_workdir.clone().map(FileSync::spawn_git_status);
789
790 let empty_frecency = SharedFrecency::default();
791 let sync = FileSync::walk_filesystem(
792 &self.base_path,
793 git_workdir,
794 &self.scanned_files_count,
795 &empty_frecency,
796 self.mode,
797 )?;
798
799 self.sync_data = sync;
800
801 if !self.has_explicit_cache_budget {
804 let file_count = self.sync_data.files().len();
805 self.cache_budget = Arc::new(ContentCacheBudget::new_for_repo(file_count));
806 } else {
807 self.cache_budget.reset();
808 }
809
810 if let Some(handle) = git_handle
812 && let Ok(Some(git_cache)) = handle.join()
813 {
814 let arena = self.arena_base_ptr();
815 for file in self.sync_data.files.iter_mut() {
816 file.git_status =
817 git_cache.lookup_status(&file.absolute_path(arena, &self.base_path));
818 }
819 }
820
821 self.signals.scanning.store(false, Ordering::Relaxed);
822 Ok(())
823 }
824
825 pub fn spawn_background_watcher(
831 &mut self,
832 shared_picker: &SharedFilePicker,
833 shared_frecency: &SharedFrecency,
834 ) -> Result<(), Error> {
835 let git_workdir = self.sync_data.git_workdir.clone();
836 let watcher = BackgroundWatcher::new(
837 self.base_path.clone(),
838 git_workdir,
839 shared_picker.clone(),
840 shared_frecency.clone(),
841 self.mode,
842 )?;
843 self.background_watcher = Some(watcher);
844
845 #[cfg(target_os = "macos")]
849 std::thread::sleep(std::time::Duration::from_millis(500));
850
851 self.signals.watcher_ready.store(true, Ordering::Release);
852 Ok(())
853 }
854
855 pub fn fuzzy_search<'q>(
861 &self,
862 query: &'q FfsQuery<'q>,
863 query_tracker: Option<&QueryTracker>,
864 options: FuzzySearchOptions<'q>,
865 ) -> SearchResult<'_> {
866 let files = self.get_files();
867 let max_threads = if options.max_threads == 0 {
868 std::thread::available_parallelism()
869 .map(|n| n.get())
870 .unwrap_or(4)
871 } else {
872 options.max_threads
873 };
874
875 debug!(
876 raw_query = ?query.raw_query,
877 pagination = ?options.pagination,
878 ?max_threads,
879 current_file = ?options.current_file,
880 "Fuzzy search",
881 );
882
883 let total_files = files.len();
884 let location = query.location;
885
886 let effective_query = match &query.fuzzy_query {
888 ffs_query_parser::FuzzyQuery::Text(t) => *t,
889 ffs_query_parser::FuzzyQuery::Parts(parts) if !parts.is_empty() => parts[0],
890 _ => query.raw_query.trim(),
891 };
892
893 let max_typos = (effective_query.len() as u16 / 4).clamp(2, 6);
895 let last_same_query_entry =
897 query_tracker
898 .zip(options.project_path)
899 .and_then(|(tracker, project_path)| {
900 tracker
901 .get_last_query_entry(
902 query.raw_query,
903 project_path,
904 options.min_combo_count,
905 )
906 .ok()
907 .flatten()
908 });
909
910 let context = ScoringContext {
911 query,
912 max_typos,
913 max_threads,
914 project_path: options.project_path,
915 current_file: options.current_file,
916 last_same_query_match: last_same_query_entry,
917 combo_boost_score_multiplier: options.combo_boost_score_multiplier,
918 min_combo_count: options.min_combo_count,
919 pagination: options.pagination,
920 };
921
922 let time = std::time::Instant::now();
923
924 let base_arena = self.sync_data.arena_base_ptr();
925 let overflow_arena = self
926 .sync_data
927 .overflow_builder
928 .as_ref()
929 .map(|b| b.as_arena_ptr())
930 .unwrap_or(base_arena);
931
932 let (items, scores, total_matched) = fuzzy_match_and_score_files(
933 files,
934 &context,
935 self.sync_data.base_count,
936 base_arena,
937 overflow_arena,
938 );
939
940 info!(
941 ?query,
942 completed_in = ?time.elapsed(),
943 total_matched,
944 returned_count = items.len(),
945 pagination = ?options.pagination,
946 "Fuzzy search completed",
947 );
948
949 SearchResult {
950 items,
951 scores,
952 total_matched,
953 total_files,
954 location,
955 }
956 }
957
958 pub fn fuzzy_search_directories<'q>(
962 &self,
963 query: &'q FfsQuery<'q>,
964 options: FuzzySearchOptions<'q>,
965 ) -> DirSearchResult<'_> {
966 let dirs = self.get_dirs();
967 let max_threads = if options.max_threads == 0 {
968 std::thread::available_parallelism()
969 .map(|n| n.get())
970 .unwrap_or(4)
971 } else {
972 options.max_threads
973 };
974
975 let total_dirs = dirs.len();
976
977 let effective_query = match &query.fuzzy_query {
978 ffs_query_parser::FuzzyQuery::Text(t) => *t,
979 ffs_query_parser::FuzzyQuery::Parts(parts) if !parts.is_empty() => parts[0],
980 _ => query.raw_query.trim(),
981 };
982
983 let max_typos = (effective_query.len() as u16 / 4).clamp(2, 6);
984
985 let context = ScoringContext {
986 query,
987 max_typos,
988 max_threads,
989 project_path: options.project_path,
990 current_file: options.current_file,
991 last_same_query_match: None,
992 combo_boost_score_multiplier: 0,
993 min_combo_count: 0,
994 pagination: options.pagination,
995 };
996
997 let arena = self.sync_data.arena_base_ptr();
998 let time = std::time::Instant::now();
999
1000 let (items, scores, total_matched) =
1001 crate::score::fuzzy_match_and_score_dirs(dirs, &context, arena);
1002
1003 info!(
1004 ?query,
1005 completed_in = ?time.elapsed(),
1006 total_matched,
1007 returned_count = items.len(),
1008 "Directory search completed",
1009 );
1010
1011 DirSearchResult {
1012 items,
1013 scores,
1014 total_matched,
1015 total_dirs,
1016 }
1017 }
1018
1019 pub fn fuzzy_search_mixed<'q>(
1029 &self,
1030 query: &'q FfsQuery<'q>,
1031 query_tracker: Option<&QueryTracker>,
1032 options: FuzzySearchOptions<'q>,
1033 ) -> MixedSearchResult<'_> {
1034 let location = query.location;
1035 let page_offset = options.pagination.offset;
1036 let page_limit = if options.pagination.limit > 0 {
1037 options.pagination.limit
1038 } else {
1039 100
1040 };
1041
1042 let dirs_only =
1043 query.raw_query.ends_with(std::path::MAIN_SEPARATOR) || query.raw_query.ends_with('/');
1044
1045 let internal_limit = page_offset.saturating_add(page_limit).saturating_mul(2);
1047
1048 let dir_options = FuzzySearchOptions {
1049 pagination: PaginationArgs {
1050 offset: 0,
1051 limit: internal_limit,
1052 },
1053 ..options
1054 };
1055 let dir_results = self.fuzzy_search_directories(query, dir_options);
1056
1057 if dirs_only {
1058 let total_matched = dir_results.total_matched;
1059 let total_dirs = dir_results.total_dirs;
1060
1061 let mut merged: Vec<(MixedItemRef<'_>, Score)> =
1062 Vec::with_capacity(dir_results.items.len());
1063 for (dir, score) in dir_results.items.into_iter().zip(dir_results.scores) {
1064 merged.push((MixedItemRef::Dir(dir), score));
1065 }
1066
1067 if page_offset >= merged.len() {
1068 return MixedSearchResult {
1069 items: vec![],
1070 scores: vec![],
1071 total_matched,
1072 total_files: self.sync_data.files().len(),
1073 total_dirs,
1074 location,
1075 };
1076 }
1077
1078 let end = (page_offset + page_limit).min(merged.len());
1079 let page = merged.drain(page_offset..end);
1080 let (items, scores): (Vec<_>, Vec<_>) = page.unzip();
1081
1082 return MixedSearchResult {
1083 items,
1084 scores,
1085 total_matched,
1086 total_files: self.sync_data.files().len(),
1087 total_dirs,
1088 location,
1089 };
1090 }
1091
1092 let file_options = FuzzySearchOptions {
1093 pagination: PaginationArgs {
1094 offset: 0,
1095 limit: internal_limit,
1096 },
1097 ..options
1098 };
1099 let file_results = self.fuzzy_search(query, query_tracker, file_options);
1100
1101 let total_matched = file_results.total_matched + dir_results.total_matched;
1103 let total_files = file_results.total_files;
1104 let total_dirs = dir_results.total_dirs;
1105
1106 let mut merged: Vec<(MixedItemRef<'_>, Score)> =
1107 Vec::with_capacity(file_results.items.len() + dir_results.items.len());
1108
1109 for (file, score) in file_results.items.into_iter().zip(file_results.scores) {
1110 merged.push((MixedItemRef::File(file), score));
1111 }
1112 for (dir, score) in dir_results.items.into_iter().zip(dir_results.scores) {
1113 merged.push((MixedItemRef::Dir(dir), score));
1114 }
1115
1116 merged.sort_unstable_by_key(|b| std::cmp::Reverse(b.1.total));
1118
1119 if page_offset >= merged.len() {
1121 return MixedSearchResult {
1122 items: vec![],
1123 scores: vec![],
1124 total_matched,
1125 total_files,
1126 total_dirs,
1127 location,
1128 };
1129 }
1130
1131 let end = (page_offset + page_limit).min(merged.len());
1132 let page = merged.drain(page_offset..end);
1133 let (items, scores): (Vec<_>, Vec<_>) = page.unzip();
1134
1135 MixedSearchResult {
1136 items,
1137 scores,
1138 total_matched,
1139 total_files,
1140 total_dirs,
1141 location,
1142 }
1143 }
1144
1145 pub fn grep(&self, query: &FfsQuery<'_>, options: &GrepSearchOptions) -> GrepResult<'_> {
1150 let overlay_guard = self.sync_data.bigram_overlay.as_ref().map(|o| o.read());
1151 let arena = self.arena_base_ptr();
1152 let overflow_arena = self.sync_data.overflow_arena_ptr();
1153 let cancel = options
1154 .abort_signal
1155 .as_deref()
1156 .unwrap_or(&self.signals.cancelled);
1157
1158 grep_search(
1159 self.get_files(),
1160 query,
1161 options,
1162 self.cache_budget(),
1163 self.sync_data.bigram_index.as_deref(),
1164 overlay_guard.as_deref(),
1165 cancel,
1166 &self.base_path,
1167 arena,
1168 overflow_arena,
1169 )
1170 }
1171
1172 pub fn multi_grep(
1174 &self,
1175 patterns: &[&str],
1176 constraints: &[ffs_query_parser::Constraint<'_>],
1177 options: &GrepSearchOptions,
1178 ) -> GrepResult<'_> {
1179 let overlay_guard = self.sync_data.bigram_overlay.as_ref().map(|o| o.read());
1180 let arena = self.arena_base_ptr();
1181 let overflow_arena = self.sync_data.overflow_arena_ptr();
1182 let cancel = options
1183 .abort_signal
1184 .as_deref()
1185 .unwrap_or(&self.signals.cancelled);
1186
1187 multi_grep_search(
1188 self.get_files(),
1189 patterns,
1190 constraints,
1191 options,
1192 self.cache_budget(),
1193 self.sync_data.bigram_index.as_deref(),
1194 overlay_guard.as_deref(),
1195 cancel,
1196 &self.base_path,
1197 arena,
1198 overflow_arena,
1199 )
1200 }
1201
1202 #[doc(hidden)]
1203 pub fn grep_original(
1204 &self,
1205 query: &FfsQuery<'_>,
1206 options: &GrepSearchOptions,
1207 ) -> GrepResult<'_> {
1208 let arena = self.arena_base_ptr();
1209 let overflow_arena = self.sync_data.overflow_arena_ptr();
1210 let cancel = options
1211 .abort_signal
1212 .as_deref()
1213 .unwrap_or(&self.signals.cancelled);
1214
1215 grep_search(
1216 self.get_files(),
1217 query,
1218 options,
1219 self.cache_budget(),
1220 self.sync_data.bigram_index.as_deref(),
1221 None,
1222 cancel,
1223 &self.base_path,
1224 arena,
1225 overflow_arena,
1226 )
1227 }
1228
1229 pub fn get_scan_progress(&self) -> ScanProgress {
1231 let scanned_count = self.scanned_files_count.load(Ordering::Relaxed);
1232 let is_scanning = self.signals.scanning.load(Ordering::Relaxed);
1233 ScanProgress {
1234 scanned_files_count: scanned_count,
1235 is_scanning,
1236 is_watcher_ready: self.signals.watcher_ready.load(Ordering::Relaxed),
1237 is_warmup_complete: self.sync_data.bigram_index.is_some(),
1238 }
1239 }
1240
1241 pub(crate) fn set_bigram_index(&mut self, index: BigramFilter, overlay: BigramOverlay) {
1242 self.sync_data.bigram_index = Some(Arc::new(index));
1243 self.sync_data.bigram_overlay = Some(Arc::new(parking_lot::RwLock::new(overlay)));
1244 }
1245
1246 pub(crate) fn scan_signals(&self) -> crate::scan::ScanSignals {
1247 self.signals.clone()
1248 }
1249
1250 pub(crate) fn scanned_files_counter(&self) -> Arc<AtomicUsize> {
1251 Arc::clone(&self.scanned_files_count)
1252 }
1253
1254 pub(crate) unsafe fn post_scan_snapshot(&self) -> Option<PostScanUnsafeSnapshot> {
1272 if self
1273 .signals
1274 .post_scan_indexing_active
1275 .load(Ordering::Acquire)
1276 {
1277 tracing::error!(
1278 "Can not acquire post scan unsafe snapshot, someone already acquired it"
1279 );
1280 return None;
1281 }
1282
1283 self.signals
1284 .post_scan_indexing_active
1285 .store(true, Ordering::Release);
1286
1287 let files = &self.sync_data.files;
1288 let dirs = &self.sync_data.dirs;
1289 Some(PostScanUnsafeSnapshot {
1290 files: files.as_ptr() as *mut FileItem,
1291 base_count: self.sync_data.base_count,
1292 indexable_count: self.sync_data.indexable_count,
1293 dirs: dirs.as_ptr(),
1294 dirs_len: dirs.len(),
1295 arena: self.sync_data.arena_base_ptr(),
1296 budget: &*self.cache_budget as *const _,
1297 base_path: self.base_path.clone(),
1298 post_scan_flag: Arc::clone(&self.signals.post_scan_indexing_active),
1299 })
1300 }
1301
1302 pub(crate) fn commit_new_sync(&mut self, sync: FileSync) {
1303 self.sync_data = sync;
1304 self.cache_budget.reset();
1305 }
1306
1307 #[inline]
1308 pub(crate) fn arena_base_ptr(&self) -> ArenaPtr {
1309 self.sync_data.arena_base_ptr()
1310 }
1311
1312 pub(crate) fn update_git_statuses(
1314 &mut self,
1315 status_cache: GitStatusCache,
1316 shared_frecency: &SharedFrecency,
1317 ) -> Result<(), Error> {
1318 debug!(
1319 statuses_count = status_cache.statuses_len(),
1320 "Updating git status",
1321 );
1322
1323 let mode = self.mode;
1324 let bp = self.base_path.clone();
1325 let arena = self.arena_base_ptr();
1326 let frecency = shared_frecency.read()?;
1327 status_cache
1328 .into_iter()
1329 .try_for_each(|(path, status)| -> Result<(), Error> {
1330 if let Some(file) = self.get_mut_file_by_path(&path) {
1331 file.git_status = Some(status);
1332 if let Some(ref f) = *frecency {
1333 file.update_frecency_scores(f, arena, &bp, mode)?;
1334 }
1335 let score = file.access_frecency_score as i32;
1337 let dir_idx = file.parent_dir_index() as usize;
1338 if let Some(dir) = self.sync_data.dirs.get_mut(dir_idx) {
1339 dir.update_frecency_if_larger(score);
1340 }
1341 } else {
1342 debug!(?path, "Git status for path not in index, skipping");
1346 }
1347 Ok(())
1348 })?;
1349
1350 Ok(())
1351 }
1352
1353 pub fn update_single_file_frecency(
1354 &mut self,
1355 file_path: impl AsRef<Path>,
1356 frecency_tracker: &FrecencyTracker,
1357 ) -> Result<(), Error> {
1358 let path = file_path.as_ref();
1359 let arena = self.arena_base_ptr();
1360 let rel = self.to_relative_path(path);
1361 let rel_ref: &str = rel.as_deref().unwrap_or("");
1362 let index = self
1363 .sync_data
1364 .find_file_index(path, &self.base_path)
1365 .ok()
1366 .or_else(|| self.sync_data.find_overflow_index(rel_ref));
1367 if let Some(index) = index
1368 && let Some(file) = self.sync_data.get_file_mut(index)
1369 {
1370 file.update_frecency_scores(frecency_tracker, arena, &self.base_path, self.mode)?;
1371
1372 let score = file.access_frecency_score as i32;
1374 let dir_idx = file.parent_dir_index() as usize;
1375 if let Some(dir) = self.sync_data.dirs.get_mut(dir_idx) {
1376 dir.update_frecency_if_larger(score);
1377 }
1378 }
1379
1380 Ok(())
1381 }
1382
1383 pub fn get_file_by_path(&self, path: impl AsRef<Path>) -> Option<&FileItem> {
1384 self.sync_data
1385 .find_file_index(path.as_ref(), &self.base_path)
1386 .ok()
1387 .and_then(|index| self.sync_data.files().get(index))
1388 }
1389
1390 pub fn get_mut_file_by_path(&mut self, path: impl AsRef<Path>) -> Option<&mut FileItem> {
1391 let path = path.as_ref();
1392 let rel = self.to_relative_path(path);
1393 let rel_ref: &str = rel.as_deref().unwrap_or("");
1394 let index = self
1395 .sync_data
1396 .find_file_index(path, &self.base_path)
1397 .ok()
1398 .or_else(|| self.sync_data.find_overflow_index(rel_ref));
1399 index.and_then(|i| self.sync_data.get_file_mut(i))
1400 }
1401
1402 #[tracing::instrument(skip(self),level = Level::DEBUG)]
1406 pub fn handle_create_or_modify(&mut self, path: impl AsRef<Path> + Debug) -> Option<&FileItem> {
1407 let path = path.as_ref();
1408
1409 if let Ok(idx) = self.sync_data.find_file_index(path, &self.base_path) {
1410 return self.handle_file_modify(path, FileSlot::Base(idx));
1411 }
1412
1413 let relative_path = self.to_relative_path(path)?;
1414 if let Some(idx) = self.sync_data.find_overflow_index(&relative_path) {
1415 return self.handle_file_modify(path, FileSlot::Overflow(idx));
1416 }
1417
1418 self.add_new_file(path)
1419 }
1420
1421 #[tracing::instrument(skip_all, fields(path = ?path), level = Level::DEBUG)]
1422 fn handle_file_modify(&mut self, path: &Path, slot: FileSlot) -> Option<&FileItem> {
1423 let overlay = self.sync_data.bigram_overlay.as_ref().map(Arc::clone);
1424 let pos = slot.index();
1425 let file = self.sync_data.get_file_mut(pos)?;
1426
1427 let metadata = std::fs::metadata(path)
1428 .inspect_err(|e| {
1429 tracing::error!(
1430 ?e,
1431 "File market for modification doesn't exists or not accessible"
1432 )
1433 })
1434 .ok()?; let size = metadata.len();
1437 let modified_time = metadata
1438 .modified()
1439 .ok()
1440 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
1441 .map(|d| d.as_secs());
1442
1443 if file.is_deleted() {
1444 file.set_deleted(false);
1445 }
1446
1447 file.update_metadata(&self.cache_budget, modified_time, Some(size));
1448
1449 if matches!(slot, FileSlot::Base(_))
1451 && let Some(ref overlay) = overlay
1452 {
1453 let in_indexable = {
1454 let guard = overlay.read();
1455 pos < guard.base_file_count()
1456 };
1457
1458 if in_indexable && let Ok(content) = std::fs::read(path) {
1459 overlay.write().modify_file(pos, &content);
1460 }
1461 }
1462
1463 Some(&*self.sync_data.get_file_mut(pos)?)
1464 }
1465
1466 #[tracing::instrument(skip(self))]
1469 pub fn add_new_file(&mut self, path: &Path) -> Option<&FileItem> {
1470 #[cfg(windows)]
1474 let canonical_buf: Option<PathBuf> = if path.starts_with(&self.base_path) {
1475 None
1476 } else if let Ok(c) = crate::path_utils::canonicalize(path) {
1477 Some(c)
1478 } else {
1479 let parent = path.parent()?;
1480 let file_name = path.file_name()?;
1481 let mut p = crate::path_utils::canonicalize(parent).ok()?;
1482 p.push(file_name);
1483 Some(p)
1484 };
1485 #[cfg(windows)]
1486 let path_for_index: &Path = canonical_buf.as_deref().unwrap_or(path);
1487 #[cfg(not(windows))]
1488 let path_for_index: &Path = path;
1489
1490 let (mut file_item, rel_path) =
1491 FileItem::new(path_for_index.to_path_buf(), &self.base_path, None);
1492
1493 let builder = self
1495 .sync_data
1496 .overflow_builder
1497 .get_or_insert_with(|| crate::simd_path::ChunkedPathStoreBuilder::new(64));
1498
1499 let chunked_path = builder.add_file_immediate(&rel_path, file_item.path.filename_offset);
1500 file_item.set_path(chunked_path);
1501 file_item.set_overflow(true);
1502
1503 if !self.sync_data.files.push(file_item) {
1504 return None;
1505 }
1506
1507 self.sync_data.files.last()
1508 }
1509
1510 pub fn remove_file_by_path(&mut self, path: impl AsRef<Path>) -> bool {
1512 let path = path.as_ref();
1513 match self.sync_data.find_file_index(path, &self.base_path) {
1514 Ok(index) => {
1515 let file = &mut self.sync_data.files[index];
1516 file.set_deleted(true);
1517 file.git_status = None;
1526 file.invalidate_mmap(&self.cache_budget);
1527 if let Some(ref overlay) = self.sync_data.bigram_overlay {
1528 overlay.write().delete_file(index);
1529 }
1530 true
1531 }
1532 Err(_) => {
1533 let rel = self.to_relative_path(path);
1536 let rel_ref: &str = rel.as_deref().unwrap_or("");
1537 if let Some(abs_pos) = self.sync_data.find_overflow_index(rel_ref) {
1538 self.sync_data.files.remove(abs_pos);
1539 true
1540 } else {
1541 false
1542 }
1543 }
1544 }
1545 }
1546
1547 pub fn remove_all_files_in_dir(&mut self, dir: impl AsRef<Path>) -> usize {
1549 let dir_path = dir.as_ref();
1550 let relative_dir = self
1551 .to_relative_path(dir_path)
1552 .map(|c| c.into_owned())
1553 .unwrap_or_default();
1554
1555 let dir_prefix = if relative_dir.is_empty() {
1556 String::new()
1557 } else {
1558 format!("{}{}", relative_dir, std::path::MAIN_SEPARATOR)
1559 };
1560
1561 self.sync_data.retain_files_with_arena(|file, arena| {
1562 !file.relative_path_starts_with(arena, &dir_prefix)
1563 })
1564 }
1565
1566 pub fn cancel(&self) {
1568 self.signals.cancelled.store(true, Ordering::Release);
1569 }
1570
1571 pub fn stop_background_monitor(&mut self) {
1573 if let Some(mut watcher) = self.background_watcher.take() {
1574 watcher.stop();
1575 }
1576 }
1577
1578 pub fn is_scan_active(&self) -> bool {
1580 self.signals.scanning.load(Ordering::Relaxed)
1581 }
1582
1583 pub fn watcher_signal(&self) -> Arc<AtomicBool> {
1586 Arc::clone(&self.signals.watcher_ready)
1587 }
1588
1589 fn to_relative_path<'a>(&self, path: &'a Path) -> Option<std::borrow::Cow<'a, str>> {
1597 if let Ok(stripped) = path.strip_prefix(&self.base_path)
1598 && let Some(s) = stripped.to_str()
1599 {
1600 return Some(normalize_relative_path(s));
1601 }
1602
1603 #[cfg(windows)]
1604 {
1605 let rel = canonical_relative_path(path, &self.base_path)?;
1606 return Some(std::borrow::Cow::Owned(rel));
1607 }
1608
1609 #[cfg(not(windows))]
1610 None
1611 }
1612}
1613
/// Windows: rewrites forward slashes in `s` to backslashes, allocating only
/// when `s` actually contains a forward slash.
#[cfg(windows)]
#[inline]
fn normalize_relative_path(s: &str) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    if !s.contains('/') {
        return Cow::Borrowed(s);
    }
    Cow::Owned(s.replace('/', "\\"))
}
1631
/// Non-Windows: relative paths need no rewriting, so `s` is returned
/// borrowed and unchanged.
#[cfg(not(windows))]
#[inline]
fn normalize_relative_path(s: &str) -> std::borrow::Cow<'_, str> {
    s.into()
}
1637
/// Windows fallback for deriving a path relative to `base`.
///
/// First canonicalizes `path` itself and strips `base` from the result. If
/// that does not yield a UTF-8 relative path, the parent directory is
/// canonicalized instead, `base` is stripped from it, and the original file
/// name is appended. Returns `None` when neither route succeeds.
#[cfg(windows)]
fn canonical_relative_path(path: &Path, base: &Path) -> Option<String> {
    let direct = crate::path_utils::canonicalize(path)
        .ok()
        .and_then(|canonical| {
            canonical
                .strip_prefix(base)
                .ok()
                .and_then(Path::to_str)
                .map(str::to_owned)
        });
    if direct.is_some() {
        return direct;
    }

    let parent = path.parent()?;
    let file_name = path.file_name()?;
    let canonical_parent = crate::path_utils::canonicalize(parent).ok()?;
    let relative_parent = canonical_parent.strip_prefix(base).ok()?;
    let relative = relative_parent.join(file_name);
    relative.to_str().map(str::to_owned)
}
1660
/// Where an already-indexed file was found, together with its position.
#[derive(Debug, Clone, Copy)]
enum FileSlot {
    /// Position obtained from the base index lookup.
    Base(usize),
    /// Position obtained from the overflow lookup.
    Overflow(usize),
}

impl FileSlot {
    /// Returns the wrapped position, whichever variant carries it.
    fn index(self) -> usize {
        let (Self::Base(position) | Self::Overflow(position)) = self;
        position
    }
}
1674
/// Bundle of raw pointers, counts and an owned base path handed to
/// post-scan work, plus a shared flag that is cleared when the bundle is
/// dropped.
///
/// NOTE(review): the pointers carry no lifetime. Whatever they point to
/// must stay alive and unmoved for as long as the snapshot exists; that
/// guarantee is not visible in this block — confirm at the construction
/// site.
pub(crate) struct PostScanUnsafeSnapshot {
    /// Raw mutable pointer to `FileItem` storage (presumably the start of
    /// the indexed file list — confirm).
    pub files: *mut FileItem,
    /// Presumably the number of base (scan-time) files behind `files`.
    pub base_count: usize,
    /// Presumably the number of leading files eligible for content indexing.
    pub indexable_count: usize,
    /// Raw pointer to `DirItem` storage; `dirs_len` presumably gives its
    /// length.
    pub dirs: *const crate::types::DirItem,
    pub dirs_len: usize,
    /// Arena pointer used to resolve stored paths.
    pub arena: ArenaPtr,
    /// Raw pointer to the content cache budget.
    pub budget: *const crate::types::ContentCacheBudget,
    /// Owned copy of the base path.
    pub base_path: PathBuf,
    /// Shared flag that `Drop` sets back to `false`.
    post_scan_flag: Arc<AtomicBool>,
}

impl Drop for PostScanUnsafeSnapshot {
    /// Clears the shared flag (Release ordering) when the snapshot goes away.
    fn drop(&mut self) {
        self.post_scan_flag.store(false, Ordering::Release);
    }
}

// SAFETY NOTE(review): the raw pointer fields make this type neither `Send`
// nor `Sync` automatically; these impls assert that moving and sharing it
// across threads is sound. The invariant that justifies this is not visible
// in this block — confirm it where the snapshot is created and consumed.
unsafe impl Send for PostScanUnsafeSnapshot {}
unsafe impl Sync for PostScanUnsafeSnapshot {}
1704
/// Point-in-time summary of scanning state.
///
/// NOTE(review): the field descriptions below are inferred from the field
/// names; confirm them where the struct is populated.
#[derive(Debug, Clone)]
pub struct ScanProgress {
    /// Presumably the number of files discovered so far.
    pub scanned_files_count: usize,
    /// Presumably `true` while a scan is running.
    pub is_scanning: bool,
    /// Presumably `true` once the file watcher is ready.
    pub is_watcher_ready: bool,
    /// Presumably `true` once content warmup has finished.
    pub is_warmup_complete: bool,
}
1716
1717#[tracing::instrument(skip(files), name = "warmup_mmaps", level = Level::DEBUG)]
1728pub(crate) fn warmup_mmaps(
1729 files: &[FileItem],
1730 budget: &ContentCacheBudget,
1731 base_path: &Path,
1732 arena: ArenaPtr,
1733) {
1734 let max_files = budget.max_files;
1735 let max_bytes = budget.max_bytes;
1736 let max_file_size = budget.max_file_size;
1737
1738 let mut all: Vec<&FileItem> = files.iter().collect();
1741
1742 if all.len() > max_files {
1746 all.select_nth_unstable_by(max_files, |a, b| {
1747 let a_ok = !a.is_binary() && a.size > 0;
1748 let b_ok = !b.is_binary() && b.size > 0;
1749 match (a_ok, b_ok) {
1750 (true, false) => std::cmp::Ordering::Less,
1751 (false, true) => std::cmp::Ordering::Greater,
1752 (false, false) => std::cmp::Ordering::Equal,
1753 (true, true) => b.total_frecency_score().cmp(&a.total_frecency_score()),
1754 }
1755 });
1756 }
1757
1758 let to_warm = &all[..all.len().min(max_files)];
1759
1760 let warmed_bytes = AtomicU64::new(0);
1761 let budget_exhausted = AtomicBool::new(false);
1762
1763 BACKGROUND_THREAD_POOL.install(|| {
1764 to_warm.par_iter().for_each(|file| {
1765 if budget_exhausted.load(Ordering::Relaxed) {
1766 return;
1767 }
1768
1769 if file.is_binary() || file.size == 0 || file.size > max_file_size {
1770 return;
1771 }
1772
1773 let prev_bytes = warmed_bytes.fetch_add(file.size, Ordering::Relaxed);
1775 if prev_bytes + file.size > max_bytes {
1776 budget_exhausted.store(true, Ordering::Relaxed);
1777 return;
1778 }
1779
1780 if let Some(content) = file.get_content(arena, base_path, budget) {
1781 let _ = std::hint::black_box(content.first());
1782 }
1783 });
1784 });
1785}
1786
1787impl FileSync {
1788 pub(crate) fn discover_git_workdir(base_path: &Path) -> Option<PathBuf> {
1789 let git_workdir = Repository::discover(base_path)
1790 .ok()
1791 .and_then(|repo| repo.workdir().map(Path::to_path_buf))
1792 .map(crate::path_utils::normalize);
1793
1794 match &git_workdir {
1795 Some(workdir) => debug!("Git repository found at: {}", workdir.display()),
1796 None => warn!("No git repository found for path: {}", base_path.display()),
1797 }
1798
1799 git_workdir
1800 }
1801
1802 pub(crate) fn spawn_git_status(git_workdir: PathBuf) -> JoinHandle<Option<GitStatusCache>> {
1803 std::thread::spawn(move || {
1804 GitStatusCache::read_git_status(
1805 Some(git_workdir.as_path()),
1806 &mut crate::git::default_status_options(),
1807 )
1808 })
1809 }
1810
/// Walks `base_path` in parallel and builds a fresh `FileSync` from the
/// files it finds.
///
/// Steps, in order:
/// 1. Walk the tree, honoring ignore files; collect one
///    `(FileItem, relative path)` pair per regular file and bump
///    `synced_files_count` for each.
/// 2. Sort the pairs by directory part, then file name, and store paths and
///    directories in a chunked path store.
/// 3. If a frecency tracker is available, compute per-file frecency scores
///    and propagate positive access scores to the parent directory.
/// 4. Re-sort the files so that indexable ones (non-binary, non-empty, at
///    most `BIGRAM_ELIGIBLE_MAX_SIZE` bytes) come first, and record how many
///    there are.
///
/// # Errors
///
/// Returns `Error::AcquireFrecencyLock` when the shared frecency lock cannot
/// be read.
pub(crate) fn walk_filesystem(
    base_path: &Path,
    git_workdir: Option<PathBuf>,
    synced_files_count: &Arc<AtomicUsize>,
    shared_frecency: &SharedFrecency,
    mode: FfsMode,
) -> Result<FileSync, Error> {
    use ignore::WalkBuilder;

    let scan_start = std::time::Instant::now();
    info!("SCAN: Starting filesystem walk and git status (async)");

    let is_git_repo = git_workdir.is_some();
    // Use as many walker threads as the background pool has workers.
    let bg_threads = BACKGROUND_THREAD_POOL.current_num_threads();

    let mut walk_builder = WalkBuilder::new(base_path);
    walk_builder
        // Hidden-file filtering is enabled only outside git repositories.
        .hidden(!is_git_repo)
        .git_ignore(true)
        .git_exclude(true)
        .git_global(true)
        .ignore(true)
        .follow_links(false)
        .threads(bg_threads);

    if !is_git_repo && let Some(overrides) = non_git_repo_overrides(base_path) {
        walk_builder.overrides(overrides);
    }

    let walker = walk_builder.build_parallel();
    let walker_start = std::time::Instant::now();
    debug!("SCAN: Starting file walker");

    // Shared sink for all walker threads.
    let pairs = parking_lot::Mutex::new(Vec::<(FileItem, String)>::new());

    walker.run(|| {
        // Per-thread setup: each walker thread gets its own handles.
        let pairs = &pairs;
        let counter = Arc::clone(synced_files_count);
        let base_path = base_path.to_path_buf();

        Box::new(move |result| {
            // Entries that failed to read are skipped.
            let Ok(entry) = result else {
                return ignore::WalkState::Continue;
            };

            if entry.file_type().is_some_and(|ft| ft.is_file()) {
                let path = entry.path();

                if is_git_file(path) {
                    return ignore::WalkState::Continue;
                }

                // The extension-based binary filter applies only outside
                // git repositories.
                if !is_git_repo && is_known_binary_extension(path) {
                    return ignore::WalkState::Continue;
                }

                let metadata = entry.metadata().ok();
                let (file_item, rel_path) =
                    FileItem::new_from_walk(path, &base_path, None, metadata.as_ref());

                pairs.lock().push((file_item, rel_path));
                counter.fetch_add(1, Ordering::Relaxed);
            }
            ignore::WalkState::Continue
        })
    });

    let mut pairs = pairs.into_inner();
    info!(
        "SCAN: File walking completed in {:?} for {} files",
        walker_start.elapsed(),
        pairs.len(),
    );

    BACKGROUND_THREAD_POOL.install(|| {
        // Order by directory part first, then by file name, so that files of
        // the same directory end up adjacent.
        pairs.par_sort_unstable_by(|(a, path_a), (b, path_b)| {
            let (a_dir, a_file) = path_a.split_at(a.path.filename_offset as usize);
            let (b_dir, b_file) = path_b.split_at(b.path.filename_offset as usize);
            a_dir.cmp(b_dir).then_with(|| a_file.cmp(b_file))
        });
    });

    let mut builder = crate::simd_path::ChunkedPathStoreBuilder::new(pairs.len());
    // Relies on the ordering above: a new directory entry is created each
    // time the directory part changes between consecutive pairs.
    let dirs = populates_dirs_files_chunked_storage(&mut pairs, &mut builder);

    let mut files: Vec<FileItem> = pairs.into_iter().map(|(file, _)| file).collect();
    let chunked_paths = builder.finish();
    let arena = chunked_paths.as_arena_ptr();

    let frecency = shared_frecency
        .read()
        .map_err(|_| Error::AcquireFrecencyLock)?;

    if let Some(frecency) = frecency.as_ref() {
        let dirs_ref = &dirs;
        BACKGROUND_THREAD_POOL.install(|| {
            files.par_iter_mut().for_each(|file| {
                // Per-file scoring failures are deliberately ignored here.
                let _ = file.update_frecency_scores(frecency, arena, base_path, mode);
                let score = file.access_frecency_score as i32;
                if score > 0 {
                    let dir_idx = file.parent_dir_index() as usize;
                    if let Some(dir) = dirs_ref.get(dir_idx) {
                        dir.update_frecency_if_larger(score);
                    }
                }
            });
        });
    }
    // Release the read lock before the remaining work.
    drop(frecency);

    const BIGRAM_ELIGIBLE_MAX_SIZE: u64 = 10 * 1024 * 1024;
    let is_indexable =
        |f: &FileItem| !f.is_binary() && f.size > 0 && f.size <= BIGRAM_ELIGIBLE_MAX_SIZE;
    BACKGROUND_THREAD_POOL.install(|| {
        files.par_sort_unstable_by(|a, b| {
            // `false < true`, so comparing the negated flag puts indexable
            // files first; ties fall back to directory index and file name.
            (!is_indexable(a))
                .cmp(&!is_indexable(b))
                .then_with(|| a.parent_dir_index().cmp(&b.parent_dir_index()))
                .then_with(|| a.file_name(arena).cmp(&b.file_name(arena)))
        });
    });
    // All indexable files now form a prefix; its length is the count.
    let indexable_count = files.partition_point(is_indexable);

    hint_allocator_collect();

    // Memory figures below are used for the summary log line only.
    let file_item_size = std::mem::size_of::<FileItem>();
    let files_vec_bytes = files.len() * file_item_size;
    let dir_table_bytes = dirs.len() * std::mem::size_of::<DirItem>()
        + dirs
            .iter()
            .map(|d| d.relative_path(arena).len())
            .sum::<usize>();

    let total_time = scan_start.elapsed();
    info!(
        "SCAN: Walk completed in {:?} ({} files, {} dirs, \
         chunked_store={:.2}MB, files_vec={:.2}MB, dirs={:.2}MB, FileItem={}B)",
        total_time,
        files.len(),
        dirs.len(),
        chunked_paths.heap_bytes() as f64 / 1_048_576.0,
        files_vec_bytes as f64 / 1_048_576.0,
        dir_table_bytes as f64 / 1_048_576.0,
        file_item_size,
    );

    let base_count = files.len();

    Ok(FileSync {
        // Reserve room up front for files added after the scan.
        files: StableVec::from_vec_with_reserve(files, MAX_OVERFLOW_FILES),
        indexable_count,
        base_count,
        dirs,
        overflow_builder: None,
        git_workdir,
        bigram_index: None,
        bigram_overlay: None,
        chunked_paths: Some(chunked_paths),
    })
}
1998}
1999
2000fn populates_dirs_files_chunked_storage<'a>(
2003 pairs: &'a mut [(FileItem, String)],
2004 builder: &mut crate::simd_path::ChunkedPathStoreBuilder,
2005) -> Vec<DirItem> {
2006 let mut dirs: Vec<DirItem> = Vec::new();
2007
2008 let mut prev_dir: &'a str = "";
2009 let mut prev_dir_valid = false;
2010 let mut current_dir_idx: u32 = 0;
2011
2012 for (file, rel) in pairs.iter_mut() {
2013 let rel: &'a str = rel;
2014 let dir_part: &'a str = &rel[..file.path.filename_offset as usize];
2015
2016 if !prev_dir_valid || prev_dir != dir_part {
2017 let dir_string = builder.add_dir_immediate(dir_part);
2018
2019 let last_seg = if dir_part.is_empty() {
2021 0
2022 } else {
2023 let trimmed = dir_part.trim_end_matches(std::path::is_separator);
2024 trimmed
2025 .rfind(std::path::is_separator)
2026 .map(|i| i + 1)
2027 .unwrap_or(0) as u16
2028 };
2029
2030 dirs.push(DirItem::new(dir_string, last_seg));
2031 current_dir_idx = (dirs.len() - 1) as u32;
2032
2033 prev_dir = dir_part;
2034 prev_dir_valid = true;
2035 }
2036
2037 let cs = builder.add_file_immediate(rel, file.path.filename_offset);
2038
2039 file.set_path(cs);
2040 file.set_parent_dir(current_dir_idx);
2041 }
2042
2043 dirs
2044}
2045
/// Reports whether the extension of `path` is on the list of extensions
/// treated as binary.
///
/// The comparison is exact (case-sensitive). Paths without an extension, or
/// whose extension is not valid UTF-8, yield `false`.
#[inline]
fn is_known_binary_extension(path: &Path) -> bool {
    const BINARY_EXTENSIONS: &[&str] = &[
        // Images
        "png", "jpg", "jpeg", "gif", "bmp", "ico", "webp", "tiff", "tif", "avif",
        "heic", "psd", "icns", "cur", "raw", "cr2", "nef", "dng",
        // Audio / video
        "mp4", "avi", "mov", "wmv", "mkv", "mp3", "wav", "flac", "ogg", "m4a",
        "aac", "webm", "flv", "mpg", "mpeg", "wma", "opus", "pcm", "reapeaks",
        // Archives
        "zip", "tar", "gz", "bz2", "xz", "7z", "rar", "zst", "lz4", "lzma",
        "cab", "cpio", "jsonlz4",
        // Packages / installers
        "deb", "rpm", "apk", "dmg", "msi", "iso", "nupkg", "whl", "egg",
        "snap", "appimage", "flatpak", "crx", "pak",
        // Executables / object files
        "exe", "dll", "so", "dylib", "o", "a", "lib", "bin", "elf",
        // Documents
        "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
        // Databases
        "db", "sqlite", "sqlite3", "mdb",
        "sqlite-wal", "sqlite-shm", "sqlite3-wal", "sqlite3-shm",
        "db-wal", "db-shm", "ldb",
        // Fonts
        "ttf", "otf", "woff", "woff2", "eot",
        // Compiled / intermediate artifacts
        "class", "pyc", "pyo", "wasm", "dex", "jar", "war",
        "cmi", "cmt", "cmti", "cmx", "cof", "cot", "cop", "nib",
        "swiftdeps", "swiftdeps~", "swiftdoc", "swiftmodule", "swiftsourceinfo",
        // Data / model files
        "npy", "npz", "pkl", "pickle", "h5", "hdf5", "pt", "pth", "onnx",
        "safetensors", "tfrecord",
        // Miscellaneous
        "glb", "fbx", "blend", "blp", "tga",
        "meta", "dat", "tfx", "dia", "journal", "toc", "thm", "pfl",
        "shadow", "scan", "flm", "bcmap", "userinfo",
        "parquet", "arrow", "pb",
        "DS_Store", "suo",
    ];

    path.extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| BINARY_EXTENSIONS.contains(&ext))
}
2098
/// Heuristic binary check: `true` if a NUL byte occurs within the first 512
/// bytes of `content` (or within all of it, when it is shorter).
#[inline]
pub(crate) fn detect_binary_content(content: &[u8]) -> bool {
    content.iter().take(512).any(|&byte| byte == 0)
}
2106
/// Length in bytes of the longest directory prefix shared by `a` and `b`.
///
/// The result ends just after the last separator inside the common byte
/// prefix. As a special case, when the shorter path is a complete, non-empty
/// prefix of the longer one and the longer path continues with a separator,
/// the length of the shorter path is returned (without that separator).
fn common_dir_prefix_len(a: &str, b: &str) -> usize {
    let is_sep = |byte: u8| std::path::is_separator(byte as char);

    // On equal lengths `b` plays the "longer" role; it makes no difference
    // because there is no byte past the shared length to inspect.
    let (shorter, longer) = if a.len() > b.len() {
        (b.as_bytes(), a.as_bytes())
    } else {
        (a.as_bytes(), b.as_bytes())
    };

    let matched = shorter
        .iter()
        .zip(longer)
        .take_while(|(x, y)| x == y)
        .count();

    let whole_shorter_matched = matched == shorter.len() && matched > 0;
    if whole_shorter_matched && longer.get(matched).is_some_and(|&byte| is_sep(byte)) {
        return matched;
    }

    shorter[..matched]
        .iter()
        .rposition(|&byte| is_sep(byte))
        .map_or(0, |pos| pos + 1)
}
2142
/// Asks the allocator to collect memory it no longer needs.
///
/// Does nothing unless the crate is built with the `mimalloc-collect`
/// feature.
pub(crate) fn hint_allocator_collect() {
    #[cfg(feature = "mimalloc-collect")]
    {
        // Run the collection on every worker thread of the background pool.
        // NOTE(review): presumably needed because mimalloc keeps per-thread
        // state — confirm against the mimalloc documentation.
        BACKGROUND_THREAD_POOL.broadcast(|_| unsafe { libmimalloc_sys::mi_collect(true) });

        // ... and once more on the calling thread.
        unsafe { libmimalloc_sys::mi_collect(true) };
    }
}
2158
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a small tree on disk, scans it, and checks that `for_each_dir`
    /// reports both the immediate parent directories of the files and their
    /// pure ancestors, without duplicates and without the base path itself.
    #[test]
    fn extract_watch_dirs_includes_pure_ancestor_dirs() {
        let dir = tempfile::tempdir().unwrap();
        // Canonicalize so that the expected paths compare equal to what the
        // picker reports.
        let base_buf = crate::path_utils::canonicalize(dir.path()).unwrap();
        let base = base_buf.as_path();

        for rel in [
            "src/components/button.txt",
            "src/routes/home.txt",
            "lib/deep/nested/util.txt",
        ] {
            let path = base.join(rel);
            std::fs::create_dir_all(path.parent().unwrap()).unwrap();
            std::fs::write(&path, b"x").unwrap();
        }

        let mut picker = FilePicker::new(FilePickerOptions {
            base_path: base.to_str().unwrap().into(),
            watch: false,
            ..Default::default()
        })
        .unwrap();
        picker.collect_files().unwrap();

        let mut watch_dirs: Vec<PathBuf> = Vec::new();
        picker.for_each_dir(|p| {
            watch_dirs.push(p.to_path_buf());
            std::ops::ControlFlow::Continue(())
        });
        let watch_set: std::collections::HashSet<PathBuf> = watch_dirs.iter().cloned().collect();

        // Directories that directly contain a file.
        for rel in ["src/components", "src/routes", "lib/deep/nested"] {
            assert!(
                watch_set.contains(&base.join(rel)),
                "expected immediate parent {rel} in watch dirs, got {watch_set:?}",
            );
        }

        // Directories that contain only other directories.
        for rel in ["src", "lib", "lib/deep"] {
            assert!(
                watch_set.contains(&base.join(rel)),
                "expected pure-ancestor {rel} in watch dirs, got {watch_set:?}",
            );
        }

        // The list and the set have the same size only if nothing repeats.
        assert_eq!(
            watch_dirs.len(),
            watch_set.len(),
            "duplicate watch dir emitted: {watch_dirs:?}",
        );

        assert!(
            !watch_set.contains(base),
            "base path must not be in watch dirs (covered by the top-level watch call)",
        );
    }

    /// Table of expected results for `common_dir_prefix_len`, covering empty
    /// input, disjoint paths, shared parents, and one path being a prefix of
    /// the other.
    #[test]
    fn common_dir_prefix_len_cases() {
        assert_eq!(common_dir_prefix_len("", ""), 0);
        assert_eq!(common_dir_prefix_len("", "src"), 0);
        assert_eq!(common_dir_prefix_len("lib", "src"), 0);
        assert_eq!(common_dir_prefix_len("src/components", "src/routes"), 4);
        assert_eq!(common_dir_prefix_len("lib/deep/nested", "lib/deep"), 8);
        assert_eq!(common_dir_prefix_len("lib/deep", "lib/deep/nested"), 8);
        // "lib/deep" is a byte prefix of "lib/deeper" but not a directory
        // prefix, so only "lib/" counts.
        assert_eq!(common_dir_prefix_len("lib/deep", "lib/deeper"), 4);
        assert_eq!(common_dir_prefix_len("src", "src"), 0);
        assert_eq!(common_dir_prefix_len("src", "src/x"), 3);
    }
}