Skip to main content

fff_search/
file_picker.rs

1use crate::background_watcher::BackgroundWatcher;
2use crate::error::Error;
3use crate::frecency::FrecencyTracker;
4use crate::git::GitStatusCache;
5use crate::query_tracker::QueryMatchEntry;
6use crate::score::match_and_score_files;
7use crate::types::{FileItem, PaginationArgs, ScoringContext, SearchResult};
8use crate::{SharedFrecency, SharedPicker};
9use fff_query_parser::FFFQuery;
10use git2::{Repository, Status, StatusOptions};
11use rayon::prelude::*;
12use std::fmt::Debug;
13use std::io::Read;
14use std::path::{Path, PathBuf};
15use std::sync::{
16    Arc,
17    atomic::{AtomicBool, AtomicUsize, Ordering},
18};
19use std::time::SystemTime;
20use tracing::{Level, debug, error, info, warn};
21
/// Operating mode of the picker; forwarded into frecency scoring
/// (see `FileItem::update_frecency_scores`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum FFFMode {
    /// Interactive Neovim usage (the default).
    #[default]
    Neovim,
    /// AI-driven usage.
    Ai,
}
28
29impl FFFMode {
30    pub fn is_ai(self) -> bool {
31        self == FFFMode::Ai
32    }
33}
34
/// Detect if a file is binary by checking for NUL bytes in the first 512 bytes.
/// This is the same heuristic used by git and grep — simple, fast, and sufficient.
///
/// Returns `false` for empty or unreadable files so they are never silently
/// filtered out as "binary".
#[inline]
fn detect_binary(path: &Path, size: u64) -> bool {
    // Empty files are not binary
    if size == 0 {
        return false;
    }

    let Ok(mut file) = std::fs::File::open(path) else {
        return false;
    };

    // Read directly into a stack buffer: a BufReader would only add an extra
    // copy here, since we perform exactly one bounded probe.
    let mut buf = [0u8; 512];
    let mut filled = 0;

    // A single `read` may legitimately return fewer bytes than are available,
    // so loop until the probe buffer is full or we hit EOF.
    while filled < buf.len() {
        match file.read(&mut buf[filled..]) {
            Ok(0) => break, // EOF
            Ok(n) => filled += n,
            // Transient interruption: retry the read.
            Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            // Best effort: classify based on whatever we managed to read.
            Err(_) => break,
        }
    }

    buf[..filled].contains(&0)
}
53
/// Tuning and context parameters for `FilePicker::fuzzy_search`.
#[derive(Debug, Clone, Copy)]
pub struct FuzzySearchOptions<'a> {
    /// Worker-thread budget; 0 means "use available parallelism".
    pub max_threads: usize,
    /// Path of the currently open file, forwarded to the scorer.
    pub current_file: Option<&'a str>,
    /// Project root, forwarded to the scorer.
    pub project_path: Option<&'a Path>,
    /// Match entry recorded for the previous identical query, forwarded to the
    /// scorer (presumably for combo boosting — confirm against `score` module).
    pub last_same_query_match: Option<&'a QueryMatchEntry>,
    /// Score multiplier applied by the combo-boost logic in the scorer.
    pub combo_boost_score_multiplier: i32,
    /// Minimum combo count before boosting kicks in (scorer-defined semantics).
    pub min_combo_count: u32,
    /// Offset/limit controls for the returned result page.
    pub pagination: PaginationArgs,
}
64
/// The file index plus the git workdir discovered for it.
#[derive(Debug, Clone)]
struct FileSync {
    /// Files sorted by path for binary search
    files: Vec<FileItem>,
    /// Workdir of the discovered git repository, if any.
    pub git_workdir: Option<PathBuf>,
}
71
72impl FileSync {
73    fn new() -> Self {
74        Self {
75            files: Vec::new(),
76            git_workdir: None,
77        }
78    }
79
80    /// Get all files (read-only). Files are sorted by path.
81    #[inline]
82    fn files(&self) -> &[FileItem] {
83        &self.files
84    }
85
86    fn get_file(&self, index: usize) -> Option<&FileItem> {
87        self.files.get(index)
88    }
89
90    /// Get mutable file at index
91    #[inline]
92    fn get_file_mut(&mut self, index: usize) -> Option<&mut FileItem> {
93        self.files.get_mut(index)
94    }
95
96    /// Find file index by path using binary search - O(log n)
97    #[inline]
98    fn find_file_index(&self, path: &Path) -> Result<usize, usize> {
99        self.files.binary_search_by(|f| f.path.as_path().cmp(path))
100    }
101
102    /// Get file count
103    #[inline]
104    #[allow(dead_code)]
105    fn len(&self) -> usize {
106        self.files.len()
107    }
108
109    /// Insert a file at position. Simple - no HashMap to maintain!
110    fn insert_file(&mut self, position: usize, file: FileItem) {
111        self.files.insert(position, file);
112    }
113
114    /// Remove file at index. Simple - no HashMap to maintain!
115    fn remove_file(&mut self, index: usize) {
116        if index < self.files.len() {
117            self.files.remove(index);
118        }
119    }
120
121    /// Remove files matching predicate.
122    /// Returns number of files removed.
123    fn retain_files<F>(&mut self, predicate: F) -> usize
124    where
125        F: FnMut(&FileItem) -> bool,
126    {
127        let initial_len = self.files.len();
128        self.files.retain(predicate);
129        initial_len - self.files.len()
130    }
131
132    /// Insert a file in sorted order (by path).
133    /// Returns true if inserted, false if file already exists.
134    fn insert_file_sorted(&mut self, file: FileItem) -> bool {
135        match self.find_file_index(&file.path) {
136            Ok(_) => false, // File already exists
137            Err(position) => {
138                self.insert_file(position, file);
139                true
140            }
141        }
142    }
143}
144
145impl FileItem {
146    pub fn new(path: PathBuf, base_path: &Path, git_status: Option<Status>) -> Self {
147        let relative_path = pathdiff::diff_paths(&path, base_path)
148            .unwrap_or_else(|| path.clone())
149            .to_string_lossy()
150            .into_owned();
151
152        let name = path
153            .file_name()
154            .unwrap_or_default()
155            .to_string_lossy()
156            .into_owned();
157
158        let (size, modified) = match std::fs::metadata(&path) {
159            Ok(metadata) => {
160                let size = metadata.len();
161                let modified = metadata
162                    .modified()
163                    .ok()
164                    .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
165                    .map_or(0, |d| d.as_secs());
166
167                (size, modified)
168            }
169            Err(_) => (0, 0),
170        };
171
172        let is_binary = detect_binary(&path, size);
173
174        Self::new_raw(
175            path,
176            relative_path,
177            name,
178            size,
179            modified,
180            git_status,
181            is_binary,
182        )
183    }
184
185    pub fn update_frecency_scores(
186        &mut self,
187        tracker: &FrecencyTracker,
188        mode: FFFMode,
189    ) -> Result<(), Error> {
190        self.access_frecency_score = tracker.get_access_score(&self.path, mode);
191        self.modification_frecency_score =
192            tracker.get_modification_score(self.modified, self.git_status, mode);
193        self.total_frecency_score = self.access_frecency_score + self.modification_frecency_score;
194
195        Ok(())
196    }
197}
198
/// Indexes files under `base_path` and serves fuzzy searches over them.
/// The initial scan runs on a background thread and a filesystem watcher
/// keeps the index up to date afterwards.
pub struct FilePicker {
    /// Root directory being indexed.
    base_path: PathBuf,
    /// Path-sorted file index plus the discovered git workdir.
    sync_data: FileSync,
    /// True while a filesystem scan is in progress (shared with the scan thread).
    is_scanning: Arc<AtomicBool>,
    /// Files counted by the current/last scan (shared with the scan thread).
    scanned_files_count: Arc<AtomicUsize>,
    /// Watcher applying filesystem events to the index; `None` until installed.
    background_watcher: Option<BackgroundWatcher>,
    /// Whether to pre-fault mmaps after a scan to speed up the first grep.
    warmup_mmap_cache: bool,
    /// Set when this picker instance is replaced; background work checks it.
    cancelled: Arc<AtomicBool>,
    /// Scoring mode (Neovim vs AI).
    mode: FFFMode,
}
209
210impl std::fmt::Debug for FilePicker {
211    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
212        f.debug_struct("FilePicker")
213            .field("base_path", &self.base_path)
214            .field("sync_data", &self.sync_data)
215            .field("is_scanning", &self.is_scanning.load(Ordering::Relaxed))
216            .field(
217                "scanned_files_count",
218                &self.scanned_files_count.load(Ordering::Relaxed),
219            )
220            .finish_non_exhaustive()
221    }
222}
223
224impl FilePicker {
225    pub fn base_path(&self) -> &Path {
226        &self.base_path
227    }
228
229    pub fn warmup_mmap_cache(&self) -> bool {
230        self.warmup_mmap_cache
231    }
232
233    pub fn mode(&self) -> FFFMode {
234        self.mode
235    }
236
237    pub fn git_root(&self) -> Option<&Path> {
238        self.sync_data.git_workdir.as_deref()
239    }
240
241    /// Get all indexed files sorted by path.
242    /// Note: Files are stored sorted by PATH for efficient insert/remove.
243    /// For frecency-sorted results, use search() which sorts matched results.
244    pub fn get_files(&self) -> &[FileItem] {
245        self.sync_data.files()
246    }
247
248    /// Create a new FilePicker and place it into the provided shared handle.
249    ///
250    /// The background scan thread and file-system watcher write into the
251    /// provided `SharedPicker` and read frecency data from the provided
252    /// `SharedFrecency`.
253    ///
254    /// Multiple independent instances can coexist in the same process.
255    pub fn new_with_shared_state(
256        base_path: String,
257        warmup_mmap_cache: bool,
258        mode: FFFMode,
259        shared_picker: SharedPicker,
260        shared_frecency: SharedFrecency,
261    ) -> Result<(), Error> {
262        info!(
263            "Initializing FilePicker with base_path: {}, warmup: {}, mode: {:?}",
264            base_path, warmup_mmap_cache, mode
265        );
266        let path = PathBuf::from(&base_path);
267        if !path.exists() {
268            error!("Base path does not exist: {}", base_path);
269            return Err(Error::InvalidPath(path));
270        }
271
272        // Initialize scan_signal to `true` so that any `wait_for_scan` call
273        // that races with the background thread sees "scanning in progress"
274        // rather than a stale `false` (the thread hasn't started yet).
275        let scan_signal = Arc::new(AtomicBool::new(true));
276        let synced_files_count = Arc::new(AtomicUsize::new(0));
277        let cancelled = Arc::new(AtomicBool::new(false));
278
279        let picker = FilePicker {
280            base_path: path.clone(),
281            sync_data: FileSync::new(),
282            is_scanning: Arc::clone(&scan_signal),
283            scanned_files_count: Arc::clone(&synced_files_count),
284            background_watcher: None,
285            warmup_mmap_cache,
286            cancelled: Arc::clone(&cancelled),
287            mode,
288        };
289
290        // Place the picker into the shared handle before spawning the
291        // background thread so the thread can find it immediately.
292        {
293            let mut guard = shared_picker.write().map_err(|_| Error::AcquireItemLock)?;
294            *guard = Some(picker);
295        }
296
297        spawn_scan_and_watcher(
298            path.clone(),
299            Arc::clone(&scan_signal),
300            Arc::clone(&synced_files_count),
301            warmup_mmap_cache,
302            mode,
303            shared_picker,
304            shared_frecency,
305            cancelled,
306        );
307
308        Ok(())
309    }
310
311    /// Perform fuzzy search on files with a pre-parsed query.
312    ///
313    /// The query should be parsed using `QueryParser::parse()` before calling this function.
314    /// This allows the caller to handle location parsing and other preprocessing.
315    ///
316    /// # Arguments
317    /// * `files` - Slice of files to search
318    /// * `query` - The raw query string (used for max_typos calculation and debugging)
319    /// * `parsed` - Pre-parsed query result (can be None for simple single-token queries)
320    /// * `options` - Search options including pagination, threading, and scoring parameters
321    ///
322    /// # Returns
323    /// SearchResult containing matched files, scores, and location information
324    pub fn fuzzy_search<'a, 'q>(
325        files: &'a [FileItem],
326        query: &'q FFFQuery<'q>,
327        options: FuzzySearchOptions<'q>,
328    ) -> SearchResult<'a> {
329        let max_threads = if options.max_threads == 0 {
330            std::thread::available_parallelism()
331                .map(|n| n.get())
332                .unwrap_or(4)
333        } else {
334            options.max_threads
335        };
336        debug!(
337            raw_query = ?query.raw_query,
338            pagination = ?options.pagination,
339            ?max_threads,
340            current_file = ?options.current_file,
341            "Fuzzy search",
342        );
343
344        let total_files = files.len();
345
346        let location = query.location;
347
348        // Get effective query for max_typos calculation (without location suffix)
349        let effective_query = match &query.fuzzy_query {
350            fff_query_parser::FuzzyQuery::Text(t) => *t,
351            fff_query_parser::FuzzyQuery::Parts(parts) if !parts.is_empty() => parts[0],
352            _ => query.raw_query.trim(),
353        };
354
355        // small queries with a large number of results can match absolutely everything
356        let max_typos = (effective_query.len() as u16 / 4).clamp(2, 6);
357
358        let context = ScoringContext {
359            query,
360            project_path: options.project_path,
361            max_typos,
362            max_threads,
363            current_file: options.current_file,
364            last_same_query_match: options.last_same_query_match,
365            combo_boost_score_multiplier: options.combo_boost_score_multiplier,
366            min_combo_count: options.min_combo_count,
367            pagination: options.pagination,
368        };
369
370        let time = std::time::Instant::now();
371
372        let (items, scores, total_matched) = match_and_score_files(files, &context);
373
374        debug!(
375            ?query,
376            completed_in = ?time.elapsed(),
377            total_matched,
378            returned_count = items.len(),
379            pagination = ?options.pagination,
380            "Fuzzy search completed",
381        );
382
383        SearchResult {
384            items,
385            scores,
386            total_matched,
387            total_files,
388            location,
389        }
390    }
391
392    pub fn get_scan_progress(&self) -> ScanProgress {
393        let scanned_count = self.scanned_files_count.load(Ordering::Relaxed);
394        let is_scanning = self.is_scanning.load(Ordering::Relaxed);
395        ScanProgress {
396            scanned_files_count: scanned_count,
397            is_scanning,
398        }
399    }
400
401    /// Update git statuses for files, using the provided shared frecency tracker.
402    pub fn update_git_statuses(
403        &mut self,
404        status_cache: GitStatusCache,
405        shared_frecency: &SharedFrecency,
406    ) -> Result<(), Error> {
407        debug!(
408            statuses_count = status_cache.statuses_len(),
409            "Updating git status",
410        );
411
412        let mode = self.mode;
413        let frecency = shared_frecency
414            .read()
415            .map_err(|_| Error::AcquireFrecencyLock)?;
416        status_cache
417            .into_iter()
418            .try_for_each(|(path, status)| -> Result<(), Error> {
419                if let Some(file) = self.get_mut_file_by_path(&path) {
420                    file.git_status = Some(status);
421                    if let Some(ref f) = *frecency {
422                        file.update_frecency_scores(f, mode)?;
423                    }
424                } else {
425                    error!(?path, "Couldn't update the git status for path");
426                }
427                Ok(())
428            })?;
429
430        Ok(())
431    }
432
433    /// Refreshes git statuses using the provided shared picker and frecency handles.
434    pub fn refresh_git_status(
435        shared_picker: &SharedPicker,
436        shared_frecency: &SharedFrecency,
437    ) -> Result<usize, Error> {
438        let git_status = {
439            let guard = shared_picker.read().map_err(|_| Error::AcquireItemLock)?;
440            let Some(ref picker) = *guard else {
441                return Err(Error::FilePickerMissing);
442            };
443
444            debug!(
445                "Refreshing git statuses for picker: {:?}",
446                picker.git_root()
447            );
448
449            GitStatusCache::read_git_status(
450                picker.git_root(),
451                StatusOptions::new()
452                    .include_untracked(true)
453                    .recurse_untracked_dirs(true)
454                    .include_unmodified(true)
455                    .exclude_submodules(true),
456            )
457        };
458
459        let mut guard = shared_picker.write().map_err(|_| Error::AcquireItemLock)?;
460        let picker = guard.as_mut().ok_or(Error::FilePickerMissing)?;
461
462        let statuses_count = if let Some(git_status) = git_status {
463            let count = git_status.statuses_len();
464            picker.update_git_statuses(git_status, shared_frecency)?;
465            count
466        } else {
467            0
468        };
469
470        Ok(statuses_count)
471    }
472
473    pub fn update_single_file_frecency(
474        &mut self,
475        file_path: impl AsRef<Path>,
476        frecency_tracker: &FrecencyTracker,
477    ) -> Result<(), Error> {
478        if let Ok(index) = self.sync_data.find_file_index(file_path.as_ref())
479            && let Some(file) = self.sync_data.get_file_mut(index)
480        {
481            file.update_frecency_scores(frecency_tracker, self.mode)?;
482        }
483
484        Ok(())
485    }
486
487    pub fn get_file_by_path(&self, path: impl AsRef<Path>) -> Option<&FileItem> {
488        self.sync_data
489            .find_file_index(path.as_ref())
490            .ok()
491            .and_then(|index| self.sync_data.files().get(index))
492    }
493
494    pub fn get_mut_file_by_path(&mut self, path: impl AsRef<Path>) -> Option<&mut FileItem> {
495        self.sync_data
496            .find_file_index(path.as_ref())
497            .ok()
498            .and_then(|index| self.sync_data.get_file_mut(index))
499    }
500
501    /// Add a file to the picker's files in sorted order (used by background watcher)
502    pub fn add_file_sorted(&mut self, file: FileItem) -> Option<&FileItem> {
503        let path = file.path.clone();
504
505        if self.sync_data.insert_file_sorted(file) {
506            // File was inserted, look it up
507            self.sync_data
508                .find_file_index(&path)
509                .ok()
510                .and_then(|idx| self.sync_data.get_file_mut(idx))
511                .map(|file_mut| &*file_mut) // Convert &mut to &
512        } else {
513            // File already exists
514            warn!(
515                "Trying to insert a file that already exists: {}",
516                path.display()
517            );
518            self.sync_data
519                .find_file_index(&path)
520                .ok()
521                .and_then(|idx| self.sync_data.get_file_mut(idx))
522                .map(|file_mut| &*file_mut) // Convert &mut to &
523        }
524    }
525
526    #[tracing::instrument(skip(self), name = "timing_update", level = Level::DEBUG)]
527    pub fn on_create_or_modify(&mut self, path: impl AsRef<Path> + Debug) -> Option<&FileItem> {
528        let path = path.as_ref();
529        match self.sync_data.find_file_index(path) {
530            Ok(pos) => {
531                debug!(
532                    "on_create_or_modify: file EXISTS at index {}, updating metadata",
533                    pos
534                );
535                // File exists - update its metadata (doesn't change indices, safe)
536                let file = self.sync_data.get_file_mut(pos)?;
537
538                let modified = match std::fs::metadata(path) {
539                    Ok(metadata) => metadata
540                        .modified()
541                        .ok()
542                        .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok()),
543                    Err(e) => {
544                        error!("Failed to get metadata for {}: {}", path.display(), e);
545                        None
546                    }
547                };
548
549                if let Some(modified) = modified {
550                    let modified = modified.as_secs();
551                    if file.modified < modified {
552                        file.modified = modified;
553
554                        // TODO figure out if we actually need to remap the memory or invalidate
555                        // mapping here because on linux and macos with the shared map opening it
556                        // should be automatically available everywhere automatically which saves
557                        // some time from doing extra remapping on every search
558                        file.invalidate_mmap();
559                    }
560                }
561
562                Some(&*file) // Convert &mut to &
563            }
564            Err(pos) => {
565                debug!(
566                    "on_create_or_modify: file NEW, inserting at index {} (total files: {})",
567                    pos,
568                    self.sync_data.files().len()
569                );
570
571                let file_item = FileItem::new(path.to_path_buf(), &self.base_path, None);
572                let path_buf = file_item.path.clone();
573
574                self.sync_data.insert_file(pos, file_item);
575                let result = self.sync_data.get_file(pos);
576
577                if result.is_none() {
578                    error!(
579                        "on_create_or_modify: FAILED to find file after insert! path={:?}",
580                        path_buf
581                    );
582                } else {
583                    debug!("on_create_or_modify: successfully inserted and found file");
584                }
585
586                result
587            }
588        }
589    }
590
591    pub fn remove_file_by_path(&mut self, path: impl AsRef<Path>) -> bool {
592        let path = path.as_ref();
593        match self.sync_data.find_file_index(path) {
594            Ok(index) => {
595                self.sync_data.remove_file(index);
596                true
597            }
598            Err(_) => false,
599        }
600    }
601
602    // TODO make this O(n)
603    pub fn remove_all_files_in_dir(&mut self, dir: impl AsRef<Path>) -> usize {
604        let dir_path = dir.as_ref();
605        // Use the safe retain_files method which maintains both indices
606        self.sync_data
607            .retain_files(|file| !file.path.starts_with(dir_path))
608    }
609
610    /// We use this to prevent any substantial background threads from acquiring the locks
611    pub fn cancel(&self) {
612        self.cancelled.store(true, Ordering::Release);
613    }
614
615    pub fn stop_background_monitor(&mut self) {
616        if let Some(mut watcher) = self.background_watcher.take() {
617            watcher.stop();
618        }
619    }
620
621    pub fn trigger_rescan(&mut self, shared_frecency: &SharedFrecency) -> Result<(), Error> {
622        if self.is_scanning.load(Ordering::Relaxed) {
623            debug!("Scan already in progress, skipping trigger_rescan");
624            return Ok(());
625        }
626
627        self.is_scanning.store(true, Ordering::Relaxed);
628        self.scanned_files_count.store(0, Ordering::Relaxed);
629
630        let scan_result = scan_filesystem(
631            &self.base_path,
632            &self.scanned_files_count,
633            shared_frecency,
634            self.mode,
635        );
636        match scan_result {
637            Ok(sync) => {
638                info!(
639                    "Filesystem scan completed: found {} files",
640                    sync.files.len()
641                );
642
643                self.sync_data = sync;
644
645                if self.warmup_mmap_cache {
646                    // Warmup in background to avoid blocking
647                    let files = self.sync_data.files().to_vec(); // Clone all files
648                    std::thread::spawn(move || {
649                        warmup_mmaps(&files);
650                    });
651                }
652            }
653            Err(error) => error!(?error, "Failed to scan file system"),
654        }
655
656        self.is_scanning.store(false, Ordering::Relaxed);
657        Ok(())
658    }
659
660    pub fn is_scan_active(&self) -> bool {
661        self.is_scanning.load(Ordering::Relaxed)
662    }
663
664    /// Return a clone of the scanning flag so callers can poll it without
665    /// holding a lock on the picker.
666    pub fn scan_signal(&self) -> Arc<AtomicBool> {
667        Arc::clone(&self.is_scanning)
668    }
669}
670
/// Snapshot of the background scan state.
#[allow(unused)]
#[derive(Debug, Clone)]
pub struct ScanProgress {
    /// Files counted so far by the current/last scan.
    pub scanned_files_count: usize,
    /// Whether a scan is currently running.
    pub is_scanning: bool,
}
677
/// Spawn the background thread that performs the initial filesystem scan and
/// then installs a `BackgroundWatcher` into `shared_picker`.
///
/// `scan_signal` must already be `true` when this is called (see
/// `new_with_shared_state`); this thread is responsible for clearing it.
/// `cancelled` is checked at every hand-off point so that a picker replaced
/// mid-scan never receives stale scan results or an orphaned watcher.
#[allow(clippy::too_many_arguments)]
fn spawn_scan_and_watcher(
    base_path: PathBuf,
    scan_signal: Arc<AtomicBool>,
    synced_files_count: Arc<AtomicUsize>,
    warmup_mmap_cache: bool,
    mode: FFFMode,
    shared_picker: SharedPicker,
    shared_frecency: SharedFrecency,
    cancelled: Arc<AtomicBool>,
) {
    std::thread::spawn(move || {
        // scan_signal is already `true` (set by the caller before spawning)
        // so waiters see "scanning" even before this thread is scheduled.
        info!("Starting initial file scan");

        let mut git_workdir = None;
        match scan_filesystem(&base_path, &synced_files_count, &shared_frecency, mode) {
            Ok(sync) => {
                // Cancellation check #1: discard results if this picker
                // instance was replaced while the scan was running.
                if cancelled.load(Ordering::Acquire) {
                    info!("Scan completed but picker was replaced, discarding results");
                    scan_signal.store(false, Ordering::Relaxed);
                    return;
                }

                info!(
                    "Initial filesystem scan completed: found {} files",
                    sync.files.len()
                );

                // Remember the workdir before `sync` is moved into the picker;
                // the watcher below needs it.
                git_workdir = sync.git_workdir.clone();

                // Write results into the provided shared handle.
                let write_result = shared_picker.write().ok().map(|mut guard| {
                    if let Some(ref mut picker) = *guard {
                        picker.sync_data = sync;
                    }
                });

                if write_result.is_none() {
                    error!("Failed to write scan results into picker");
                }

                // OPTIMIZATION: Warmup mmap cache in background to avoid blocking first grep.
                if warmup_mmap_cache
                    && !cancelled.load(Ordering::Acquire)
                    && let Ok(guard) = shared_picker.read()
                    && let Some(ref picker) = *guard
                {
                    warmup_mmaps(picker.sync_data.files());
                }
            }
            Err(e) => {
                error!("Initial scan failed: {:?}", e);
            }
        }
        // Scan phase finished (successfully or not): release any waiters.
        scan_signal.store(false, Ordering::Relaxed);

        // Don't create a watcher if this picker instance was already replaced
        if cancelled.load(Ordering::Acquire) {
            info!("Picker was replaced, skipping background watcher creation");
            return;
        }

        match BackgroundWatcher::new(
            base_path,
            git_workdir,
            shared_picker.clone(),
            shared_frecency.clone(),
            mode,
        ) {
            Ok(watcher) => {
                info!("Background file watcher initialized successfully");

                // Final cancellation check: if the picker was replaced between
                // watcher creation and this write, drop the watcher instead of
                // storing it in the wrong picker.
                if cancelled.load(Ordering::Acquire) {
                    info!("Picker was replaced, dropping orphaned watcher");
                    drop(watcher);
                    return;
                }

                let write_result = shared_picker.write().ok().map(|mut guard| {
                    if let Some(ref mut picker) = *guard {
                        picker.background_watcher = Some(watcher);
                    }
                });

                if write_result.is_none() {
                    error!("Failed to store background watcher in picker");
                }
            }
            Err(e) => {
                error!("Failed to initialize background file watcher: {:?}", e);
            }
        }

        // the debouncer keeps running in its own thread
    });
}
779
780/// Pre-populate mmap caches for all eligible files so the first grep search
781/// doesn't pay the mmap creation + page fault cost.
782///
783/// Each file is mmap'd and a single byte is read to trigger the page fault.
784/// This runs in parallel using rayon.
785#[tracing::instrument(skip(files), name = "warmup_mmaps", level = Level::DEBUG)]
786fn warmup_mmaps(files: &[FileItem]) {
787    let warmed = std::sync::atomic::AtomicUsize::new(0);
788
789    files.par_iter().for_each(|file| {
790        if file.is_binary || file.size == 0 {
791            return;
792        }
793
794        if let Some(content) = file.get_mmap() {
795            // Read the first byte to trigger the initial page fault (mmap)
796            // or ensure the content is cached (Windows buffer).
797            let _ = std::hint::black_box(content.first());
798
799            warmed.fetch_add(1, Ordering::Relaxed);
800        }
801    });
802}
803
/// Walk `base_path` in parallel (honoring gitignore/exclude/ignore files),
/// read git status on a separate thread, then merge git status and frecency
/// scores into each file and return the list sorted by path.
///
/// `synced_files_count` is incremented per discovered file so callers can
/// report progress while the walk runs.
fn scan_filesystem(
    base_path: &Path,
    synced_files_count: &Arc<AtomicUsize>,
    shared_frecency: &SharedFrecency,
    mode: FFFMode,
) -> Result<FileSync, Error> {
    use ignore::{WalkBuilder, WalkState};
    use std::thread;

    let scan_start = std::time::Instant::now();
    info!("SCAN: Starting parallel filesystem scan and git status");

    // run separate thread for git status because it effectively does another separate file
    // traversal which could be pretty slow on large repos (in general 300-500ms)
    thread::scope(|s| {
        let git_handle = s.spawn(|| {
            let git_workdir = Repository::discover(base_path)
                .ok()
                .and_then(|repo| repo.workdir().map(Path::to_path_buf));

            if let Some(ref git_dir) = git_workdir {
                debug!("Git repository found at: {}", git_dir.display());
            } else {
                debug!("No git repository found for path: {}", base_path.display());
            }

            let status_cache = GitStatusCache::read_git_status(
                git_workdir.as_deref(),
                // do not include unmodified here to avoid extra cost
                // we are treating all missing files as unmodified
                StatusOptions::new()
                    .include_untracked(true)
                    .recurse_untracked_dirs(true)
                    .exclude_submodules(true),
            );

            (git_workdir, status_cache)
        });

        let walker = WalkBuilder::new(base_path)
            .hidden(false)
            .git_ignore(true)
            .git_exclude(true)
            .git_global(true)
            .ignore(true)
            .follow_links(false)
            .build_parallel();

        let walker_start = std::time::Instant::now();
        debug!("SCAN: Starting file walker");

        // Walker callbacks run on multiple threads; results are collected
        // into a single mutex-guarded Vec.
        let files = Arc::new(std::sync::Mutex::new(Vec::new()));
        walker.run(|| {
            let files = Arc::clone(&files);
            let counter = Arc::clone(synced_files_count);
            let base_path = base_path.to_path_buf();

            Box::new(move |result| {
                if let Ok(entry) = result
                    && entry.file_type().is_some_and(|ft| ft.is_file())
                {
                    let path = entry.path();

                    // Files inside .git are never indexed.
                    if is_git_file(path) {
                        return WalkState::Continue;
                    }

                    let file_item = FileItem::new(
                        path.to_path_buf(),
                        &base_path,
                        None, // Git status will be added after join
                    );

                    if let Ok(mut files_vec) = files.lock() {
                        files_vec.push(file_item);
                        counter.fetch_add(1, Ordering::Relaxed);
                    }
                }
                WalkState::Continue
            })
        });

        // All walker callbacks have returned, so this is the only Arc holder
        // and try_unwrap cannot fail here.
        let mut files = Arc::try_unwrap(files).unwrap().into_inner().unwrap();
        let walker_time = walker_start.elapsed();
        info!("SCAN: File walking completed in {:?}", walker_time);

        let (git_workdir, git_cache) = git_handle.join().map_err(|_| {
            error!("Failed to join git status thread");
            Error::ThreadPanic
        })?;

        let frecency = shared_frecency
            .read()
            .map_err(|_| Error::AcquireFrecencyLock)?;

        // Merge git status and frecency into each file in parallel.
        files
            .par_iter_mut()
            .try_for_each(|file| -> Result<(), Error> {
                if let Some(git_cache) = &git_cache {
                    file.git_status = git_cache.lookup_status(&file.path);
                }

                if let Some(frecency) = frecency.as_ref() {
                    file.update_frecency_scores(frecency, mode)?;
                }

                Ok(())
            })?;

        let total_time = scan_start.elapsed();
        info!(
            "SCAN: Total scan time {:?} for {} files",
            total_time,
            files.len()
        );

        // The index relies on path ordering for binary search (see FileSync).
        files.par_sort_unstable_by(|a, b| a.path.as_os_str().cmp(b.path.as_os_str()));
        Ok(FileSync { files, git_workdir })
    })
}
924
/// Check whether `path` lives inside a `.git` directory.
///
/// Any *ancestor* directory component named `.git` matches; the final
/// component is excluded so a plain file named `.git` is not filtered.
/// Unlike the previous `to_str()` substring test, this works for non-UTF-8
/// paths (where `to_str()` returned `None` and everything passed through)
/// and is separator-agnostic without per-OS branching.
#[inline]
fn is_git_file(path: &Path) -> bool {
    path.parent()
        .is_some_and(|dir| dir.components().any(|c| c.as_os_str() == ".git"))
}