use crate::background_watcher::BackgroundWatcher;
use crate::error::Error;
use crate::frecency::FrecencyTracker;
use crate::git::GitStatusCache;
use crate::grep::{GrepResult, GrepSearchOptions, grep_search};
use crate::query_tracker::QueryTracker;
use crate::score::match_and_score_files;
use crate::shared::{SharedFrecency, SharedPicker};
use crate::types::{
BigramFilter, BigramIndexBuilder, BigramOverlay, ContentCacheBudget, FileItem, PaginationArgs,
ScoringContext, SearchResult,
};
use fff_query_parser::FFFQuery;
use git2::{Repository, Status, StatusOptions};
use rayon::prelude::*;
use std::fmt::Debug;
use std::path::{Path, PathBuf};
use std::sync::{
Arc,
atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
};
use std::time::SystemTime;
use tracing::{Level, debug, error, info, warn};
/// Operating mode of the picker: interactive Neovim use or AI-driven use.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum FFFMode {
    /// Interactive editor integration (the default).
    #[default]
    Neovim,
    /// Programmatic / AI consumer.
    Ai,
}

impl FFFMode {
    /// Returns `true` when the picker runs in [`FFFMode::Ai`].
    pub fn is_ai(self) -> bool {
        matches!(self, FFFMode::Ai)
    }
}
/// Per-call knobs for [`FilePicker::fuzzy_search`].
#[derive(Debug, Clone, Copy, Default)]
pub struct FuzzySearchOptions<'a> {
    /// Scoring thread count; `0` selects the host's available parallelism.
    pub max_threads: usize,
    /// Path of the currently focused file, forwarded to the scorer.
    pub current_file: Option<&'a str>,
    /// Project root used for query-history (combo) lookups.
    pub project_path: Option<&'a Path>,
    /// Score multiplier applied to combo-boosted matches.
    pub combo_boost_score_multiplier: i32,
    /// Minimum combo count before the query-history boost applies.
    pub min_combo_count: u32,
    /// Result paging parameters forwarded to the scorer.
    pub pagination: PaginationArgs,
}
/// The picker's file set: a path-sorted "base" region produced by a full
/// filesystem scan, followed by an unsorted "overflow" region of files
/// discovered afterwards (e.g. by the watcher).
#[derive(Debug, Clone)]
struct FileSync {
    // Base region at `..base_count` (sorted by path), overflow at `base_count..`.
    files: Vec<FileItem>,
    // Number of files in the sorted base region.
    base_count: usize,
    // Git working directory detected during the scan, if any.
    pub git_workdir: Option<PathBuf>,
}
impl FileSync {
    /// Creates an empty file set with no base region and no git workdir.
    fn new() -> Self {
        Self {
            files: Vec::new(),
            base_count: 0,
            git_workdir: None,
        }
    }

    /// All files: sorted base region (`..base_count`) then overflow (`base_count..`).
    #[inline]
    fn files(&self) -> &[FileItem] {
        &self.files
    }

    /// Files appended after the initial scan (unsorted overflow region).
    #[inline]
    fn overflow_files(&self) -> &[FileItem] {
        &self.files[self.base_count..]
    }

    #[allow(dead_code)]
    fn get_file(&self, index: usize) -> Option<&FileItem> {
        self.files.get(index)
    }

    #[inline]
    fn get_file_mut(&mut self, index: usize) -> Option<&mut FileItem> {
        self.files.get_mut(index)
    }

    /// Binary-searches the sorted base region for `path`.
    /// `Ok(i)` is the existing index, `Err(i)` the sorted insertion point.
    #[inline]
    fn find_file_index(&self, path: &Path) -> Result<usize, usize> {
        self.files[..self.base_count].binary_search_by(|f| f.path.as_path().cmp(path))
    }

    /// Linear scan of the overflow region; returns an absolute index into `files`.
    fn find_overflow_index(&self, path: &Path) -> Option<usize> {
        self.files[self.base_count..]
            .iter()
            .position(|f| f.path == path)
            .map(|pos| self.base_count + pos)
    }

    #[inline]
    #[allow(dead_code)]
    fn len(&self) -> usize {
        self.files.len()
    }

    fn insert_file(&mut self, position: usize, file: FileItem) {
        self.files.insert(position, file);
    }

    #[allow(dead_code)]
    fn remove_file(&mut self, index: usize) {
        if index < self.files.len() {
            self.files.remove(index);
        }
    }

    /// Keeps only files matching `predicate`; returns the number removed.
    /// `base_count` is recomputed so the sorted base region stays consistent.
    ///
    /// Fix: the predicate is now invoked exactly once per file, in order.
    /// The previous implementation pre-counted the base region with one pass
    /// and then ran `Vec::retain` over everything, calling the `FnMut`
    /// predicate twice for every base-region file — wrong for any stateful
    /// predicate.
    fn retain_files<F>(&mut self, mut predicate: F) -> usize
    where
        F: FnMut(&FileItem) -> bool,
    {
        let initial_len = self.files.len();
        let old_base_count = self.base_count;
        let mut visited = 0usize;
        let mut base_retained = 0usize;
        // `Vec::retain` visits elements exactly once, in order, so `visited`
        // tracks the original index of each element.
        self.files.retain(|f| {
            let keep = predicate(f);
            if keep && visited < old_base_count {
                base_retained += 1;
            }
            visited += 1;
            keep
        });
        self.base_count = base_retained;
        initial_len - self.files.len()
    }

    /// Inserts `file` into the sorted base region, keeping it sorted.
    /// Returns `false` when the path is already present (nothing inserted).
    ///
    /// Fix: `base_count` is now incremented on insert. Previously it was left
    /// unchanged, which silently demoted the last base file into the overflow
    /// region on every insert, and made a file inserted at the very end of the
    /// base region invisible to `find_file_index` immediately afterwards.
    fn insert_file_sorted(&mut self, file: FileItem) -> bool {
        match self.find_file_index(&file.path) {
            Ok(_) => false,
            Err(position) => {
                self.insert_file(position, file);
                self.base_count += 1;
                true
            }
        }
    }
}
impl FileItem {
    /// Builds a `FileItem` for `path`, reading filesystem metadata on the spot.
    pub fn new(path: PathBuf, base_path: &Path, git_status: Option<Status>) -> Self {
        let metadata = std::fs::metadata(&path).ok();
        Self::new_with_metadata(path, base_path, git_status, metadata.as_ref())
    }

    /// Builds a `FileItem` from already-fetched metadata. `None` metadata
    /// (file could not be stat'ed) yields zero size and mtime.
    pub fn new_with_metadata(
        path: PathBuf,
        base_path: &Path,
        git_status: Option<Status>,
        metadata: Option<&std::fs::Metadata>,
    ) -> Self {
        // Display path relative to the picker root; falls back to the full
        // path when it cannot be expressed relatively.
        let relative_path = match pathdiff::diff_paths(&path, base_path) {
            Some(rel) => rel,
            None => path.clone(),
        }
        .to_string_lossy()
        .into_owned();
        let name = path
            .file_name()
            .unwrap_or_default()
            .to_string_lossy()
            .into_owned();
        // mtime in seconds since the Unix epoch; 0 when unavailable.
        let (size, modified) = metadata.map_or((0, 0), |meta| {
            let mtime = meta
                .modified()
                .ok()
                .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
                .map_or(0, |d| d.as_secs());
            (meta.len(), mtime)
        });
        let is_binary = is_known_binary_extension(&path);
        Self::new_raw(
            path,
            relative_path,
            name,
            size,
            modified,
            git_status,
            is_binary,
        )
    }

    /// Refreshes the cached frecency scores from `tracker` for `mode`.
    pub fn update_frecency_scores(
        &mut self,
        tracker: &FrecencyTracker,
        mode: FFFMode,
    ) -> Result<(), Error> {
        let access = tracker.get_access_score(&self.path, mode) as i32;
        let modification =
            tracker.get_modification_score(self.modified, self.git_status, mode) as i32;
        self.access_frecency_score = access;
        self.modification_frecency_score = modification;
        self.total_frecency_score = access + modification;
        Ok(())
    }
}
/// Construction options for [`FilePicker::new`].
pub struct FilePickerOptions {
    /// Root directory to index.
    pub base_path: String,
    /// Pre-fault file contents into the mmap cache after scanning.
    pub warmup_mmap_cache: bool,
    /// Operating mode (Neovim vs AI).
    pub mode: FFFMode,
    /// Explicit content-cache budget; `None` auto-sizes it from the repo size.
    pub cache_budget: Option<ContentCacheBudget>,
    /// Spawn a filesystem watcher after the initial scan.
    pub watch: bool,
}
impl Default for FilePickerOptions {
    /// Defaults: index the current directory, no mmap warmup, default mode,
    /// auto-sized cache budget, filesystem watching enabled.
    fn default() -> Self {
        Self {
            base_path: String::from("."),
            warmup_mmap_cache: false,
            mode: FFFMode::default(),
            cache_budget: None,
            watch: true,
        }
    }
}
/// Indexes and searches files under `base_path`, optionally kept fresh by a
/// background filesystem watcher and accelerated by a bigram content index.
pub struct FilePicker {
    /// Operating mode (Neovim vs AI).
    pub mode: FFFMode,
    /// Root directory being indexed.
    pub base_path: PathBuf,
    /// Set while a filesystem scan is in flight (shared with worker threads).
    pub is_scanning: Arc<AtomicBool>,
    // Base + overflow file set (see `FileSync`).
    sync_data: FileSync,
    // Shared budget for cached (mmap'd) file contents.
    cache_budget: Arc<ContentCacheBudget>,
    // True when the caller supplied a budget; suppresses auto-resizing.
    has_explicit_cache_budget: bool,
    // Set once the watcher-setup path has finished (even when watching is off).
    watcher_ready: Arc<AtomicBool>,
    // Files seen so far by the current scan (progress reporting).
    scanned_files_count: Arc<AtomicUsize>,
    // Handle to the background watcher; taken and stopped on shutdown.
    background_watcher: Option<BackgroundWatcher>,
    // Pre-fault file contents after scanning.
    warmup_mmap_cache: bool,
    // Whether to spawn the background watcher at all.
    watch: bool,
    // Cooperative cancellation flag checked by background work and grep.
    cancelled: Arc<AtomicBool>,
    // Immutable bigram index over base files, built after warmup.
    bigram_index: Option<Arc<BigramFilter>>,
    // Mutable overlay tracking bigram changes since the index was built.
    bigram_overlay: Option<Arc<parking_lot::RwLock<BigramOverlay>>>,
}
impl std::fmt::Debug for FilePicker {
    // Manual impl: several fields (watcher handle, locks) are not `Debug`,
    // so only the cheap and informative ones are printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("FilePicker")
            .field("base_path", &self.base_path)
            .field("sync_data", &self.sync_data)
            .field("is_scanning", &self.is_scanning.load(Ordering::Relaxed))
            .field(
                "scanned_files_count",
                &self.scanned_files_count.load(Ordering::Relaxed),
            )
            .finish_non_exhaustive()
    }
}
impl FilePicker {
/// Root directory being indexed.
pub fn base_path(&self) -> &Path {
    &self.base_path
}
/// Whether mmap-cache warmup was requested at construction.
pub fn need_warmup_mmap_cache(&self) -> bool {
    self.warmup_mmap_cache
}
/// Operating mode (Neovim vs AI).
pub fn mode(&self) -> FFFMode {
    self.mode
}
/// Shared content-cache budget used by grep and warmup.
pub fn cache_budget(&self) -> &ContentCacheBudget {
    &self.cache_budget
}
/// Bigram index built after warmup, if available yet.
pub fn bigram_index(&self) -> Option<&BigramFilter> {
    self.bigram_index.as_deref()
}
/// Overlay of bigram changes accumulated since the index was built.
pub fn bigram_overlay(&self) -> Option<&parking_lot::RwLock<BigramOverlay>> {
    self.bigram_overlay.as_deref()
}
/// Mutable access to a file by absolute index into the file set.
pub fn get_file_mut(&mut self, index: usize) -> Option<&mut FileItem> {
    self.sync_data.get_file_mut(index)
}
/// Installs a bigram index together with its overlay.
pub fn set_bigram_index(&mut self, index: BigramFilter, overlay: BigramOverlay) {
    self.bigram_index = Some(Arc::new(index));
    self.bigram_overlay = Some(Arc::new(parking_lot::RwLock::new(overlay)));
}
/// Git working directory detected during scanning, if any.
pub fn git_root(&self) -> Option<&Path> {
    self.sync_data.git_workdir.as_deref()
}
/// All indexed files (sorted base region followed by overflow).
pub fn get_files(&self) -> &[FileItem] {
    self.sync_data.files()
}
/// Files discovered after the initial scan (overflow region only).
pub fn get_overflow_files(&self) -> &[FileItem] {
    self.sync_data.overflow_files()
}
/// Creates a picker for `options.base_path` without scanning anything yet.
///
/// # Errors
/// Returns [`Error::InvalidPath`] when the base path does not exist.
pub fn new(options: FilePickerOptions) -> Result<Self, Error> {
    let path = PathBuf::from(&options.base_path);
    if !path.exists() {
        error!("Base path does not exist: {}", options.base_path);
        return Err(Error::InvalidPath(path));
    }
    // Remember whether the caller pinned a budget; if not, it is re-sized
    // automatically once the file count is known (see collect_files /
    // spawn_scan_and_watcher).
    let has_explicit_budget = options.cache_budget.is_some();
    let initial_budget = options.cache_budget.unwrap_or_default();
    Ok(FilePicker {
        base_path: path,
        sync_data: FileSync::new(),
        is_scanning: Arc::new(AtomicBool::new(false)),
        watcher_ready: Arc::new(AtomicBool::new(false)),
        scanned_files_count: Arc::new(AtomicUsize::new(0)),
        background_watcher: None,
        warmup_mmap_cache: options.warmup_mmap_cache,
        watch: options.watch,
        cancelled: Arc::new(AtomicBool::new(false)),
        mode: options.mode,
        cache_budget: Arc::new(initial_budget),
        bigram_index: None,
        bigram_overlay: None,
        has_explicit_cache_budget: has_explicit_budget,
    })
}
/// Creates a picker, publishes it into `shared_picker`, then runs the initial
/// scan (and optional watcher / warmup) on a background thread.
///
/// The picker is stored in the shared slot *before* the background work
/// starts, so searches can already run against the (initially empty) file set.
pub fn new_with_shared_state(
    shared_picker: SharedPicker,
    shared_frecency: SharedFrecency,
    options: FilePickerOptions,
) -> Result<(), Error> {
    let picker = Self::new(options)?;
    info!(
        "Spawning background threads: base_path={}, warmup={}, mode={:?}",
        picker.base_path.display(),
        picker.warmup_mmap_cache,
        picker.mode,
    );
    let warmup = picker.warmup_mmap_cache;
    let watch = picker.watch;
    let mode = picker.mode;
    // Mark the scan as in-progress before publishing so readers never see a
    // published picker that looks idle.
    picker.is_scanning.store(true, Ordering::Release);
    // Clone the shared signals before the picker is moved into the slot.
    let scan_signal = Arc::clone(&picker.is_scanning);
    let watcher_ready = Arc::clone(&picker.watcher_ready);
    let synced_files_count = Arc::clone(&picker.scanned_files_count);
    let cancelled = Arc::clone(&picker.cancelled);
    let path = picker.base_path.clone();
    {
        let mut guard = shared_picker.write()?;
        *guard = Some(picker);
    }
    spawn_scan_and_watcher(
        path,
        scan_signal,
        watcher_ready,
        synced_files_count,
        warmup,
        watch,
        mode,
        shared_picker,
        shared_frecency,
        cancelled,
    );
    Ok(())
}
/// Synchronously scans the filesystem and replaces the current file set.
///
/// Frecency is not consulted here (an empty tracker is passed to the walk);
/// git statuses are applied after the status thread finishes.
pub fn collect_files(&mut self) -> Result<(), Error> {
    self.is_scanning.store(true, Ordering::Relaxed);
    self.scanned_files_count.store(0, Ordering::Relaxed);
    let empty_frecency = SharedFrecency::default();
    let walk = walk_filesystem(
        &self.base_path,
        &self.scanned_files_count,
        &empty_frecency,
        self.mode,
    )?;
    self.sync_data = walk.sync;
    // Auto-size the content cache from the repo size unless the caller
    // pinned an explicit budget.
    if !self.has_explicit_cache_budget {
        let file_count = self.sync_data.files().len();
        self.cache_budget = Arc::new(ContentCacheBudget::new_for_repo(file_count));
    } else {
        self.cache_budget.reset();
    }
    // Block on the git-status thread and fold statuses into the file set.
    if let Ok(Some(git_cache)) = walk.git_handle.join() {
        for file in self.sync_data.files.iter_mut() {
            file.git_status = git_cache.lookup_status(&file.path);
        }
    }
    self.is_scanning.store(false, Ordering::Relaxed);
    Ok(())
}
/// Starts the background filesystem watcher for this picker and flags
/// `watcher_ready` once it is installed.
///
/// # Errors
/// Propagates watcher construction failures from `BackgroundWatcher::new`.
pub fn spawn_background_watcher(
    &mut self,
    shared_picker: &SharedPicker,
    shared_frecency: &SharedFrecency,
) -> Result<(), Error> {
    let git_workdir = self.sync_data.git_workdir.clone();
    let watcher = BackgroundWatcher::new(
        self.base_path.clone(),
        git_workdir,
        shared_picker.clone(),
        shared_frecency.clone(),
        self.mode,
    )?;
    self.background_watcher = Some(watcher);
    self.watcher_ready.store(true, Ordering::Release);
    Ok(())
}
/// Scores `files` against `query` and returns a paginated [`SearchResult`].
///
/// Associated function (no `&self`) so callers can search a snapshot of
/// files without holding the shared picker lock.
pub fn fuzzy_search<'a, 'q>(
    files: &'a [FileItem],
    query: &'q FFFQuery<'q>,
    query_tracker: Option<&QueryTracker>,
    options: FuzzySearchOptions<'q>,
) -> SearchResult<'a> {
    // 0 threads means "use the host's available parallelism" (4 if unknown).
    let max_threads = if options.max_threads == 0 {
        std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(4)
    } else {
        options.max_threads
    };
    debug!(
        raw_query = ?query.raw_query,
        pagination = ?options.pagination,
        ?max_threads,
        current_file = ?options.current_file,
        "Fuzzy search",
    );
    let total_files = files.len();
    let location = query.location;
    // The string actually driving typo budgeting: plain text, the first part
    // of a multi-part query, or the trimmed raw query as a fallback.
    let effective_query = match &query.fuzzy_query {
        fff_query_parser::FuzzyQuery::Text(t) => *t,
        fff_query_parser::FuzzyQuery::Parts(parts) if !parts.is_empty() => parts[0],
        _ => query.raw_query.trim(),
    };
    // Typo tolerance scales with query length (1 per 4 chars), clamped 2..=6.
    let max_typos = (effective_query.len() as u16 / 4).clamp(2, 6);
    // Most recent history entry for this exact query (combo boosting);
    // requires both a tracker and a project path.
    let last_same_query_entry =
        query_tracker
            .zip(options.project_path)
            .and_then(|(tracker, project_path)| {
                tracker
                    .get_last_query_entry(
                        query.raw_query,
                        project_path,
                        options.min_combo_count,
                    )
                    .ok()
                    .flatten()
            });
    let context = ScoringContext {
        query,
        max_typos,
        max_threads,
        project_path: options.project_path,
        current_file: options.current_file,
        last_same_query_match: last_same_query_entry,
        combo_boost_score_multiplier: options.combo_boost_score_multiplier,
        min_combo_count: options.min_combo_count,
        pagination: options.pagination,
    };
    let time = std::time::Instant::now();
    let (items, scores, total_matched) = match_and_score_files(files, &context);
    info!(
        ?query,
        completed_in = ?time.elapsed(),
        total_matched,
        returned_count = items.len(),
        pagination = ?options.pagination,
        "Fuzzy search completed",
    );
    SearchResult {
        items,
        scores,
        total_matched,
        total_files,
        location,
    }
}
/// Content search over the indexed files, consulting the bigram index and
/// its overlay to skip files that cannot match.
pub fn grep(&self, query: &FFFQuery<'_>, options: &GrepSearchOptions) -> GrepResult<'_> {
    // Hold the overlay read lock for the duration of the search.
    let overlay_guard = self.bigram_overlay.as_ref().map(|o| o.read());
    grep_search(
        self.get_files(),
        query,
        options,
        self.cache_budget(),
        self.bigram_index.as_deref(),
        overlay_guard.as_deref(),
        Some(&self.cancelled),
    )
}
/// Like [`Self::grep`] but never takes the bigram overlay lock.
// NOTE(review): the reason for skipping the overlay is not visible here —
// presumably for callers that already hold (or must avoid) the overlay lock;
// confirm against call sites.
pub fn grep_without_overlay(
    &self,
    query: &FFFQuery<'_>,
    options: &GrepSearchOptions,
) -> GrepResult<'_> {
    grep_search(
        self.get_files(),
        query,
        options,
        self.cache_budget(),
        self.bigram_index.as_deref(),
        None,
        Some(&self.cancelled),
    )
}
/// Snapshot of scan / watcher / warmup progress.
pub fn get_scan_progress(&self) -> ScanProgress {
    let scanned_count = self.scanned_files_count.load(Ordering::Relaxed);
    let is_scanning = self.is_scanning.load(Ordering::Relaxed);
    ScanProgress {
        scanned_files_count: scanned_count,
        is_scanning,
        is_watcher_ready: self.watcher_ready.load(Ordering::Relaxed),
        // Warmup completion is proxied by the bigram index existing, since
        // the index is installed at the end of the warmup phase.
        is_warmup_complete: self.bigram_index.is_some(),
    }
}
/// Applies a batch of git statuses to the tracked files and refreshes their
/// frecency scores. Paths not found in the picker are logged and skipped.
///
/// # Errors
/// Propagates lock-acquisition and frecency-update errors.
pub fn update_git_statuses(
    &mut self,
    status_cache: GitStatusCache,
    shared_frecency: &SharedFrecency,
) -> Result<(), Error> {
    debug!(
        statuses_count = status_cache.statuses_len(),
        "Updating git status",
    );
    let mode = self.mode;
    // Hold the frecency read lock across the whole batch.
    let frecency = shared_frecency.read()?;
    status_cache
        .into_iter()
        .try_for_each(|(path, status)| -> Result<(), Error> {
            if let Some(file) = self.get_mut_file_by_path(&path) {
                file.git_status = Some(status);
                if let Some(ref f) = *frecency {
                    file.update_frecency_scores(f, mode)?;
                }
            } else {
                // Not fatal: the file may have been removed since the
                // status snapshot was taken.
                error!(?path, "Couldn't update the git status for path");
            }
            Ok(())
        })?;
    Ok(())
}
pub fn update_single_file_frecency(
&mut self,
file_path: impl AsRef<Path>,
frecency_tracker: &FrecencyTracker,
) -> Result<(), Error> {
let path = file_path.as_ref();
let index = self
.sync_data
.find_file_index(path)
.ok()
.or_else(|| self.sync_data.find_overflow_index(path));
if let Some(index) = index
&& let Some(file) = self.sync_data.get_file_mut(index)
{
file.update_frecency_scores(frecency_tracker, self.mode)?;
}
Ok(())
}
/// Immutable lookup by absolute path.
///
/// Fix: also searches the overflow region, mirroring
/// [`Self::get_mut_file_by_path`]. Previously only the sorted base region
/// was consulted, so files discovered after the initial scan could be
/// fetched mutably but not immutably.
pub fn get_file_by_path(&self, path: impl AsRef<Path>) -> Option<&FileItem> {
    let path = path.as_ref();
    self.sync_data
        .find_file_index(path)
        .ok()
        .or_else(|| self.sync_data.find_overflow_index(path))
        .and_then(|index| self.sync_data.files().get(index))
}
/// Mutable lookup by absolute path: binary search over the sorted base
/// region first, then a linear scan of the overflow region.
pub fn get_mut_file_by_path(&mut self, path: impl AsRef<Path>) -> Option<&mut FileItem> {
    let path = path.as_ref();
    let index = self
        .sync_data
        .find_file_index(path)
        .ok()
        .or_else(|| self.sync_data.find_overflow_index(path));
    index.and_then(|i| self.sync_data.get_file_mut(i))
}
/// Inserts `file` into the sorted base region and returns a reference to the
/// stored item. If the path is already present, logs a warning and returns
/// the existing entry unchanged.
pub fn add_file_sorted(&mut self, file: FileItem) -> Option<&FileItem> {
    let path = file.path.clone();
    if !self.sync_data.insert_file_sorted(file) {
        // Duplicate insert: keep the existing entry untouched.
        warn!(
            "Trying to insert a file that already exists: {}",
            path.display()
        );
    }
    // Both branches previously duplicated this exact find-and-return
    // sequence; perform it once after the insert attempt.
    self.sync_data
        .find_file_index(&path)
        .ok()
        .and_then(|idx| self.sync_data.get_file(idx))
}
/// Watcher callback for file creation/modification events.
///
/// Three cases: the path is already in the sorted base region (refresh it),
/// in the overflow region (refresh it), or unknown (append to overflow).
/// Returns the tracked item, or `None` when a base-region index lookup
/// unexpectedly fails.
#[tracing::instrument(skip(self), name = "timing_update", level = Level::DEBUG)]
pub fn on_create_or_modify(&mut self, path: impl AsRef<Path> + Debug) -> Option<&FileItem> {
    let path = path.as_ref();
    // Case 1: path is in the sorted base region.
    if let Ok(pos) = self.sync_data.find_file_index(path) {
        let file = self.sync_data.get_file_mut(pos)?;
        // Deleted base files are tombstoned, not removed; a new event for
        // the same path resurrects the entry.
        if file.is_deleted {
            file.is_deleted = false;
            debug!(
                "on_create_or_modify: resurrected tombstoned file at index {}",
                pos
            );
        }
        debug!(
            "on_create_or_modify: file EXISTS at index {}, updating metadata",
            pos
        );
        let modified = match std::fs::metadata(path) {
            Ok(metadata) => metadata
                .modified()
                .ok()
                .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok()),
            Err(e) => {
                error!("Failed to get metadata for {}: {}", path.display(), e);
                None
            }
        };
        // Only move mtime forward; a newer mtime invalidates cached content.
        if let Some(modified) = modified {
            let modified = modified.as_secs();
            if file.modified < modified {
                file.modified = modified;
                file.invalidate_mmap(&self.cache_budget);
            }
        }
        // Re-index the file's bigrams in the overlay, keyed by base index.
        if let Some(ref overlay) = self.bigram_overlay
            && let Ok(content) = std::fs::read(path)
        {
            overlay.write().modify_file(pos, &content);
        }
        return Some(&*file);
    }
    // Case 2: path is in the overflow region.
    if let Some(abs_pos) = self.sync_data.find_overflow_index(path) {
        let file = &mut self.sync_data.files[abs_pos];
        let modified = std::fs::metadata(path)
            .ok()
            .and_then(|m| m.modified().ok())
            .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok());
        if let Some(modified) = modified {
            let modified = modified.as_secs();
            if file.modified < modified {
                file.modified = modified;
                file.invalidate_mmap(&self.cache_budget);
            }
        }
        // Overlay bookkeeping for overflow files is keyed by the
        // overflow-relative position, not the absolute index.
        if let Some(ref overlay) = self.bigram_overlay
            && let Ok(content) = std::fs::read(path)
        {
            let overflow_pos = abs_pos - self.sync_data.base_count;
            let bigrams = crate::types::extract_bigrams(&content);
            overlay.write().update_added(overflow_pos, bigrams);
        }
        return Some(&self.sync_data.files[abs_pos]);
    }
    // Case 3: brand-new file — append to the overflow region.
    debug!(
        "on_create_or_modify: file NEW, appending to overflow (base: {}, overflow: {})",
        self.sync_data.base_count,
        self.sync_data.overflow_files().len(),
    );
    let file_item = FileItem::new(path.to_path_buf(), &self.base_path, None);
    self.sync_data.files.push(file_item);
    if let Some(ref overlay) = self.bigram_overlay {
        // Unreadable files are indexed with empty content.
        let content = std::fs::read(path).unwrap_or_default();
        overlay.write().add_file(&content);
    }
    self.sync_data.files.last()
}
/// Watcher callback for file deletion. Returns `true` when the path was
/// tracked.
///
/// Base-region files are tombstoned in place (`is_deleted = true`) so their
/// indices — which the bigram index and overlay are keyed by — stay stable;
/// overflow files are physically removed and the overlay compacted.
pub fn remove_file_by_path(&mut self, path: impl AsRef<Path>) -> bool {
    let path = path.as_ref();
    match self.sync_data.find_file_index(path) {
        Ok(index) => {
            let file = &mut self.sync_data.files[index];
            file.is_deleted = true;
            file.invalidate_mmap(&self.cache_budget);
            if let Some(ref overlay) = self.bigram_overlay {
                overlay.write().delete_file(index);
            }
            true
        }
        Err(_) => {
            if let Some(abs_pos) = self.sync_data.find_overflow_index(path) {
                // Overlay entries for overflow files are keyed by the
                // overflow-relative position.
                let overflow_pos = abs_pos - self.sync_data.base_count;
                self.sync_data.files.remove(abs_pos);
                if let Some(ref overlay) = self.bigram_overlay {
                    overlay.write().remove_added(overflow_pos);
                }
                true
            } else {
                false
            }
        }
    }
}
/// Drops every tracked file under `dir` (prefix match on the path).
/// Returns the number of files removed.
pub fn remove_all_files_in_dir(&mut self, dir: impl AsRef<Path>) -> usize {
    let dir_path = dir.as_ref();
    self.sync_data
        .retain_files(|file| !file.path.starts_with(dir_path))
}
/// Requests cooperative cancellation of in-flight background work and greps.
pub fn cancel(&self) {
    self.cancelled.store(true, Ordering::Release);
}
/// Stops and drops the background watcher, if one is running.
pub fn stop_background_monitor(&mut self) {
    if let Some(mut watcher) = self.background_watcher.take() {
        watcher.stop();
    }
}
/// Re-runs the filesystem walk, replacing the file set and re-applying git
/// status and frecency. No-op when a scan is already in progress.
///
/// Scan errors are logged, not returned; the `Result` covers lock errors only.
pub fn trigger_rescan(&mut self, shared_frecency: &SharedFrecency) -> Result<(), Error> {
    if self.is_scanning.load(Ordering::Relaxed) {
        debug!("Scan already in progress, skipping trigger_rescan");
        return Ok(());
    }
    self.is_scanning.store(true, Ordering::Relaxed);
    self.scanned_files_count.store(0, Ordering::Relaxed);
    let walk_result = walk_filesystem(
        &self.base_path,
        &self.scanned_files_count,
        shared_frecency,
        self.mode,
    );
    match walk_result {
        Ok(walk) => {
            info!(
                "Filesystem rescan completed: found {} files",
                walk.sync.files.len()
            );
            self.sync_data = walk.sync;
            // NOTE(review): unlike collect_files, this always reset()s the
            // existing budget instead of re-sizing it for the new file
            // count — confirm that is intended.
            self.cache_budget.reset();
            if let Ok(Some(git_cache)) = walk.git_handle.join() {
                let frecency = shared_frecency.read().ok();
                let frecency_ref = frecency.as_ref().and_then(|f| f.as_ref());
                self.sync_data.files.par_iter_mut().for_each(|file| {
                    file.git_status = git_cache.lookup_status(&file.path);
                    if let Some(frecency) = frecency_ref {
                        let _ = file.update_frecency_scores(frecency, self.mode);
                    }
                });
            }
            // Warmup runs on a snapshot so the picker is not held locked.
            if self.warmup_mmap_cache {
                let files = self.sync_data.files().to_vec();
                let budget = Arc::clone(&self.cache_budget);
                std::thread::spawn(move || {
                    warmup_mmaps(&files, &budget);
                });
            }
        }
        Err(error) => error!(?error, "Failed to scan file system"),
    }
    self.is_scanning.store(false, Ordering::Relaxed);
    Ok(())
}
/// Whether a filesystem scan is currently running.
pub fn is_scan_active(&self) -> bool {
    self.is_scanning.load(Ordering::Relaxed)
}
/// Shared handle to the scan-in-progress flag.
pub fn scan_signal(&self) -> Arc<AtomicBool> {
    Arc::clone(&self.is_scanning)
}
/// Shared handle to the watcher-ready flag.
pub fn watcher_signal(&self) -> Arc<AtomicBool> {
    Arc::clone(&self.watcher_ready)
}
}
/// Snapshot of background progress, returned by
/// [`FilePicker::get_scan_progress`].
#[allow(unused)]
#[derive(Debug, Clone)]
pub struct ScanProgress {
    /// Files seen so far by the current scan.
    pub scanned_files_count: usize,
    /// A scan is currently running.
    pub is_scanning: bool,
    /// Watcher setup has finished (set even when watching is disabled).
    pub is_watcher_ready: bool,
    /// Warmup finished — proxied by the bigram index being installed.
    pub is_warmup_complete: bool,
}
/// Background worker spawned by [`FilePicker::new_with_shared_state`].
///
/// Phases, in order: (1) initial filesystem walk and publishing of the file
/// set, (2) git-status application, (3) watcher startup, (4) optional mmap
/// warmup and bigram-index build. `cancelled` is checked between phases so a
/// replaced picker abandons its results instead of clobbering the new one.
#[allow(clippy::too_many_arguments)]
fn spawn_scan_and_watcher(
    base_path: PathBuf,
    scan_signal: Arc<AtomicBool>,
    watcher_ready: Arc<AtomicBool>,
    synced_files_count: Arc<AtomicUsize>,
    warmup_mmap_cache: bool,
    watch: bool,
    mode: FFFMode,
    shared_picker: SharedPicker,
    shared_frecency: SharedFrecency,
    cancelled: Arc<AtomicBool>,
) {
    std::thread::spawn(move || {
        info!("Starting initial file scan");
        // Captured from the walk for the watcher; assigned in the Ok arm.
        let git_workdir;
        // Phase 1: walk the filesystem and publish the file set.
        match walk_filesystem(&base_path, &synced_files_count, &shared_frecency, mode) {
            Ok(walk) => {
                if cancelled.load(Ordering::Acquire) {
                    info!("Walk completed but picker was replaced, discarding results");
                    scan_signal.store(false, Ordering::Relaxed);
                    return;
                }
                info!(
                    "Initial filesystem walk completed: found {} files",
                    walk.sync.files.len()
                );
                git_workdir = walk.sync.git_workdir.clone();
                let git_handle = walk.git_handle;
                let write_result = shared_picker.write().ok().map(|mut guard| {
                    if let Some(ref mut picker) = *guard {
                        picker.sync_data = walk.sync;
                        picker.cache_budget.reset();
                    }
                });
                if write_result.is_none() {
                    error!("Failed to write scan results into picker");
                }
                // Files become searchable as soon as the flag drops, even
                // before git status / warmup complete.
                scan_signal.store(false, Ordering::Relaxed);
                info!("Files indexed and searchable");
                // Phase 2: fold git statuses (computed concurrently) in.
                if !cancelled.load(Ordering::Acquire) {
                    apply_git_status(&shared_picker, &shared_frecency, git_handle, mode);
                }
            }
            Err(e) => {
                // On scan failure there is nothing to watch or warm up;
                // still flip the ready flag so waiters are not stuck.
                error!("Initial scan failed: {:?}", e);
                scan_signal.store(false, Ordering::Relaxed);
                watcher_ready.store(true, Ordering::Release);
                return;
            }
        }
        // Phase 3: start the filesystem watcher (optional).
        if watch && !cancelled.load(Ordering::Acquire) {
            match BackgroundWatcher::new(
                base_path,
                git_workdir,
                shared_picker.clone(),
                shared_frecency.clone(),
                mode,
            ) {
                Ok(watcher) => {
                    info!("Background file watcher initialized successfully");
                    if cancelled.load(Ordering::Acquire) {
                        info!("Picker was replaced, dropping orphaned watcher");
                        drop(watcher);
                        watcher_ready.store(true, Ordering::Release);
                        return;
                    }
                    let write_result = shared_picker.write().ok().map(|mut guard| {
                        if let Some(ref mut picker) = *guard {
                            picker.background_watcher = Some(watcher);
                        }
                    });
                    if write_result.is_none() {
                        error!("Failed to store background watcher in picker");
                    }
                }
                Err(e) => {
                    error!("Failed to initialize background file watcher: {:?}", e);
                }
            }
        }
        watcher_ready.store(true, Ordering::Release);
        // Phase 4: optional mmap warmup followed by the bigram-index build.
        if warmup_mmap_cache && !cancelled.load(Ordering::Acquire) {
            let phase_start = std::time::Instant::now();
            // Auto-size the cache budget now that the file count is known,
            // unless the caller pinned an explicit budget.
            if let Ok(mut guard) = shared_picker.write()
                && let Some(ref mut picker) = *guard
                && !picker.has_explicit_cache_budget
            {
                let file_count = picker.sync_data.files().len();
                picker.cache_budget = Arc::new(ContentCacheBudget::new_for_repo(file_count));
                info!(
                    "Cache budget configured for {} files: max_files={}, max_bytes={}",
                    file_count, picker.cache_budget.max_files, picker.cache_budget.max_bytes,
                );
            }
            // Warm the content cache while holding only a read lock.
            if !cancelled.load(Ordering::Acquire)
                && let Ok(guard) = shared_picker.read()
                && let Some(ref picker) = *guard
            {
                let warmup_start = std::time::Instant::now();
                warmup_mmaps(picker.sync_data.files(), &picker.cache_budget);
                info!(
                    "Warmup completed in {:.2}s (cached {} files, {} bytes)",
                    warmup_start.elapsed().as_secs_f64(),
                    picker.cache_budget.cached_count.load(Ordering::Relaxed),
                    picker.cache_budget.cached_bytes.load(Ordering::Relaxed),
                );
            }
            // Build the bigram index on a snapshot so searches stay unblocked.
            if !cancelled.load(Ordering::Acquire) {
                let snapshot = shared_picker.read().ok().and_then(|guard| {
                    guard.as_ref().map(|picker| {
                        (
                            picker.sync_data.files().to_vec(),
                            Arc::clone(&picker.cache_budget),
                        )
                    })
                });
                if let Some((files, budget)) = snapshot {
                    let bigram_start = std::time::Instant::now();
                    info!("Starting bigram index build for {} files...", files.len());
                    let (index, content_binary) = build_bigram_index(&files, &budget);
                    info!(
                        "Bigram index ready in {:.2}s",
                        bigram_start.elapsed().as_secs_f64(),
                    );
                    if let Ok(mut guard) = shared_picker.write()
                        && let Some(ref mut picker) = *guard
                    {
                        // Mark files the build detected as binary by content
                        // (missed by the extension heuristic).
                        for &idx in &content_binary {
                            if let Some(file) = picker.sync_data.get_file_mut(idx) {
                                file.is_binary = true;
                            }
                        }
                        let base_count = picker.sync_data.base_count;
                        picker.bigram_index = Some(Arc::new(index));
                        picker.bigram_overlay = Some(Arc::new(parking_lot::RwLock::new(
                            BigramOverlay::new(base_count),
                        )));
                    }
                }
            }
            info!(
                "Post-scan warmup + bigram total: {:.2}s",
                phase_start.elapsed().as_secs_f64(),
            );
        }
    });
}
/// Pre-faults file contents into the cache, best files first, until the
/// budget's file-count or byte limit is hit.
///
/// Candidate ranking: non-binary, non-empty files ordered by descending
/// total frecency; everything else sorts behind them. The byte budget is
/// enforced approximately — workers reserve bytes with a fetch_add before
/// reading, so one file may be skipped right at the boundary.
#[tracing::instrument(skip(files), name = "warmup_mmaps", level = Level::DEBUG)]
pub fn warmup_mmaps(files: &[FileItem], budget: &ContentCacheBudget) {
    let max_files = budget.max_files;
    let max_bytes = budget.max_bytes;
    let max_file_size = budget.max_file_size;
    let mut all: Vec<&FileItem> = files.iter().collect();
    // Partial selection: move the `max_files` best candidates to the front
    // without fully sorting the list.
    if all.len() > max_files {
        all.select_nth_unstable_by(max_files, |a, b| {
            let a_ok = !a.is_binary && a.size > 0;
            let b_ok = !b.is_binary && b.size > 0;
            match (a_ok, b_ok) {
                (true, false) => std::cmp::Ordering::Less,
                (false, true) => std::cmp::Ordering::Greater,
                (false, false) => std::cmp::Ordering::Equal,
                // Higher frecency sorts first.
                (true, true) => b.total_frecency_score.cmp(&a.total_frecency_score),
            }
        });
    }
    let to_warm = &all[..all.len().min(max_files)];
    let warmed_bytes = AtomicU64::new(0);
    let budget_exhausted = AtomicBool::new(false);
    to_warm.par_iter().for_each(|file| {
        if budget_exhausted.load(Ordering::Relaxed) {
            return;
        }
        if file.is_binary || file.size == 0 || file.size > max_file_size {
            return;
        }
        // Reserve this file's bytes; back off if that crosses the budget.
        let prev_bytes = warmed_bytes.fetch_add(file.size, Ordering::Relaxed);
        if prev_bytes + file.size > max_bytes {
            budget_exhausted.store(true, Ordering::Relaxed);
            return;
        }
        if let Some(content) = file.get_content(budget) {
            // Touch the first byte so the read cannot be optimized away.
            let _ = std::hint::black_box(content.first());
        }
    });
}
/// Builds the bigram filter over `files`, returning the index plus the
/// indices of files whose *content* turned out to be binary (NUL byte in the
/// first 512 bytes) even though their extension looked textual.
///
/// Two builders run per file: the main index and a sparser "skip" index that
/// is attached to the main one after compression.
pub fn build_bigram_index(
    files: &[FileItem],
    budget: &ContentCacheBudget,
) -> (BigramFilter, Vec<usize>) {
    let start = std::time::Instant::now();
    info!("Building bigram index for {} files...", files.len());
    let builder = BigramIndexBuilder::new(files.len());
    let skip_builder = BigramIndexBuilder::new(files.len());
    let max_file_size = budget.max_file_size;
    // Collected in parallel, hence the Mutex.
    let content_binary: std::sync::Mutex<Vec<usize>> = std::sync::Mutex::new(Vec::new());
    files.par_iter().enumerate().for_each(|(i, file)| {
        if file.is_binary || file.size == 0 || file.size > max_file_size {
            return;
        }
        // Prefer already-cached content; fall back to reading from disk.
        // Unreadable files are simply skipped.
        let data: Option<&[u8]>;
        let owned;
        if let Some(cached) = file.get_content(budget) {
            if detect_binary_content(cached) {
                content_binary.lock().unwrap().push(i);
                return;
            }
            data = Some(cached);
            owned = None;
        } else if let Ok(read_data) = std::fs::read(&file.path) {
            if detect_binary_content(&read_data) {
                content_binary.lock().unwrap().push(i);
                return;
            }
            data = None;
            owned = Some(read_data);
        } else {
            return;
        }
        let content = data.unwrap_or_else(|| owned.as_ref().unwrap());
        builder.add_file_content(i, content);
        skip_builder.add_file_content_skip(i, content);
    });
    let cols = builder.columns_used();
    let mut index = builder.compress(None);
    let skip_index = skip_builder.compress(Some(12));
    index.set_skip_index(skip_index);
    // The build allocates heavily; nudge the allocator to release memory.
    hint_allocator_collect();
    info!(
        "Bigram index built in {:.2}s — {} dense columns for {} files",
        start.elapsed().as_secs_f64(),
        cols,
        files.len(),
    );
    let binary_indices = content_binary.into_inner().unwrap();
    if !binary_indices.is_empty() {
        info!(
            "Bigram build detected {} content-binary files (not caught by extension)",
            binary_indices.len(),
        );
    }
    (index, binary_indices)
}
/// Result of a filesystem walk: the collected file set plus a handle to the
/// git-status thread that is still running concurrently.
struct WalkResult {
    sync: FileSync,
    // Joined by the caller so git status never blocks the walk itself.
    git_handle: std::thread::JoinHandle<Option<GitStatusCache>>,
}
/// Walks `base_path` in parallel (respecting ignore files), applies frecency
/// scores, and sorts the result by path to form the base region.
///
/// Git status is computed on a separate thread; the returned
/// [`WalkResult::git_handle`] must be joined by the caller.
///
/// # Errors
/// Returns an error when the frecency lock cannot be acquired or a frecency
/// update fails.
fn walk_filesystem(
    base_path: &Path,
    synced_files_count: &Arc<AtomicUsize>,
    shared_frecency: &SharedFrecency,
    mode: FFFMode,
) -> Result<WalkResult, Error> {
    use ignore::{WalkBuilder, WalkState};
    let scan_start = std::time::Instant::now();
    info!("SCAN: Starting filesystem walk and git status (async)");
    // `discover` also finds a repo in a parent directory of base_path.
    let git_workdir = Repository::discover(base_path)
        .ok()
        .and_then(|repo| repo.workdir().map(Path::to_path_buf));
    if let Some(ref git_dir) = git_workdir {
        debug!("Git repository found at: {}", git_dir.display());
    } else {
        debug!("No git repository found for path: {}", base_path.display());
    }
    // Kick off git status concurrently with the walk.
    let git_workdir_for_status = git_workdir.clone();
    let git_handle = std::thread::spawn(move || {
        GitStatusCache::read_git_status(
            git_workdir_for_status.as_deref(),
            StatusOptions::new()
                .include_untracked(true)
                .recurse_untracked_dirs(true)
                .exclude_submodules(true),
        )
    });
    // Hidden files are included; .gitignore/.ignore rules still apply.
    let walker = WalkBuilder::new(base_path)
        .hidden(false)
        .git_ignore(true)
        .git_exclude(true)
        .git_global(true)
        .ignore(true)
        .follow_links(false)
        .build_parallel();
    let walker_start = std::time::Instant::now();
    debug!("SCAN: Starting file walker");
    let files = parking_lot::Mutex::new(Vec::new());
    walker.run(|| {
        let files = &files;
        let counter = Arc::clone(synced_files_count);
        let base_path = base_path.to_path_buf();
        Box::new(move |result| {
            if let Ok(entry) = result
                && entry.file_type().is_some_and(|ft| ft.is_file())
            {
                let path = entry.path();
                // Never index the contents of .git directories.
                if is_git_file(path) {
                    return WalkState::Continue;
                }
                let metadata = entry.metadata().ok();
                let file_item = FileItem::new_with_metadata(
                    path.to_path_buf(),
                    &base_path,
                    None,
                    metadata.as_ref(),
                );
                files.lock().push(file_item);
                counter.fetch_add(1, Ordering::Relaxed);
            }
            WalkState::Continue
        })
    });
    let mut files = files.into_inner();
    info!(
        "SCAN: File walking completed in {:?} for {} files",
        walker_start.elapsed(),
        files.len(),
    );
    let frecency = shared_frecency
        .read()
        .map_err(|_| Error::AcquireFrecencyLock)?;
    if let Some(frecency) = frecency.as_ref() {
        files
            .par_iter_mut()
            .try_for_each(|file| file.update_frecency_scores(frecency, mode))?;
    }
    drop(frecency);
    // Path order defines the sorted base region used for binary search.
    files.par_sort_unstable_by(|a, b| a.path.as_os_str().cmp(b.path.as_os_str()));
    let total_time = scan_start.elapsed();
    info!("SCAN: Walk + frecency completed in {:?}", total_time);
    // Every scanned file belongs to the base region; overflow starts empty.
    let base_count = files.len();
    Ok(WalkResult {
        sync: FileSync {
            files,
            base_count,
            git_workdir,
        },
        git_handle,
    })
}
/// Joins the git-status thread and folds its results into the shared picker,
/// refreshing frecency scores along the way. Errors are logged, not returned.
fn apply_git_status(
    shared_picker: &SharedPicker,
    shared_frecency: &SharedFrecency,
    git_handle: std::thread::JoinHandle<Option<GitStatusCache>>,
    mode: FFFMode,
) {
    let join_start = std::time::Instant::now();
    let git_cache = match git_handle.join() {
        Ok(cache) => cache,
        Err(_) => {
            error!("Git status thread panicked");
            return;
        }
    };
    info!("SCAN: Git status ready in {:?}", join_start.elapsed());
    // None means there was no repository (or reading status failed).
    let Some(git_cache) = git_cache else { return };
    if let Ok(mut guard) = shared_picker.write()
        && let Some(ref mut picker) = *guard
    {
        let frecency = shared_frecency.read().ok();
        let frecency_ref = frecency.as_ref().and_then(|f| f.as_ref());
        picker.sync_data.files.par_iter_mut().for_each(|file| {
            file.git_status = git_cache.lookup_status(&file.path);
            if let Some(frecency) = frecency_ref {
                // Per-file score failures are intentionally ignored here.
                let _ = file.update_frecency_scores(frecency, mode);
            }
        });
        info!(
            "SCAN: Applied git status to {} files ({} dirty)",
            picker.sync_data.files.len(),
            git_cache.statuses_len(),
        );
    }
}
/// Returns `true` when `path` lies inside a `.git` directory.
///
/// Matches on the platform's separator; paths that are not valid UTF-8 are
/// treated as non-git.
#[inline]
fn is_git_file(path: &Path) -> bool {
    let needle = if cfg!(target_family = "windows") {
        "\\.git\\"
    } else {
        "/.git/"
    };
    path.to_str().is_some_and(|p| p.contains(needle))
}
/// Fast extension-based binary detection; content sniffing (NUL-byte check)
/// happens separately in `detect_binary_content`. Paths without a UTF-8
/// extension are assumed to be text.
#[inline]
fn is_known_binary_extension(path: &Path) -> bool {
    path.extension()
        .and_then(|e| e.to_str())
        .is_some_and(|ext| {
            matches!(
                ext,
                // Images
                "png" | "jpg" | "jpeg" | "gif" | "bmp" | "ico" | "webp" | "tiff" | "tif" | "avif" |
                "heic" | "psd" | "icns" | "cur" | "raw" | "cr2" | "nef" | "dng" |
                // Audio / video
                "mp4" | "avi" | "mov" | "wmv" | "mkv" | "mp3" | "wav" | "flac" | "ogg" | "m4a" |
                "aac" | "webm" | "flv" | "mpg" | "mpeg" | "wma" | "opus" |
                // Archives
                "zip" | "tar" | "gz" | "bz2" | "xz" | "7z" | "rar" | "zst" | "lz4" | "lzma" |
                "cab" | "cpio" |
                // Packages / installers
                "deb" | "rpm" | "apk" | "dmg" | "msi" | "iso" | "nupkg" | "whl" | "egg" |
                "snap" | "appimage" | "flatpak" |
                // Native binaries
                "exe" | "dll" | "so" | "dylib" | "o" | "a" | "lib" | "bin" | "elf" |
                // Documents
                "pdf" | "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx" |
                // Databases
                "db" | "sqlite" | "sqlite3" | "mdb" |
                // Fonts
                "ttf" | "otf" | "woff" | "woff2" | "eot" |
                // Compiled bytecode
                "class" | "pyc" | "pyo" | "wasm" | "dex" | "jar" | "war" |
                // Scientific / ML artifacts
                "npy" | "npz" | "pkl" | "pickle" | "h5" | "hdf5" | "pt" | "pth" | "onnx" |
                "safetensors" | "tfrecord" |
                // 3D assets
                "glb" | "fbx" | "blend" |
                // Columnar / serialized data
                "parquet" | "arrow" | "pb" |
                // Misc
                "DS_Store" | "suo"
            )
        })
}
/// Heuristic binary sniff: a NUL byte anywhere within the first 512 bytes
/// marks the content as binary. Bytes beyond 512 are never inspected.
#[inline]
pub(crate) fn detect_binary_content(content: &[u8]) -> bool {
    content.iter().take(512).any(|&byte| byte == 0)
}
/// Asks mimalloc to release cached memory after the allocation-heavy bigram
/// build. Compiles to a no-op unless the `mimalloc-collect` feature is on.
fn hint_allocator_collect() {
    #[cfg(feature = "mimalloc-collect")]
    {
        // SAFETY: `mi_collect` only requests that the allocator release
        // cached memory; per the mimalloc documentation it has no
        // Rust-visible preconditions. Broadcasting runs it once on each
        // rayon worker so every thread-local heap is purged, then once more
        // on the current thread.
        rayon::broadcast(|_| unsafe { libmimalloc_sys::mi_collect(true) });
        unsafe { libmimalloc_sys::mi_collect(true) };
    }
}