use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use crate::constraints::Constrainable;
use crate::query_tracker::QueryMatchEntry;
use fff_query_parser::{FFFQuery, FuzzyQuery, Location};
/// Raw bytes of a file, held either as a memory map or as an owned heap buffer.
///
/// Both variants are exposed as `[u8]` through the `Deref` implementation.
#[derive(Debug)]
// NOTE(review): the reason for the `dead_code` allowance is not visible in this
// file — confirm it is still required.
#[allow(dead_code)] pub enum FileContent {
/// Memory-mapped file contents. This variant is compiled out on Windows.
#[cfg(not(target_os = "windows"))]
Mmap(memmap2::Mmap),
/// File contents read fully into an owned byte vector.
Buffer(Vec<u8>),
}
impl std::ops::Deref for FileContent {
type Target = [u8];
fn deref(&self) -> &[u8] {
match self {
#[cfg(not(target_os = "windows"))]
FileContent::Mmap(m) => m,
FileContent::Buffer(b) => b,
}
}
}
/// Metadata for a single file together with a lazily filled cache of its content.
#[derive(Debug)]
pub struct FileItem {
/// Path handed to the file-system calls that load the file's content.
pub path: PathBuf,
/// Path returned by `Constrainable::relative_path`.
/// NOTE(review): presumably relative to the project root — confirm with the producer.
pub relative_path: String,
/// Name returned by `Constrainable::file_name`.
pub file_name: String,
/// Recorded file size in bytes; checked against the cache limits and used for
/// budget accounting when content is cached or invalidated.
pub size: u64,
/// Modification timestamp. NOTE(review): unit and epoch are not visible here — confirm.
pub modified: u64,
/// Frecency component for accesses; initialised to 0 by `new_raw`.
pub access_frecency_score: i32,
/// Frecency component for modifications; initialised to 0 by `new_raw`.
pub modification_frecency_score: i32,
/// Overall frecency score; initialised to 0 by `new_raw`.
pub total_frecency_score: i32,
/// Git status of the file, if one was supplied.
pub git_status: Option<git2::Status>,
/// Binary flag; `get_content_for_search` refuses to load uncached content when set.
pub is_binary: bool,
/// Deletion flag; initialised to `false` by `new_raw`.
pub is_deleted: bool,
// Lazily initialised content cache: filled by `get_content`, cleared by
// `invalidate_mmap`, and not carried over by `Clone` (a clone starts empty).
content: OnceLock<FileContent>,
}
/// Manual `Clone`: copies every metadata field but never the cached content.
impl Clone for FileItem {
    /// Returns a copy of the metadata whose content cache starts out empty.
    fn clone(&self) -> Self {
        // Exhaustive destructuring: adding a field to `FileItem` forces this
        // implementation to be revisited.
        let Self {
            path,
            relative_path,
            file_name,
            size,
            modified,
            access_frecency_score,
            modification_frecency_score,
            total_frecency_score,
            git_status,
            is_binary,
            is_deleted,
            content: _,
        } = self;

        Self {
            path: path.clone(),
            relative_path: relative_path.clone(),
            file_name: file_name.clone(),
            size: *size,
            modified: *modified,
            access_frecency_score: *access_frecency_score,
            modification_frecency_score: *modification_frecency_score,
            total_frecency_score: *total_frecency_score,
            git_status: *git_status,
            is_binary: *is_binary,
            is_deleted: *is_deleted,
            // The cached bytes are not duplicated; the clone reloads on demand.
            content: OnceLock::new(),
        }
    }
}
/// File bytes handed out for searching: either borrowed from a `FileItem`'s
/// cache or owned by this value.
pub enum FileContentRef<'a> {
/// Bytes borrowed for the lifetime `'a`.
Cached(&'a [u8]),
/// Content owned by this value and released when it is dropped.
Temp(FileContent),
}
impl std::ops::Deref for FileContentRef<'_> {
type Target = [u8];
fn deref(&self) -> &[u8] {
match self {
FileContentRef::Cached(s) => s,
FileContentRef::Temp(c) => c,
}
}
}
impl FileItem {
    /// Creates an item from raw metadata.
    ///
    /// All frecency scores start at 0, `is_deleted` is `false`, and the content
    /// cache is empty.
    pub fn new_raw(
        path: PathBuf,
        relative_path: String,
        file_name: String,
        size: u64,
        modified: u64,
        git_status: Option<git2::Status>,
        is_binary: bool,
    ) -> Self {
        Self {
            path,
            relative_path,
            file_name,
            size,
            modified,
            access_frecency_score: 0,
            modification_frecency_score: 0,
            total_frecency_score: 0,
            git_status,
            is_binary,
            is_deleted: false,
            content: OnceLock::new(),
        }
    }

    /// Drops any cached content (memory map or heap buffer) and returns its
    /// share of `budget`.
    ///
    /// The counters are decremented with saturation: if they were zeroed by
    /// `ContentCacheBudget::reset` while this item still held content, a plain
    /// `fetch_sub` would wrap around to a huge value and effectively disable
    /// caching for every other file.
    pub fn invalidate_mmap(&mut self, budget: &ContentCacheBudget) {
        if self.content.take().is_none() {
            return;
        }
        let size = self.size;
        // The closures always return `Some`, so `fetch_update` cannot fail.
        let _ = budget
            .cached_count
            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |count| {
                Some(count.saturating_sub(1))
            });
        let _ = budget
            .cached_bytes
            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |bytes| {
                Some(bytes.saturating_sub(size))
            });
    }

    /// Returns the file's content, loading and caching it on first use.
    ///
    /// Returns `None` when the file is empty, exceeds `budget.max_file_size`,
    /// would push the cache past `budget.max_files` / `budget.max_bytes`, or
    /// cannot be read.
    ///
    /// The limit check and the counter update are separate atomic operations,
    /// so concurrent callers may overshoot the limits slightly; the limits are
    /// a soft bound.
    pub fn get_content(&self, budget: &ContentCacheBudget) -> Option<&[u8]> {
        if let Some(content) = self.content.get() {
            return Some(content);
        }
        if self.size == 0 || self.size > budget.max_file_size {
            return None;
        }

        let count = budget.cached_count.load(Ordering::Relaxed);
        let bytes = budget.cached_bytes.load(Ordering::Relaxed);
        // `saturating_add` keeps the comparison meaningful even if the byte
        // counter is close to `u64::MAX`.
        if count >= budget.max_files || bytes.saturating_add(self.size) > budget.max_bytes {
            return None;
        }

        let content = load_file_content(&self.path, self.size)?;
        // Charge the budget only if this call actually populated the cache.
        // When another thread won the race, its content is kept, ours is
        // dropped, and the budget has already been charged once.
        if self.content.set(content).is_ok() {
            budget.cached_count.fetch_add(1, Ordering::Relaxed);
            budget.cached_bytes.fetch_add(self.size, Ordering::Relaxed);
        }
        self.content.get().map(|cached| &**cached)
    }

    /// Returns content for searching, preferring the cache and falling back to
    /// a temporary, uncached load when the cache cannot take the file.
    ///
    /// The fallback refuses binary files, empty files, and files larger than
    /// `budget.max_file_size`.
    ///
    /// NOTE(review): `is_binary` is only consulted on the fallback path;
    /// `get_content` will cache and return a binary file if the budget allows.
    /// Confirm that callers filter binary files beforehand if that matters.
    #[inline]
    pub fn get_content_for_search<'a>(
        &'a self,
        budget: &ContentCacheBudget,
    ) -> Option<FileContentRef<'a>> {
        if let Some(cached) = self.get_content(budget) {
            return Some(FileContentRef::Cached(cached));
        }
        if self.is_binary || self.size == 0 || self.size > budget.max_file_size {
            return None;
        }
        let content = load_file_content(&self.path, self.size)?;
        Some(FileContentRef::Temp(content))
    }
}
// Size cut-off in bytes used by `load_file_content` on non-Windows targets:
// files smaller than this are read into a heap buffer, larger ones are
// memory-mapped.
// NOTE(review): 16 KiB on aarch64 vs 4 KiB elsewhere presumably mirrors the
// typical page size of those targets — confirm.
#[cfg(target_arch = "aarch64")]
const MMAP_THRESHOLD: u64 = 16 * 1024;
#[cfg(not(target_arch = "aarch64"))]
const MMAP_THRESHOLD: u64 = 4 * 1024;
/// Loads the content of `path`, choosing the strategy from the recorded `size`.
///
/// On non-Windows targets, files of at least `MMAP_THRESHOLD` bytes are
/// memory-mapped; everything else (and every file on Windows) is read into a
/// heap buffer. Any I/O or mapping failure yields `None`; a failed mapping is
/// not retried as a plain read.
fn load_file_content(path: &Path, size: u64) -> Option<FileContent> {
    #[cfg(not(target_os = "windows"))]
    {
        if size >= MMAP_THRESHOLD {
            let file = std::fs::File::open(path).ok()?;
            // SAFETY: NOTE(review) — a file-backed mapping is only sound while
            // the file is not truncated or rewritten underneath it. Nothing in
            // this function enforces that; presumably callers invalidate the
            // cached mapping when the file changes — confirm.
            let mapped = unsafe { memmap2::Mmap::map(&file) }.ok()?;
            return Some(FileContent::Mmap(mapped));
        }
    }
    #[cfg(target_os = "windows")]
    {
        // `size` only drives the mmap decision, which does not exist here.
        let _ = size;
    }
    std::fs::read(path).ok().map(FileContent::Buffer)
}
/// Exposes the fields of a `FileItem` that `Constrainable` asks for.
impl Constrainable for FileItem {
    /// Borrows the stored `relative_path`.
    #[inline]
    fn relative_path(&self) -> &str {
        self.relative_path.as_str()
    }

    /// Borrows the stored `file_name`.
    #[inline]
    fn file_name(&self) -> &str {
        self.file_name.as_str()
    }

    /// Returns a copy of the stored git status, if any.
    #[inline]
    fn git_status(&self) -> Option<git2::Status> {
        let Self { git_status, .. } = self;
        *git_status
    }
}
/// Score attached to a search result, stored as a total plus named components.
///
/// NOTE(review): how `total` is derived from the individual components is not
/// visible in this file — confirm against the scoring code before relying on
/// any relationship between the fields.
#[derive(Debug, Clone, Default)]
pub struct Score {
/// Final score value.
pub total: i32,
pub base_score: i32,
pub filename_bonus: i32,
pub special_filename_bonus: i32,
pub frecency_boost: i32,
pub git_status_boost: i32,
pub distance_penalty: i32,
pub current_file_penalty: i32,
pub combo_match_boost: i32,
/// Exact-match flag; `false` in the derived `Default`.
pub exact_match: bool,
/// Static label describing the kind of match; the set of possible values is
/// decided by the code that fills this struct in.
pub match_type: &'static str,
}
/// Window into a result list, expressed as an offset and a maximum item count.
#[derive(Debug, Clone, Copy)]
pub struct PaginationArgs {
    /// Offset of the requested window.
    pub offset: usize,
    /// Limit on the number of items in the window.
    pub limit: usize,
}

impl Default for PaginationArgs {
    /// Defaults to offset 0 with a limit of 100.
    fn default() -> Self {
        const FIRST_OFFSET: usize = 0;
        const DEFAULT_LIMIT: usize = 100;

        PaginationArgs {
            offset: FIRST_OFFSET,
            limit: DEFAULT_LIMIT,
        }
    }
}
/// Borrowed inputs and tuning parameters passed along with a query for scoring.
///
/// NOTE(review): most fields are consumed by code outside this file; the
/// per-field notes below state only what is visible here.
#[derive(Debug, Clone)]
pub struct ScoringContext<'a> {
/// Parsed query; `effective_query` reads its `fuzzy_query` and `raw_query`.
pub query: &'a FFFQuery<'a>,
/// Optional project path.
pub project_path: Option<&'a Path>,
/// Optional current file. NOTE(review): presumably a path string comparable to
/// `FileItem::relative_path` — confirm.
pub current_file: Option<&'a str>,
pub max_typos: u16,
pub max_threads: usize,
pub last_same_query_match: Option<QueryMatchEntry>,
pub combo_boost_score_multiplier: i32,
pub min_combo_count: u32,
/// Requested result window.
pub pagination: PaginationArgs,
}
impl ScoringContext<'_> {
    /// Returns the string to use as the query text.
    ///
    /// A `FuzzyQuery::Text` is returned as-is, a non-empty `FuzzyQuery::Parts`
    /// yields its first part, and every other case falls back to the trimmed
    /// raw query.
    pub fn effective_query(&self) -> &str {
        let fuzzy = &self.query.fuzzy_query;

        if let FuzzyQuery::Text(text) = fuzzy {
            return text;
        }
        if let FuzzyQuery::Parts(parts) = fuzzy {
            if !parts.is_empty() {
                return parts[0];
            }
        }
        self.query.raw_query.trim()
    }
}
/// Outcome of a search: borrowed file items, their scores, and summary counts.
#[derive(Debug, Clone, Default)]
pub struct SearchResult<'a> {
/// Matched files, borrowed for `'a`.
pub items: Vec<&'a FileItem>,
/// Scores for the results. NOTE(review): presumably parallel to `items`
/// (same length and order) — confirm with the producer.
pub scores: Vec<Score>,
/// Match count. NOTE(review): presumably counts all matches, not just the
/// entries in `items` — confirm.
pub total_matched: usize,
/// File count. NOTE(review): presumably the number of files searched — confirm.
pub total_files: usize,
/// Optional location taken from the query parser's `Location` type.
pub location: Option<Location>,
}
/// Per-file size cap (10 MiB) used as `max_file_size` by every non-zero budget.
const MAX_MMAP_FILE_SIZE: u64 = 10 * 1024 * 1024;
/// Total cached-bytes cap (512 MiB) used for the smallest repository tier.
const MAX_CACHED_CONTENT_BYTES: u64 = 512 * 1024 * 1024;

/// Limits and running totals for cached file content.
///
/// The `max_*` fields are fixed limits; the two atomics track what is
/// currently accounted as cached and can be updated through a shared reference.
#[derive(Debug)]
pub struct ContentCacheBudget {
    /// Maximum number of files that may be cached.
    pub max_files: usize,
    /// Maximum total number of cached bytes.
    pub max_bytes: u64,
    /// Maximum size in bytes of a single file eligible for loading.
    pub max_file_size: u64,
    /// Number of files currently accounted as cached.
    pub cached_count: AtomicUsize,
    /// Number of bytes currently accounted as cached.
    pub cached_bytes: AtomicU64,
}

impl ContentCacheBudget {
    /// Builds a budget with the given limits and zeroed usage counters.
    fn with_limits(max_files: usize, max_bytes: u64, max_file_size: u64) -> Self {
        Self {
            max_files,
            max_bytes,
            max_file_size,
            cached_count: AtomicUsize::new(0),
            cached_bytes: AtomicU64::new(0),
        }
    }

    /// Budget with no file-count or total-byte limit; the per-file size cap
    /// still applies.
    pub fn unlimited() -> Self {
        Self::with_limits(usize::MAX, u64::MAX, MAX_MMAP_FILE_SIZE)
    }

    /// Budget whose limits are all zero.
    pub fn zero() -> Self {
        Self::with_limits(0, 0, 0)
    }

    /// Picks limits from the repository's file count.
    ///
    /// | `file_count`      | `max_files` | `max_bytes` |
    /// |-------------------|-------------|-------------|
    /// | `0..=10_000`      | 30 000      | 512 MiB     |
    /// | `10_001..=50_000` | 10 000      | 256 MiB     |
    /// | above 50 000      | 5 000       | 128 MiB     |
    pub fn new_for_repo(file_count: usize) -> Self {
        const MIB: u64 = 1024 * 1024;

        let (max_files, max_bytes) = match file_count {
            0..=10_000 => (30_000, MAX_CACHED_CONTENT_BYTES),
            10_001..=50_000 => (10_000, 256 * MIB),
            _ => (5_000, 128 * MIB),
        };
        Self::with_limits(max_files, max_bytes, MAX_MMAP_FILE_SIZE)
    }

    /// Zeroes both usage counters; the limits are left untouched.
    pub fn reset(&self) {
        let zeroed_at = Ordering::Relaxed;
        self.cached_count.store(0, zeroed_at);
        self.cached_bytes.store(0, zeroed_at);
    }
}

impl Default for ContentCacheBudget {
    /// Same as `new_for_repo(30_000)`, i.e. the middle tier
    /// (10 000 files, 256 MiB).
    fn default() -> Self {
        Self::new_for_repo(30_000)
    }
}