use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use crate::constraints::Constrainable;
use crate::query_tracker::QueryMatchEntry;
use fff_query_parser::{FFFQuery, FuzzyQuery, Location};
/// Bytes of a loaded file, stored in a platform-specific container.
///
/// Exactly one variant is compiled per target: `Mmap` on non-Windows targets
/// and `Buffer` on Windows.
#[derive(Debug)]
// NOTE(review): the reason for allowing dead code is not visible in this section — confirm it is still needed.
#[allow(dead_code)] pub enum FileContent {
/// Memory-mapped view of the file (non-Windows builds).
#[cfg(not(target_os = "windows"))]
Mmap(memmap2::Mmap),
/// File contents held in an owned byte buffer (Windows builds).
#[cfg(target_os = "windows")]
Buffer(Vec<u8>),
}
impl std::ops::Deref for FileContent {
    type Target = [u8];

    /// Borrows the file's bytes, whichever platform-specific variant holds them.
    fn deref(&self) -> &[u8] {
        // Only one variant exists per target, so the pattern for the active
        // target is irrefutable and a plain `let` is enough.
        #[cfg(not(target_os = "windows"))]
        let FileContent::Mmap(bytes) = self;
        #[cfg(target_os = "windows")]
        let FileContent::Buffer(bytes) = self;
        bytes
    }
}
/// Metadata for a single file plus a lazily filled cache of its contents.
#[derive(Debug)]
pub struct FileItem {
/// Path handed to the loader when the file's contents are read.
pub path: PathBuf,
/// Path string exposed through `Constrainable::relative_path`
/// (presumably relative to the project root — TODO confirm).
pub relative_path: String,
/// `relative_path` lowercased; derived with `to_lowercase` in `new_raw`.
pub relative_path_lower: String,
/// File name string exposed through `Constrainable::file_name`.
pub file_name: String,
/// `file_name` lowercased; derived with `to_lowercase` in `new_raw`.
pub file_name_lower: String,
/// File size as recorded at construction. It is compared against the
/// content size limits and charged to the cache budget (presumably bytes).
pub size: u64,
/// Modification stamp; its unit and epoch are not visible in this section — TODO confirm.
pub modified: u64,
/// Frecency component; initialised to 0 by `new_raw`.
pub access_frecency_score: i64,
/// Frecency component; initialised to 0 by `new_raw`.
pub modification_frecency_score: i64,
/// Frecency total; initialised to 0 by `new_raw`.
pub total_frecency_score: i64,
/// Git status of the file, if one is known.
pub git_status: Option<git2::Status>,
/// When true, `get_content_for_search` will not load the file as temporary content.
pub is_binary: bool,
/// Content cache, filled at most once by `get_content` and emptied by `invalidate_mmap`.
content: OnceLock<FileContent>,
}
impl Clone for FileItem {
    /// Copies every metadata field; the clone starts with an empty content
    /// cache instead of sharing or duplicating the cached bytes.
    fn clone(&self) -> Self {
        // Exhaustive destructuring: adding a field to `FileItem` without
        // deciding how to clone it becomes a compile error.
        let Self {
            path,
            relative_path,
            relative_path_lower,
            file_name,
            file_name_lower,
            size,
            modified,
            access_frecency_score,
            modification_frecency_score,
            total_frecency_score,
            git_status,
            is_binary,
            content: _,
        } = self;

        Self {
            path: path.clone(),
            relative_path: relative_path.clone(),
            relative_path_lower: relative_path_lower.clone(),
            file_name: file_name.clone(),
            file_name_lower: file_name_lower.clone(),
            size: *size,
            modified: *modified,
            access_frecency_score: *access_frecency_score,
            modification_frecency_score: *modification_frecency_score,
            total_frecency_score: *total_frecency_score,
            git_status: *git_status,
            is_binary: *is_binary,
            content: OnceLock::new(),
        }
    }
}
/// File bytes that are either borrowed from a `FileItem`'s cache or owned
/// by this value; both variants dereference to `[u8]`.
pub enum FileContentRef<'a> {
/// Bytes borrowed for `'a` (produced from `FileItem::get_content`).
Cached(&'a [u8]),
/// Content loaded for this use only and owned by the enum; it is released
/// when the value is dropped.
Temp(FileContent),
}
impl std::ops::Deref for FileContentRef<'_> {
    type Target = [u8];

    /// Exposes the bytes uniformly, whether they are borrowed or owned.
    fn deref(&self) -> &[u8] {
        match *self {
            // The borrowed slice is `Copy`, so it can be handed out directly.
            FileContentRef::Cached(bytes) => bytes,
            // Go through `FileContent`'s own `Deref` to reach the byte slice.
            FileContentRef::Temp(ref owned) => &**owned,
        }
    }
}
impl FileItem {
    /// Builds a `FileItem` from already-known metadata.
    ///
    /// The lowercase variants of `relative_path` and `file_name` are derived
    /// here, all frecency scores start at zero, and no content is cached yet.
    pub fn new_raw(
        path: PathBuf,
        relative_path: String,
        file_name: String,
        size: u64,
        modified: u64,
        git_status: Option<git2::Status>,
        is_binary: bool,
    ) -> Self {
        Self {
            // Computed before the owned strings are moved into their fields.
            relative_path_lower: relative_path.to_lowercase(),
            file_name_lower: file_name.to_lowercase(),
            path,
            relative_path,
            file_name,
            size,
            modified,
            access_frecency_score: 0,
            modification_frecency_score: 0,
            total_frecency_score: 0,
            git_status,
            is_binary,
            content: OnceLock::new(),
        }
    }

    /// Drops any cached content and gives its share back to `budget`.
    ///
    /// Does nothing to the budget when no content was cached.
    ///
    /// NOTE(review): the amount returned is the *current* `self.size`; callers
    /// that update `size` should invalidate first so the budget stays balanced.
    pub fn invalidate_mmap(&mut self, budget: &ContentCacheBudget) {
        if self.content.take().is_some() {
            // Saturate rather than wrap: `ContentCacheBudget::reset` can zero
            // the counters while items still hold content, and a wrapping
            // subtraction would then push the counters to their maximum and
            // make every later budget check fail.
            let _ = budget.cached_count.fetch_update(
                Ordering::Relaxed,
                Ordering::Relaxed,
                |count| Some(count.saturating_sub(1)),
            );
            let released = self.size;
            let _ = budget.cached_bytes.fetch_update(
                Ordering::Relaxed,
                Ordering::Relaxed,
                |bytes| Some(bytes.saturating_sub(released)),
            );
        }
    }

    /// Returns the file's bytes, loading and caching them on first use.
    ///
    /// Returns `None` when:
    /// * `size` is 0 or exceeds `MAX_MMAP_FILE_SIZE`,
    /// * the budget is exhausted (`max_files` reached, or caching this file
    ///   would exceed `MAX_CACHED_CONTENT_BYTES`), or
    /// * the file cannot be loaded.
    ///
    /// The budget check is a plain load-then-act on relaxed atomics, so under
    /// concurrency the limits can be overshot slightly.
    #[inline]
    pub fn get_content(&self, budget: &ContentCacheBudget) -> Option<&[u8]> {
        if let Some(cached) = self.content.get() {
            return Some(cached);
        }
        if self.size == 0 || self.size > MAX_MMAP_FILE_SIZE {
            return None;
        }

        let count = budget.cached_count.load(Ordering::Relaxed);
        let bytes = budget.cached_bytes.load(Ordering::Relaxed);
        if count >= budget.max_files
            || bytes.saturating_add(self.size) > MAX_CACHED_CONTENT_BYTES
        {
            return None;
        }

        let content = load_file_content(&self.path)?;
        // Only the caller whose value is actually stored charges the budget.
        // If another thread won the race, our copy is dropped here and the
        // winner has already accounted for the cached content.
        if self.content.set(content).is_ok() {
            budget.cached_count.fetch_add(1, Ordering::Relaxed);
            budget.cached_bytes.fetch_add(self.size, Ordering::Relaxed);
        }
        self.content.get().map(|cached| &**cached)
    }

    /// Same as [`FileItem::get_content`]; kept as a separate entry point.
    #[inline]
    pub fn get_mmap(&self, budget: &ContentCacheBudget) -> Option<&[u8]> {
        self.get_content(budget)
    }

    /// Returns content for a search pass, preferring the cache.
    ///
    /// When `get_content` yields nothing (for example because the budget is
    /// exhausted), the file is loaded into a temporary that is owned by the
    /// returned value and not charged to `budget`. Binary, empty, and
    /// oversized files are never loaded temporarily.
    ///
    /// NOTE(review): `get_content` itself does not look at `is_binary`, so a
    /// binary file can still be returned as `Cached` — confirm this is intended.
    #[inline]
    pub fn get_content_for_search<'a>(
        &'a self,
        budget: &ContentCacheBudget,
    ) -> Option<FileContentRef<'a>> {
        if let Some(cached) = self.get_content(budget) {
            return Some(FileContentRef::Cached(cached));
        }
        if self.is_binary || self.size == 0 || self.size > MAX_MMAP_FILE_SIZE {
            return None;
        }
        let content = load_file_content(&self.path)?;
        Some(FileContentRef::Temp(content))
    }
}
/// Loads the contents of `path` into the platform's `FileContent` variant.
///
/// Returns `None` if the file cannot be opened, mapped, or read; the
/// underlying I/O error is discarded.
fn load_file_content(path: &Path) -> Option<FileContent> {
    #[cfg(not(target_os = "windows"))]
    {
        let handle = std::fs::File::open(path).ok()?;
        // SAFETY (NOTE(review)): `Mmap::map` is unsafe because the mapped bytes
        // are only valid while the underlying file is not modified or
        // truncated by anyone. Nothing in this function enforces that, so
        // soundness rests on how the surrounding application treats the file.
        let mapped = unsafe { memmap2::Mmap::map(&handle) };
        mapped.ok().map(FileContent::Mmap)
    }
    #[cfg(target_os = "windows")]
    {
        std::fs::read(path).ok().map(FileContent::Buffer)
    }
}
/// Exposes the path, name, and git fields of `FileItem` through the
/// `Constrainable` accessors.
impl Constrainable for FileItem {
    /// The stored `relative_path` as a string slice.
    #[inline]
    fn relative_path(&self) -> &str {
        self.relative_path.as_str()
    }

    /// The stored lowercase form of `relative_path`.
    #[inline]
    fn relative_path_lower(&self) -> &str {
        self.relative_path_lower.as_str()
    }

    /// The stored `file_name` as a string slice.
    #[inline]
    fn file_name(&self) -> &str {
        self.file_name.as_str()
    }

    /// A copy of the stored git status, if any.
    #[inline]
    fn git_status(&self) -> Option<git2::Status> {
        let status = self.git_status;
        status
    }
}
/// Score assigned to one match, with named components kept alongside the total.
///
/// `Default` yields all-zero numbers, `exact_match == false` and an empty
/// `match_type`.
///
/// NOTE(review): how `total` is derived from the other fields is not visible
/// in this section; presumably the boosts add to it and the penalties
/// subtract from it — confirm against the scoring code.
#[derive(Debug, Clone, Default)]
pub struct Score {
/// Final score.
pub total: i32,
pub base_score: i32,
pub filename_bonus: i32,
pub special_filename_bonus: i32,
pub frecency_boost: i32,
pub git_status_boost: i32,
pub distance_penalty: i32,
pub current_file_penalty: i32,
pub combo_match_boost: i32,
pub exact_match: bool,
/// Static label for the kind of match; the set of values is defined by the
/// code that builds `Score`, outside this section.
pub match_type: &'static str,
}
/// Paging window for a result list.
///
/// NOTE(review): presumably `offset` is the number of leading results to skip
/// and `limit` the maximum number to return — the consuming code is outside
/// this section.
#[derive(Debug, Clone, Copy)]
pub struct PaginationArgs {
    pub offset: usize,
    pub limit: usize,
}

impl Default for PaginationArgs {
    /// Starts at offset 0 with a limit of 100.
    fn default() -> Self {
        const DEFAULT_OFFSET: usize = 0;
        const DEFAULT_LIMIT: usize = 100;

        PaginationArgs {
            limit: DEFAULT_LIMIT,
            offset: DEFAULT_OFFSET,
        }
    }
}
/// Borrowed inputs and tuning knobs for one scoring pass.
///
/// NOTE(review): only `query` is used within this section (by
/// `effective_query`); the meaning of the remaining fields is taken from
/// their names and should be confirmed against the scoring code.
#[derive(Debug, Clone)]
pub struct ScoringContext<'a> {
/// Parsed query; `effective_query` reads its `fuzzy_query` and `raw_query`.
pub query: &'a FFFQuery<'a>,
pub project_path: Option<&'a Path>,
pub current_file: Option<&'a str>,
pub max_typos: u16,
pub max_threads: usize,
pub last_same_query_match: Option<QueryMatchEntry>,
pub combo_boost_score_multiplier: i32,
pub min_combo_count: u32,
/// Paging window for the results.
pub pagination: PaginationArgs,
}
impl ScoringContext<'_> {
    /// Picks the string to use as the query text.
    ///
    /// * `FuzzyQuery::Text` — that text, as-is.
    /// * `FuzzyQuery::Parts` with at least one part — the first part.
    /// * anything else (including empty `Parts`) — the raw query, trimmed.
    pub fn effective_query(&self) -> &str {
        let fuzzy = &self.query.fuzzy_query;

        if let FuzzyQuery::Text(text) = fuzzy {
            return text;
        }
        if let FuzzyQuery::Parts(parts) = fuzzy {
            if !parts.is_empty() {
                return parts[0];
            }
        }
        self.query.raw_query.trim()
    }
}
/// Outcome of a search: borrowed file items with their scores and counts.
///
/// `Default` yields empty vectors, zero counts and no location.
#[derive(Debug, Clone, Default)]
pub struct SearchResult<'a> {
/// Matched files, borrowed for `'a`.
pub items: Vec<&'a FileItem>,
/// Scores for the matches; presumably parallel to `items`
/// index-for-index — TODO confirm.
pub scores: Vec<Score>,
/// Presumably the number of matches before pagination — TODO confirm.
pub total_matched: usize,
/// Presumably the number of files that were considered — TODO confirm.
pub total_files: usize,
/// Location parsed from the query, if any.
pub location: Option<Location>,
}
/// Largest `FileItem::size` (10 MiB) that will be loaded at all; larger files
/// are rejected by both `get_content` and `get_content_for_search`.
const MAX_MMAP_FILE_SIZE: u64 = 10 * 1024 * 1024;
/// Ceiling (512 MiB) on the total size tracked in
/// `ContentCacheBudget::cached_bytes`; `get_content` refuses to cache a file
/// that would push the total past it.
const MAX_CACHED_CONTENT_BYTES: u64 = 512 * 1024 * 1024;
/// Shared accounting for cached file contents.
///
/// `max_files` is the cap on the number of cached files; the two atomic
/// counters track how many files and how much content are currently cached.
/// All counter accesses in this type use relaxed ordering.
#[derive(Debug)]
pub struct ContentCacheBudget {
    /// Maximum number of files that may be cached at once.
    pub max_files: usize,
    /// Number of files currently accounted as cached.
    pub cached_count: AtomicUsize,
    /// Total size currently accounted as cached.
    pub cached_bytes: AtomicU64,
}

impl ContentCacheBudget {
    /// Budget with no cap on the number of files (`max_files == usize::MAX`).
    pub fn unlimited() -> Self {
        Self::new(usize::MAX)
    }

    /// Budget that admits no files at all (`max_files == 0`).
    pub fn zero() -> Self {
        Self::new(0)
    }

    /// Budget capped at `max_files` files, with both counters starting at zero.
    pub fn new(max_files: usize) -> Self {
        let cached_count = AtomicUsize::new(0);
        let cached_bytes = AtomicU64::new(0);
        Self {
            max_files,
            cached_count,
            cached_bytes,
        }
    }

    /// Sets both counters back to zero; `max_files` is left untouched.
    pub fn reset(&self) {
        let order = Ordering::Relaxed;
        self.cached_count.store(0, order);
        self.cached_bytes.store(0, order);
    }
}

impl Default for ContentCacheBudget {
    /// Budget capped at 30,000 files.
    fn default() -> Self {
        const DEFAULT_MAX_FILES: usize = 30_000;
        Self::new(DEFAULT_MAX_FILES)
    }
}