1use std::io::Read;
2use std::path::{Path, PathBuf};
3use std::sync::OnceLock;
4use std::sync::atomic::{AtomicI32, AtomicU64, AtomicUsize, Ordering};
5
6use crate::constraints::Constrainable;
7use crate::query_tracker::QueryMatchEntry;
8use crate::simd_path::{ArenaPtr, PATH_BUF_SIZE};
9use fff_query_parser::{FFFQuery, FuzzyQuery, Location};
10
11pub trait FFFStringStorage {
15 fn arena_for(&self, file: &FileItem) -> ArenaPtr;
17
18 fn base_arena(&self) -> ArenaPtr;
20 fn overflow_arena(&self) -> ArenaPtr;
22}
23
24impl FFFStringStorage for ArenaPtr {
25 #[inline]
26 fn arena_for(&self, _file: &FileItem) -> ArenaPtr {
27 *self
28 }
29
30 #[inline]
31 fn base_arena(&self) -> ArenaPtr {
32 *self
33 }
34
35 #[inline]
36 fn overflow_arena(&self) -> ArenaPtr {
37 *self
38 }
39}
40
41#[derive(Debug)]
50#[allow(dead_code)] pub enum FileContent {
52 #[cfg(not(target_os = "windows"))]
53 Mmap(memmap2::Mmap),
54 Buffer(Vec<u8>),
55}
56
57impl std::ops::Deref for FileContent {
58 type Target = [u8];
59 fn deref(&self) -> &[u8] {
60 match self {
61 #[cfg(not(target_os = "windows"))]
62 FileContent::Mmap(m) => m,
63 FileContent::Buffer(b) => b,
64 }
65 }
66}
67
/// Namespace for the bit masks kept in a `FileItem`'s private `flags` byte.
pub struct FileItemFlags;

impl FileItemFlags {
    /// Bit 0 — tested by `FileItem::is_binary`.
    pub const BINARY: u8 = 0b0000_0001;
    /// Bit 1 — tested by `FileItem::is_deleted`.
    pub const DELETED: u8 = 0b0000_0010;
    /// Bit 2 — tested by `FileItem::is_overflow`.
    pub const OVERFLOW: u8 = 0b0000_0100;
}
79
/// Namespace for the bit masks kept in a `DirItem`'s private `flags` byte.
pub struct DirFlags;

impl DirFlags {
    /// Bit 0 — tested by `DirItem::is_overflow`.
    pub const OVERFLOW: u8 = 0b0000_0001;
}
85
86#[derive(Debug)]
88pub struct DirItem {
89 flags: u8,
90 pub(crate) path: crate::simd_path::ChunkedString,
91 last_segment_offset: u16,
94 max_access_frecency: AtomicI32,
97}
98
99impl Clone for DirItem {
100 fn clone(&self) -> Self {
101 Self {
102 flags: self.flags,
103 path: self.path.clone(),
104 last_segment_offset: self.last_segment_offset,
105 max_access_frecency: AtomicI32::new(self.max_access_frecency()),
106 }
107 }
108}
109
110impl DirItem {
111 #[inline(always)]
112 pub fn is_overflow(&self) -> bool {
113 self.flags & DirFlags::OVERFLOW == 0
114 }
115
116 pub(crate) fn new(path: crate::simd_path::ChunkedString, last_segment_offset: u16) -> Self {
117 Self {
118 path,
119 flags: 0,
120 last_segment_offset,
121 max_access_frecency: AtomicI32::new(0),
122 }
123 }
124
125 #[inline]
127 pub fn last_segment_offset(&self) -> u16 {
128 self.last_segment_offset
129 }
130
131 #[inline]
133 pub fn max_access_frecency(&self) -> i32 {
134 self.max_access_frecency.load(Ordering::Relaxed)
135 }
136
137 #[inline]
140 pub fn update_frecency_if_larger(&self, score: i32) {
141 self.max_access_frecency.fetch_max(score, Ordering::Relaxed);
142 }
143
144 #[inline]
146 pub fn reset_frecency(&self) {
147 self.max_access_frecency.store(0, Ordering::Relaxed);
148 }
149
150 pub(crate) fn read_relative_path<'a>(&self, arena: ArenaPtr, buf: &'a mut [u8]) -> &'a str {
151 self.path.read_to_buf(arena, buf)
152 }
153
154 pub fn relative_path(&self, arena: impl FFFStringStorage) -> String {
156 let mut out = String::new();
157 let ptr = if self.is_overflow() {
158 arena.overflow_arena()
159 } else {
160 arena.base_arena()
161 };
162
163 self.path.write_to_string(ptr, &mut out);
164 out
165 }
166
167 pub fn write_dir_name(&self, arena: ArenaPtr, out: &mut String) {
169 out.clear();
170 let total = self.path.byte_len as usize;
171 let offset = self.last_segment_offset as usize;
172 if offset >= total {
173 return;
174 }
175 let mut buf = [0u8; PATH_BUF_SIZE];
177 let full = self.path.read_to_buf(arena, &mut buf);
178 out.push_str(&full[offset..]);
179 }
180
181 pub fn dir_name(&self, arena: impl FFFStringStorage) -> String {
183 let mut out = String::new();
184 let ptr = if self.is_overflow() {
185 arena.overflow_arena()
186 } else {
187 arena.base_arena()
188 };
189 self.write_dir_name(ptr, &mut out);
190 out
191 }
192
193 pub fn absolute_path(&self, arena: impl FFFStringStorage, base_path: &Path) -> PathBuf {
195 let rel = self.relative_path(arena);
196 if rel.is_empty() {
197 base_path.to_path_buf()
198 } else {
199 base_path.join(&rel)
200 }
201 }
202}
203
204impl Constrainable for DirItem {
205 #[inline]
206 fn write_file_name(&self, arena: ArenaPtr, out: &mut String) {
207 self.write_dir_name(arena, out);
209 }
210
211 #[inline]
212 fn write_relative_path(&self, arena: ArenaPtr, out: &mut String) {
213 self.path.write_to_string(arena, out);
214 }
215
216 #[inline]
217 fn git_status(&self) -> Option<git2::Status> {
218 None
219 }
220}
221
222#[derive(Debug)]
223pub struct FileItem {
224 pub size: u64,
225 pub modified: u64,
226 pub access_frecency_score: i16,
227 pub modification_frecency_score: i16,
228 pub git_status: Option<git2::Status>,
229 pub(crate) path: crate::simd_path::ChunkedString,
230 parent_dir: u32,
231 flags: u8,
232 content: OnceLock<FileContent>,
233}
234
235impl Clone for FileItem {
236 fn clone(&self) -> Self {
237 Self {
238 path: self.path.clone(),
239 parent_dir: self.parent_dir,
240 size: self.size,
241 modified: self.modified,
242 access_frecency_score: self.access_frecency_score,
243 modification_frecency_score: self.modification_frecency_score,
244 git_status: self.git_status,
245 flags: self.flags,
246 content: OnceLock::new(),
248 }
249 }
250}
251
252impl FileItem {
253 pub fn new_raw(
254 filename_start: u16,
255 size: u64,
256 modified: u64,
257 git_status: Option<git2::Status>,
258 is_binary: bool,
259 ) -> Self {
260 let mut flags = 0u8;
261 if is_binary {
262 flags |= FileItemFlags::BINARY;
263 }
264
265 let mut path = crate::simd_path::ChunkedString::empty();
266 path.filename_offset = filename_start;
267
268 Self {
269 path,
270 parent_dir: u32::MAX,
271 size,
272 modified,
273 access_frecency_score: 0,
274 modification_frecency_score: 0,
275 git_status,
276 flags,
277 content: OnceLock::new(),
278 }
279 }
280
281 pub fn absolute_path(&self, arena: impl FFFStringStorage, base_path: &Path) -> PathBuf {
283 let mut buf = [0u8; PATH_BUF_SIZE];
284 let rel = self.path.read_to_buf(arena.arena_for(self), &mut buf);
285 base_path.join(rel)
286 }
287
288 pub(crate) fn set_path(&mut self, path: crate::simd_path::ChunkedString) {
289 self.path = path;
290 }
291
292 pub(crate) fn parent_dir_index(&self) -> u32 {
293 self.parent_dir
294 }
295
296 pub(crate) fn set_parent_dir(&mut self, idx: u32) {
297 self.parent_dir = idx;
298 }
299
300 pub fn dir_str(&self, arena: impl FFFStringStorage) -> String {
301 let mut s = String::with_capacity(64);
302 self.path.write_dir_to(arena.arena_for(self), &mut s);
303 s
304 }
305
306 pub(crate) fn write_dir_str(&self, arena: ArenaPtr, out: &mut String) {
307 self.path.write_dir_to(arena, out);
308 }
309
310 pub fn file_name(&self, arena: impl FFFStringStorage) -> String {
311 let mut s = String::with_capacity(32);
312 self.path.write_filename_to(arena.arena_for(self), &mut s);
313 s
314 }
315
316 pub(crate) fn write_file_name_from_arena(&self, arena: ArenaPtr, out: &mut String) {
317 self.path.write_filename_to(arena, out);
318 }
319
320 pub fn relative_path(&self, arena: impl FFFStringStorage) -> String {
321 let mut s = String::with_capacity(64);
322 self.path.write_to_string(arena.arena_for(self), &mut s);
323 s
324 }
325
326 pub(crate) fn write_relative_path_from_arena(&self, arena: ArenaPtr, out: &mut String) {
327 self.path.write_to_string(arena, out);
328 }
329
330 pub fn relative_path_len(&self) -> usize {
331 self.path.byte_len as usize
332 }
333
334 pub fn filename_offset_in_relative_path(&self) -> usize {
335 self.path.filename_offset as usize
336 }
337
338 pub(crate) fn relative_path_eq(&self, arena: ArenaPtr, other: &str) -> bool {
339 if other.len() != self.path.byte_len as usize {
340 return false;
341 }
342 let mut buf = [0u8; 512];
343 let mine = self.path.read_to_buf(arena, &mut buf);
344 mine == other
345 }
346
347 pub(crate) fn relative_path_starts_with(&self, arena: ArenaPtr, prefix: &str) -> bool {
348 let mut buf = [0u8; PATH_BUF_SIZE];
349 let path = self.path.read_to_buf(arena, &mut buf);
350 path.starts_with(prefix)
351 }
352
353 pub(crate) fn write_absolute_path<'a>(
354 &self,
355 arena: ArenaPtr,
356 base_path: &Path,
357 buf: &'a mut [u8; PATH_BUF_SIZE],
358 ) -> &'a Path {
359 let base = base_path.as_os_str().as_encoded_bytes();
360 let base_len = base.len();
361 buf[..base_len].copy_from_slice(base);
362 let sep_len = if base_len > 0 && base[base_len - 1] != b'/' {
364 buf[base_len] = b'/';
365 1
366 } else {
367 0
368 };
369 let rel_start = base_len + sep_len;
370 let mut rel_buf = [0u8; PATH_BUF_SIZE];
371 let rel = self.path.read_to_buf(arena, &mut rel_buf);
372 let rel_bytes = rel.as_bytes();
373 buf[rel_start..rel_start + rel_bytes.len()].copy_from_slice(rel_bytes);
374 let total = rel_start + rel_bytes.len();
375 Path::new(unsafe { std::str::from_utf8_unchecked(&buf[..total]) })
376 }
377
378 #[inline]
379 pub fn total_frecency_score(&self) -> i32 {
380 self.access_frecency_score as i32 + self.modification_frecency_score as i32
381 }
382
383 #[inline]
384 pub fn is_binary(&self) -> bool {
385 self.flags & FileItemFlags::BINARY != 0
386 }
387
388 #[inline]
389 pub fn set_binary(&mut self, val: bool) {
390 if val {
391 self.flags |= FileItemFlags::BINARY;
392 } else {
393 self.flags &= !FileItemFlags::BINARY;
394 }
395 }
396
397 #[inline]
398 pub fn is_deleted(&self) -> bool {
399 self.flags & FileItemFlags::DELETED != 0
400 }
401
402 #[inline]
403 pub fn set_deleted(&mut self, val: bool) {
404 if val {
405 self.flags |= FileItemFlags::DELETED;
406 } else {
407 self.flags &= !FileItemFlags::DELETED;
408 }
409 }
410
411 #[inline]
412 pub fn is_overflow(&self) -> bool {
413 self.flags & FileItemFlags::OVERFLOW != 0
414 }
415
416 #[inline]
417 pub fn set_overflow(&mut self, val: bool) {
418 if val {
419 self.flags |= FileItemFlags::OVERFLOW;
420 } else {
421 self.flags &= !FileItemFlags::OVERFLOW;
422 }
423 }
424}
425
426impl FileItem {
427 pub fn invalidate_mmap(&mut self, budget: &ContentCacheBudget) {
434 if self.content.get().is_some() {
435 budget.cached_count.fetch_sub(1, Ordering::Relaxed);
436 budget.cached_bytes.fetch_sub(self.size, Ordering::Relaxed);
437 }
438
439 self.content = OnceLock::new();
440 }
441
442 pub(crate) fn get_content(
450 &self,
451 arena: ArenaPtr,
452 base_path: &Path,
453 budget: &ContentCacheBudget,
454 ) -> Option<&[u8]> {
455 if let Some(content) = self.content.get() {
456 return Some(content);
457 }
458
459 let max_file_size = budget.max_file_size;
460 if self.size == 0 || self.size > max_file_size {
461 return None;
462 }
463
464 let count = budget.cached_count.load(Ordering::Relaxed);
466 let bytes = budget.cached_bytes.load(Ordering::Relaxed);
467 let max_files = budget.max_files;
468 let max_bytes = budget.max_bytes;
469 if count >= max_files || bytes + self.size > max_bytes {
470 return None;
471 }
472
473 let content = load_file_content(&self.absolute_path(arena, base_path), self.size)?;
474 let result = self.content.get_or_init(|| content);
475
476 budget.cached_count.fetch_add(1, Ordering::Relaxed);
479 budget.cached_bytes.fetch_add(self.size, Ordering::Relaxed);
480
481 Some(result)
482 }
483
484 #[inline]
490 pub(crate) fn get_content_for_search<'a>(
491 &'a self,
492 buf: &'a mut Vec<u8>, arena: ArenaPtr,
494 base_path: &Path,
495 budget: &ContentCacheBudget,
496 ) -> Option<&'a [u8]> {
497 if let Some(cached) = self.get_content(arena, base_path, budget) {
499 return Some(cached);
500 }
501
502 let max_file_size = budget.max_file_size;
503 if self.is_binary() || self.size == 0 || self.size > max_file_size {
504 return None;
505 }
506
507 let abs = self.absolute_path(arena, base_path);
512 let len = self.size as usize;
513 buf.resize(len, 0);
514 let mut file = std::fs::File::open(&abs).ok()?;
515 file.read_exact(buf).ok()?;
516 Some(buf.as_slice())
517 }
518}
519
520#[cfg(target_arch = "aarch64")]
522const MMAP_THRESHOLD: u64 = 16 * 1024;
523#[cfg(not(target_arch = "aarch64"))]
524const MMAP_THRESHOLD: u64 = 4 * 1024;
525
526fn load_file_content(path: &Path, size: u64) -> Option<FileContent> {
527 #[cfg(not(target_os = "windows"))]
528 {
529 if size < MMAP_THRESHOLD {
530 let data = std::fs::read(path).ok()?;
531 Some(FileContent::Buffer(data))
532 } else {
533 let file = std::fs::File::open(path).ok()?;
534 let mmap = unsafe { memmap2::Mmap::map(&file) }.ok()?;
538 Some(FileContent::Mmap(mmap))
539 }
540 }
541
542 #[cfg(target_os = "windows")]
543 {
544 let _ = size;
545 let data = std::fs::read(path).ok()?;
546 Some(FileContent::Buffer(data))
547 }
548}
549
550impl Constrainable for FileItem {
551 #[inline]
552 fn write_file_name(&self, arena: ArenaPtr, out: &mut String) {
553 self.path.write_filename_to(arena, out);
554 }
555
556 #[inline]
557 fn write_relative_path(&self, arena: ArenaPtr, out: &mut String) {
558 self.path.write_to_string(arena, out);
559 }
560
561 #[inline]
562 fn git_status(&self) -> Option<git2::Status> {
563 self.git_status
564 }
565}
566
/// Score of one search hit, together with the components recorded for it.
///
/// This type only carries the numbers; how the components are computed and combined is not
/// defined here. `Default` zeroes every number, clears `exact_match` and leaves `match_type`
/// empty.
#[derive(Debug, Clone, Default)]
pub struct Score {
    /// Final score.
    pub total: i32,
    /// Base match score.
    pub base_score: i32,
    /// Bonus for a match in the file name.
    pub filename_bonus: i32,
    /// Bonus for special file names.
    pub special_filename_bonus: i32,
    /// Boost derived from frecency.
    pub frecency_boost: i32,
    /// Boost derived from git status.
    pub git_status_boost: i32,
    /// Penalty derived from distance.
    pub distance_penalty: i32,
    /// Penalty for the current file.
    pub current_file_penalty: i32,
    /// Boost for combo matches.
    pub combo_match_boost: i32,
    /// Bonus for path alignment.
    pub path_alignment_bonus: i32,
    /// Whether the hit was an exact match.
    pub exact_match: bool,
    /// Static label naming the kind of match.
    pub match_type: &'static str,
}
582
/// Window of results to return from a search.
#[derive(Debug, Clone, Copy)]
pub struct PaginationArgs {
    /// Number of leading results to skip.
    pub offset: usize,
    /// Maximum number of results to return.
    pub limit: usize,
}

/// The default window starts at the first result and holds at most 100 entries.
impl Default for PaginationArgs {
    fn default() -> Self {
        let (offset, limit) = (0, 100);
        Self { offset, limit }
    }
}
597
598#[derive(Debug, Clone)]
599pub struct ScoringContext<'a> {
600 pub query: &'a FFFQuery<'a>,
601 pub project_path: Option<&'a Path>,
602 pub current_file: Option<&'a str>,
603 pub max_typos: u16,
604 pub max_threads: usize,
605 pub last_same_query_match: Option<QueryMatchEntry>,
606 pub combo_boost_score_multiplier: i32,
607 pub min_combo_count: u32,
608 pub pagination: PaginationArgs,
609}
610
611impl ScoringContext<'_> {
612 pub fn effective_query(&self) -> &str {
613 match &self.query.fuzzy_query {
614 FuzzyQuery::Text(t) => t,
615 FuzzyQuery::Parts(parts) if !parts.is_empty() => parts[0],
616 _ => self.query.raw_query.trim(),
617 }
618 }
619}
620
621#[derive(Debug, Clone, Default)]
622pub struct SearchResult<'a> {
623 pub items: Vec<&'a FileItem>,
624 pub scores: Vec<Score>,
625 pub total_matched: usize,
626 pub total_files: usize,
627 pub location: Option<Location>,
628}
629
630#[derive(Debug, Clone, Default)]
632pub struct DirSearchResult<'a> {
633 pub items: Vec<&'a DirItem>,
634 pub scores: Vec<Score>,
635 pub total_matched: usize,
636 pub total_dirs: usize,
637}
638
639#[derive(Debug, Clone)]
641pub enum MixedItemRef<'a> {
642 File(&'a FileItem),
643 Dir(&'a DirItem),
644}
645
646#[derive(Debug, Clone, Default)]
649pub struct MixedSearchResult<'a> {
650 pub items: Vec<MixedItemRef<'a>>,
651 pub scores: Vec<Score>,
652 pub total_matched: usize,
653 pub total_files: usize,
654 pub total_dirs: usize,
655 pub location: Option<Location>,
656}
657
658impl Default for MixedItemRef<'_> {
659 fn default() -> Self {
660 unreachable!("MixedItemRef::default should not be called")
662 }
663}
664
/// Largest file, in bytes, that the standard budgets allow to be cached (10 MiB).
const MAX_MMAP_FILE_SIZE: u64 = 10 << 20;

/// Byte limit used by `new_for_repo` for repositories of at most 10,000 files (512 MiB).
const MAX_CACHED_CONTENT_BYTES: u64 = 512 << 20;

/// Limits on cached file content, plus running totals of what is currently cached.
///
/// The limits are plain fields; the totals are atomics so they can be updated through a
/// shared reference.
#[derive(Debug)]
pub struct ContentCacheBudget {
    /// Maximum number of files that may be cached at once.
    pub max_files: usize,
    /// Maximum total number of cached bytes.
    pub max_bytes: u64,
    /// Largest single file, in bytes, that may be cached.
    pub max_file_size: u64,
    /// Number of files currently counted as cached.
    pub cached_count: AtomicUsize,
    /// Number of bytes currently counted as cached.
    pub cached_bytes: AtomicU64,
}

impl ContentCacheBudget {
    /// Builds a budget with the given limits and both running totals at zero.
    fn with_limits(max_files: usize, max_bytes: u64, max_file_size: u64) -> Self {
        Self {
            max_files,
            max_bytes,
            max_file_size,
            cached_count: AtomicUsize::new(0),
            cached_bytes: AtomicU64::new(0),
        }
    }

    /// Budget with no cap on file count or total bytes; the per-file size cap still applies.
    pub fn unlimited() -> Self {
        Self::with_limits(usize::MAX, u64::MAX, MAX_MMAP_FILE_SIZE)
    }

    /// Budget with every limit set to zero.
    pub fn zero() -> Self {
        Self::with_limits(0, 0, 0)
    }

    /// Budget scaled to the number of files in a repository.
    ///
    /// | `file_count`      | `max_files` | `max_bytes` |
    /// |-------------------|-------------|-------------|
    /// | more than 50,000  | 5,000       | 128 MiB     |
    /// | more than 10,000  | 10,000      | 256 MiB     |
    /// | otherwise         | 30,000      | 512 MiB     |
    pub fn new_for_repo(file_count: usize) -> Self {
        let (max_files, max_bytes) = match file_count {
            n if n > 50_000 => (5_000, 128 * 1024 * 1024),
            n if n > 10_000 => (10_000, 256 * 1024 * 1024),
            _ => (30_000, MAX_CACHED_CONTENT_BYTES),
        };

        Self::with_limits(max_files, max_bytes, MAX_MMAP_FILE_SIZE)
    }

    /// Builds a budget from optional overrides, where `0` means "no override".
    ///
    /// Returns `None` when all three arguments are zero. Otherwise starts from
    /// `Self::default()` and replaces each limit whose override is non-zero.
    pub fn from_overrides(max_files: usize, max_bytes: u64, max_file_size: u64) -> Option<Self> {
        if (max_files, max_bytes, max_file_size) == (0, 0, 0) {
            return None;
        }

        let base = Self::default();
        Some(Self {
            max_files: if max_files == 0 { base.max_files } else { max_files },
            max_bytes: if max_bytes == 0 { base.max_bytes } else { max_bytes },
            max_file_size: if max_file_size == 0 {
                base.max_file_size
            } else {
                max_file_size
            },
            ..base
        })
    }

    /// Sets both running totals back to zero; the limits are left untouched.
    pub fn reset(&self) {
        self.cached_bytes.store(0, Ordering::Relaxed);
        self.cached_count.store(0, Ordering::Relaxed);
    }
}

/// The default budget is the one `new_for_repo` produces for a 30,000-file repository.
impl Default for ContentCacheBudget {
    fn default() -> Self {
        Self::new_for_repo(30_000)
    }
}
761
#[cfg(test)]
impl FileItem {
    /// Test helper: builds a `FileItem` for `rel_path` and discards the arena pointer.
    pub fn new_for_test(
        rel_path: &str,
        size: u64,
        modified: u64,
        git_status: Option<git2::Status>,
        is_binary: bool,
    ) -> Self {
        Self::new_for_test_with_arena(rel_path, size, modified, git_status, is_binary).0
    }

    /// Test helper: builds a `FileItem` for `rel_path` together with the arena pointer
    /// needed to read its path back.
    ///
    /// The file name is taken to start just after the last path separator in `rel_path`,
    /// or at offset 0 when there is none.
    pub(crate) fn new_for_test_with_arena(
        rel_path: &str,
        size: u64,
        modified: u64,
        git_status: Option<git2::Status>,
        is_binary: bool,
    ) -> (Self, ArenaPtr) {
        let filename_start = rel_path
            .rfind(std::path::is_separator)
            .map_or(0, |sep| sep + 1) as u16;
        let mut item = Self::new_raw(filename_start, size, modified, git_status, is_binary);

        let paths = [rel_path.to_string()];
        let (store, strings) = crate::simd_path::build_chunked_path_store_from_strings(
            &paths,
            std::slice::from_ref(&item),
        );

        let arena = store.as_arena_ptr();
        let chunked = strings.into_iter().next().unwrap();
        item.set_path(chunked);

        // The store is deliberately leaked. NOTE(review): presumably so the returned
        // `ArenaPtr` stays valid for the rest of the test — confirm.
        std::mem::forget(store);

        (item, arena)
    }
}