1use std::io::Read;
2use std::path::{Path, PathBuf};
3use std::sync::OnceLock;
4use std::sync::atomic::{AtomicI32, AtomicU64, AtomicUsize, Ordering};
5
6use crate::constraints::Constrainable;
7use crate::query_tracker::QueryMatchEntry;
8use crate::simd_path::{ArenaPtr, PATH_BUF_SIZE};
9use ffs_query_parser::{FfsQuery, FuzzyQuery, Location};
10
/// Resolves which string arena holds the path bytes of an indexed item.
///
/// Callers in this file pick `overflow_arena` for items whose overflow flag
/// is set and `base_arena` otherwise.
pub trait FfsStringStorage {
    /// Returns the arena to read `file`'s path from.
    fn arena_for(&self, file: &FileItem) -> ArenaPtr;

    /// Returns the arena used for items that are not flagged as overflow.
    fn base_arena(&self) -> ArenaPtr;
    /// Returns the arena used for items that are flagged as overflow.
    fn overflow_arena(&self) -> ArenaPtr;
}
23
/// A bare `ArenaPtr` acts as its own storage: every lookup returns the same
/// pointer, so base and overflow are not distinguished.
impl FfsStringStorage for ArenaPtr {
    /// Returns this pointer; the file is not inspected.
    #[inline]
    fn arena_for(&self, _file: &FileItem) -> ArenaPtr {
        *self
    }

    /// Returns this pointer.
    #[inline]
    fn base_arena(&self) -> ArenaPtr {
        *self
    }

    /// Returns this pointer (identical to `base_arena`).
    #[inline]
    fn overflow_arena(&self) -> ArenaPtr {
        *self
    }
}
40
/// Bytes of a file held in the per-file content cache.
///
/// Dereferences to `[u8]` regardless of which variant backs it.
#[derive(Debug)]
// NOTE(review): `dead_code` is presumably allowed because not every variant
// is constructed on every target — confirm before removing.
#[allow(dead_code)]
pub enum FileContent {
    /// Memory-mapped file contents (not available on Windows builds).
    #[cfg(not(target_os = "windows"))]
    Mmap(memmap2::Mmap),
    /// File contents read into an owned heap buffer.
    Buffer(Vec<u8>),
}
56
57impl std::ops::Deref for FileContent {
58 type Target = [u8];
59 fn deref(&self) -> &[u8] {
60 match self {
61 #[cfg(not(target_os = "windows"))]
62 FileContent::Mmap(m) => m,
63 FileContent::Buffer(b) => b,
64 }
65 }
66}
67
/// Namespace for the bit masks stored in `FileItem`'s `flags` byte.
pub struct FileItemFlags;

impl FileItemFlags {
    /// Bit 0: the file is treated as binary.
    pub const BINARY: u8 = 0b0000_0001;
    /// Bit 1: the file is marked as deleted.
    pub const DELETED: u8 = 0b0000_0010;
    /// Bit 2: the file is an overflow entry.
    pub const OVERFLOW: u8 = 0b0000_0100;
}
79
/// Namespace for the bit masks stored in `DirItem`'s `flags` byte.
pub struct DirFlags;

impl DirFlags {
    /// Bit 0: the directory is an overflow entry.
    pub const OVERFLOW: u8 = 0b0000_0001;
}
85
/// A directory entry of the index.
#[derive(Debug)]
pub struct DirItem {
    /// Bit set of `DirFlags` masks.
    flags: u8,
    /// Relative path of the directory; its bytes are read through an arena.
    pub(crate) path: crate::simd_path::ChunkedString,
    /// Byte offset inside the relative path at which the last segment
    /// (the directory's own name) begins.
    last_segment_offset: u16,
    /// Largest frecency score recorded through `update_frecency_if_larger`;
    /// atomic so it can be updated through a shared reference.
    max_access_frecency: AtomicI32,
}
98
99impl Clone for DirItem {
100 fn clone(&self) -> Self {
101 Self {
102 flags: self.flags,
103 path: self.path.clone(),
104 last_segment_offset: self.last_segment_offset,
105 max_access_frecency: AtomicI32::new(self.max_access_frecency()),
106 }
107 }
108}
109
110impl DirItem {
111 #[inline(always)]
112 pub fn is_overflow(&self) -> bool {
113 self.flags & DirFlags::OVERFLOW == 0
114 }
115
116 pub(crate) fn new(path: crate::simd_path::ChunkedString, last_segment_offset: u16) -> Self {
117 Self {
118 path,
119 flags: 0,
120 last_segment_offset,
121 max_access_frecency: AtomicI32::new(0),
122 }
123 }
124
125 #[inline]
127 pub fn last_segment_offset(&self) -> u16 {
128 self.last_segment_offset
129 }
130
131 #[inline]
133 pub fn max_access_frecency(&self) -> i32 {
134 self.max_access_frecency.load(Ordering::Relaxed)
135 }
136
137 #[inline]
140 pub fn update_frecency_if_larger(&self, score: i32) {
141 self.max_access_frecency.fetch_max(score, Ordering::Relaxed);
142 }
143
144 #[inline]
146 pub fn reset_frecency(&self) {
147 self.max_access_frecency.store(0, Ordering::Relaxed);
148 }
149
150 pub(crate) fn read_relative_path<'a>(&self, arena: ArenaPtr, buf: &'a mut [u8]) -> &'a str {
151 self.path.read_to_buf(arena, buf)
152 }
153
154 pub fn relative_path(&self, arena: impl FfsStringStorage) -> String {
156 let mut out = String::new();
157 let ptr = if self.is_overflow() {
158 arena.overflow_arena()
159 } else {
160 arena.base_arena()
161 };
162
163 self.path.write_to_string(ptr, &mut out);
164 out
165 }
166
167 pub fn write_dir_name(&self, arena: ArenaPtr, out: &mut String) {
169 out.clear();
170 let total = self.path.byte_len as usize;
171 let offset = self.last_segment_offset as usize;
172 if offset >= total {
173 return;
174 }
175 let mut buf = [0u8; PATH_BUF_SIZE];
177 let full = self.path.read_to_buf(arena, &mut buf);
178 out.push_str(&full[offset..]);
179 }
180
181 pub fn dir_name(&self, arena: impl FfsStringStorage) -> String {
183 let mut out = String::new();
184 let ptr = if self.is_overflow() {
185 arena.overflow_arena()
186 } else {
187 arena.base_arena()
188 };
189 self.write_dir_name(ptr, &mut out);
190 out
191 }
192
193 pub fn absolute_path(&self, arena: impl FfsStringStorage, base_path: &Path) -> PathBuf {
195 let rel = self.relative_path(arena);
196 if rel.is_empty() {
197 base_path.to_path_buf()
198 } else {
199 base_path.join(&rel)
200 }
201 }
202}
203
/// Lets directories take part in constraint matching alongside files.
impl Constrainable for DirItem {
    /// Writes the directory's own name (its last path segment) into `out`.
    #[inline]
    fn write_file_name(&self, arena: ArenaPtr, out: &mut String) {
        self.write_dir_name(arena, out);
    }

    /// Writes the directory's relative path into `out`.
    #[inline]
    fn write_relative_path(&self, arena: ArenaPtr, out: &mut String) {
        self.path.write_to_string(arena, out);
    }

    /// Always `None`: no git status is stored on a `DirItem`.
    #[inline]
    fn git_status(&self) -> Option<git2::Status> {
        None
    }
}
221
/// A file entry of the index.
#[derive(Debug)]
pub struct FileItem {
    /// File size in bytes; also the amount charged to the content cache budget.
    pub size: u64,
    /// Last known modification time.
    /// NOTE(review): `update_metadata` names its input `modified_secs`, so this
    /// is presumably seconds since some epoch — confirm against the indexer.
    pub modified: u64,
    /// Access component of the frecency score.
    pub access_frecency_score: i16,
    /// Modification component of the frecency score.
    pub modification_frecency_score: i16,
    /// Git status of the file, if one is known.
    pub git_status: Option<git2::Status>,
    /// Relative path of the file; its bytes are read through an arena.
    pub(crate) path: crate::simd_path::ChunkedString,
    /// Index of the parent directory; `u32::MAX` until `set_parent_dir` is called.
    parent_dir: u32,
    /// Bit set of `FileItemFlags` masks.
    flags: u8,
    /// Lazily populated content cache, filled by `get_content`.
    content: OnceLock<FileContent>,
}
234
235impl Clone for FileItem {
236 fn clone(&self) -> Self {
237 Self {
238 path: self.path.clone(),
239 parent_dir: self.parent_dir,
240 size: self.size,
241 modified: self.modified,
242 access_frecency_score: self.access_frecency_score,
243 modification_frecency_score: self.modification_frecency_score,
244 git_status: self.git_status,
245 flags: self.flags,
246 content: OnceLock::new(),
248 }
249 }
250}
251
252impl FileItem {
253 pub fn new_raw(
254 filename_start: u16,
255 size: u64,
256 modified: u64,
257 git_status: Option<git2::Status>,
258 is_binary: bool,
259 ) -> Self {
260 let mut flags = 0u8;
261 if is_binary {
262 flags |= FileItemFlags::BINARY;
263 }
264
265 let mut path = crate::simd_path::ChunkedString::empty();
266 path.filename_offset = filename_start;
267
268 Self {
269 path,
270 parent_dir: u32::MAX,
271 size,
272 modified,
273 access_frecency_score: 0,
274 modification_frecency_score: 0,
275 git_status,
276 flags,
277 content: OnceLock::new(),
278 }
279 }
280
281 pub fn absolute_path(&self, arena: impl FfsStringStorage, base_path: &Path) -> PathBuf {
283 let mut buf = [0u8; PATH_BUF_SIZE];
284 let rel = self.path.read_to_buf(arena.arena_for(self), &mut buf);
285 base_path.join(rel)
286 }
287
288 pub(crate) fn set_path(&mut self, path: crate::simd_path::ChunkedString) {
289 self.path = path;
290 }
291
292 pub(crate) fn parent_dir_index(&self) -> u32 {
293 self.parent_dir
294 }
295
296 pub(crate) fn set_parent_dir(&mut self, idx: u32) {
297 self.parent_dir = idx;
298 }
299
300 pub fn dir_str(&self, arena: impl FfsStringStorage) -> String {
301 let mut s = String::with_capacity(64);
302 self.path.write_dir_to(arena.arena_for(self), &mut s);
303 s
304 }
305
306 pub(crate) fn write_dir_str(&self, arena: ArenaPtr, out: &mut String) {
307 self.path.write_dir_to(arena, out);
308 }
309
310 pub fn file_name(&self, arena: impl FfsStringStorage) -> String {
311 let mut s = String::with_capacity(32);
312 self.path.write_filename_to(arena.arena_for(self), &mut s);
313 s
314 }
315
316 pub(crate) fn write_file_name_from_arena(&self, arena: ArenaPtr, out: &mut String) {
317 self.path.write_filename_to(arena, out);
318 }
319
320 pub fn relative_path(&self, arena: impl FfsStringStorage) -> String {
321 let mut s = String::with_capacity(64);
322 self.path.write_to_string(arena.arena_for(self), &mut s);
323 s
324 }
325
326 pub(crate) fn write_relative_path_from_arena(&self, arena: ArenaPtr, out: &mut String) {
327 self.path.write_to_string(arena, out);
328 }
329
330 pub fn relative_path_len(&self) -> usize {
331 self.path.byte_len as usize
332 }
333
334 pub fn filename_offset_in_relative_path(&self) -> usize {
335 self.path.filename_offset as usize
336 }
337
338 pub(crate) fn relative_path_eq(&self, arena: ArenaPtr, other: &str) -> bool {
339 if other.len() != self.path.byte_len as usize {
340 return false;
341 }
342 let mut buf = [0u8; 512];
343 let mine = self.path.read_to_buf(arena, &mut buf);
344 mine == other
345 }
346
347 pub(crate) fn relative_path_starts_with(&self, arena: ArenaPtr, prefix: &str) -> bool {
348 let mut buf = [0u8; PATH_BUF_SIZE];
349 let path = self.path.read_to_buf(arena, &mut buf);
350 path.starts_with(prefix)
351 }
352
353 pub(crate) fn write_absolute_path<'a>(
357 &self,
358 arena: ArenaPtr,
359 base_path: &Path,
360 buf: &'a mut [u8; PATH_BUF_SIZE],
361 ) -> &'a Path {
362 let base = base_path.as_os_str().as_encoded_bytes();
363 let base_len = base.len();
364 buf[..base_len].copy_from_slice(base);
365 let sep_len = if base_len > 0 && base[base_len - 1] != std::path::MAIN_SEPARATOR as u8 {
366 buf[base_len] = std::path::MAIN_SEPARATOR as u8;
367 1
368 } else {
369 0
370 };
371
372 let base_end_idx = base_len + sep_len;
373 let relative_portion_str = self.path.read_to_buf(arena, &mut buf[base_end_idx..]);
374 let total = base_end_idx + relative_portion_str.len();
375 Path::new(unsafe { std::str::from_utf8_unchecked(&buf[..total]) })
376 }
377
378 #[cfg(unix)]
385 pub(crate) fn write_relative_cstr<'a>(
386 &self,
387 arena: ArenaPtr,
388 buf: &'a mut [u8; PATH_BUF_SIZE],
389 ) -> &'a std::ffi::CStr {
390 let rel = self.path.read_to_buf(arena, &mut buf[..PATH_BUF_SIZE - 1]);
392 let n = rel.len();
393 buf[n] = 0;
394 unsafe { std::ffi::CStr::from_bytes_with_nul_unchecked(&buf[..=n]) }
397 }
398
399 #[inline]
400 pub fn total_frecency_score(&self) -> i32 {
401 self.access_frecency_score as i32 + self.modification_frecency_score as i32
402 }
403
404 #[inline]
405 pub fn is_binary(&self) -> bool {
406 self.flags & FileItemFlags::BINARY != 0
407 }
408
409 #[inline]
410 pub fn set_binary(&mut self, val: bool) {
411 if val {
412 self.flags |= FileItemFlags::BINARY;
413 } else {
414 self.flags &= !FileItemFlags::BINARY;
415 }
416 }
417
418 #[inline]
419 pub fn is_deleted(&self) -> bool {
420 self.flags & FileItemFlags::DELETED != 0
421 }
422
423 #[inline]
424 pub fn set_deleted(&mut self, val: bool) {
425 if val {
426 self.flags |= FileItemFlags::DELETED;
427 } else {
428 self.flags &= !FileItemFlags::DELETED;
429 }
430 }
431
432 #[inline]
433 pub fn is_overflow(&self) -> bool {
434 self.flags & FileItemFlags::OVERFLOW != 0
435 }
436
437 #[inline]
438 pub fn set_overflow(&mut self, val: bool) {
439 if val {
440 self.flags |= FileItemFlags::OVERFLOW;
441 } else {
442 self.flags &= !FileItemFlags::OVERFLOW;
443 }
444 }
445}
446
447impl FileItem {
448 pub fn invalidate_mmap(&mut self, budget: &ContentCacheBudget) {
455 if self.content.get().is_some() {
456 budget.cached_count.fetch_sub(1, Ordering::Relaxed);
457 budget.cached_bytes.fetch_sub(self.size, Ordering::Relaxed);
458 }
459
460 self.content = OnceLock::new();
461 }
462
463 pub fn update_metadata(
464 &mut self,
465 budget: &ContentCacheBudget,
466 modified_secs: Option<u64>,
467 new_size: Option<u64>,
468 ) {
469 if let Some(modified) = modified_secs
470 && self.modified < modified
471 {
472 self.modified = modified;
473 }
474
475 self.invalidate_mmap(budget);
476
477 if let Some(size) = new_size {
478 self.size = size;
479 }
480 }
481
482 pub(crate) fn get_content(
490 &self,
491 arena: ArenaPtr,
492 base_path: &Path,
493 budget: &ContentCacheBudget,
494 ) -> Option<&[u8]> {
495 if let Some(content) = self.content.get() {
496 return Some(content);
497 }
498
499 let max_file_size = budget.max_file_size;
500 if self.size == 0 || self.size > max_file_size {
501 return None;
502 }
503
504 let count = budget.cached_count.load(Ordering::Relaxed);
506 let bytes = budget.cached_bytes.load(Ordering::Relaxed);
507 let max_files = budget.max_files;
508 let max_bytes = budget.max_bytes;
509 if count >= max_files || bytes + self.size > max_bytes {
510 return None;
511 }
512
513 let content = load_file_content(&self.absolute_path(arena, base_path), self.size)?;
514 let result = self.content.get_or_init(|| content);
515
516 budget.cached_count.fetch_add(1, Ordering::Relaxed);
519 budget.cached_bytes.fetch_add(self.size, Ordering::Relaxed);
520
521 Some(result)
522 }
523
524 #[inline]
530 pub(crate) fn get_content_for_search<'a>(
531 &'a self,
532 buf: &'a mut Vec<u8>, arena: ArenaPtr,
534 base_path: &Path,
535 budget: &ContentCacheBudget,
536 ) -> Option<&'a [u8]> {
537 if let Some(cached) = self.get_content(arena, base_path, budget) {
539 return Some(cached);
540 }
541
542 let max_file_size = budget.max_file_size;
543 if self.is_binary() || self.size == 0 || self.size > max_file_size {
544 return None;
545 }
546
547 let abs = self.absolute_path(arena, base_path);
552 let len = self.size as usize;
553 buf.resize(len, 0);
554 let mut file = std::fs::File::open(&abs).ok()?;
555 file.read_exact(buf).ok()?;
556 Some(buf.as_slice())
557 }
558}
559
560#[cfg(all(not(target_os = "windows"), target_arch = "aarch64"))]
563const MMAP_THRESHOLD: u64 = 16 * 1024;
564#[cfg(all(not(target_os = "windows"), not(target_arch = "aarch64")))]
565const MMAP_THRESHOLD: u64 = 4 * 1024;
566
567fn load_file_content(path: &Path, size: u64) -> Option<FileContent> {
568 #[cfg(not(target_os = "windows"))]
569 {
570 if size < MMAP_THRESHOLD {
571 let data = std::fs::read(path).ok()?;
572 Some(FileContent::Buffer(data))
573 } else {
574 let file = std::fs::File::open(path).ok()?;
575 let mmap = unsafe { memmap2::Mmap::map(&file) }.ok()?;
579 Some(FileContent::Mmap(mmap))
580 }
581 }
582
583 #[cfg(target_os = "windows")]
584 {
585 let _ = size;
586 let data = std::fs::read(path).ok()?;
587 Some(FileContent::Buffer(data))
588 }
589}
590
/// Lets files take part in constraint matching.
impl Constrainable for FileItem {
    /// Writes the file name part of the path into `out`.
    #[inline]
    fn write_file_name(&self, arena: ArenaPtr, out: &mut String) {
        self.path.write_filename_to(arena, out);
    }

    /// Writes the full relative path into `out`.
    #[inline]
    fn write_relative_path(&self, arena: ArenaPtr, out: &mut String) {
        self.path.write_to_string(arena, out);
    }

    /// Returns the stored git status, if any.
    #[inline]
    fn git_status(&self) -> Option<git2::Status> {
        self.git_status
    }
}
607
/// Breakdown of the score assigned to a single match.
///
/// `Default` yields all-zero numeric fields, `exact_match == false` and an
/// empty `match_type`.
///
/// NOTE(review): the fields are only declared here; how each component is
/// computed and combined into `total` lives in the scoring code, which is not
/// part of this file — confirm there before relying on field meanings.
#[derive(Debug, Clone, Default)]
pub struct Score {
    pub total: i32,
    pub base_score: i32,
    pub filename_bonus: i32,
    pub special_filename_bonus: i32,
    pub frecency_boost: i32,
    pub git_status_boost: i32,
    pub distance_penalty: i32,
    pub current_file_penalty: i32,
    pub combo_match_boost: i32,
    pub path_alignment_bonus: i32,
    pub exact_match: bool,
    pub match_type: &'static str,
}
623
/// Window of results to return from a search.
#[derive(Debug, Clone, Copy)]
pub struct PaginationArgs {
    /// Number of leading results to skip.
    pub offset: usize,
    /// Maximum number of results to return.
    pub limit: usize,
}

impl Default for PaginationArgs {
    /// Starts at the first result and returns at most 100 of them.
    fn default() -> Self {
        PaginationArgs {
            limit: 100,
            offset: 0,
        }
    }
}
638
/// Inputs handed to the scorer for one search.
///
/// NOTE(review): apart from `query`, the fields are not interpreted in this
/// file; the per-field notes below are read off the names and types only.
#[derive(Debug, Clone)]
pub struct ScoringContext<'a> {
    /// Parsed query; `effective_query` derives the text to match from it.
    pub query: &'a FfsQuery<'a>,
    /// Project root, if one is known.
    pub project_path: Option<&'a Path>,
    /// Path of the file currently open, if any.
    pub current_file: Option<&'a str>,
    /// Typo allowance passed to the matcher.
    pub max_typos: u16,
    /// Upper bound on worker threads.
    pub max_threads: usize,
    /// Previously recorded match for the same query, if any.
    pub last_same_query_match: Option<QueryMatchEntry>,
    /// Multiplier applied to the combo boost.
    pub combo_boost_score_multiplier: i32,
    /// Minimum combo count required for the combo boost.
    pub min_combo_count: u32,
    /// Result window to return.
    pub pagination: PaginationArgs,
}
651
652impl ScoringContext<'_> {
653 pub fn effective_query(&self) -> &str {
654 match &self.query.fuzzy_query {
655 FuzzyQuery::Text(t) => t,
656 FuzzyQuery::Parts(parts) if !parts.is_empty() => parts[0],
657 _ => self.query.raw_query.trim(),
658 }
659 }
660}
661
/// Result of a file search.
///
/// NOTE(review): `items` and `scores` are presumably parallel vectors (one
/// score per item) — confirm against the code that fills them.
#[derive(Debug, Clone, Default)]
pub struct SearchResult<'a> {
    /// Matched files, borrowed from the index.
    pub items: Vec<&'a FileItem>,
    /// Score breakdowns for the returned items.
    pub scores: Vec<Score>,
    /// Number of matches reported by the search.
    pub total_matched: usize,
    /// Number of files reported by the search.
    pub total_files: usize,
    /// Location parsed from the query, if any.
    pub location: Option<Location>,
}
670
/// Result of a directory search.
///
/// NOTE(review): `items` and `scores` are presumably parallel vectors (one
/// score per item) — confirm against the code that fills them.
#[derive(Debug, Clone, Default)]
pub struct DirSearchResult<'a> {
    /// Matched directories, borrowed from the index.
    pub items: Vec<&'a DirItem>,
    /// Score breakdowns for the returned items.
    pub scores: Vec<Score>,
    /// Number of matches reported by the search.
    pub total_matched: usize,
    /// Number of directories reported by the search.
    pub total_dirs: usize,
}
679
/// A borrowed search hit that is either a file or a directory.
#[derive(Debug, Clone)]
pub enum MixedItemRef<'a> {
    /// The hit is a file.
    File(&'a FileItem),
    /// The hit is a directory.
    Dir(&'a DirItem),
}
686
/// Result of a search that returns files and directories together.
///
/// NOTE(review): `items` and `scores` are presumably parallel vectors (one
/// score per item) — confirm against the code that fills them.
#[derive(Debug, Clone, Default)]
pub struct MixedSearchResult<'a> {
    /// Matched files and directories, borrowed from the index.
    pub items: Vec<MixedItemRef<'a>>,
    /// Score breakdowns for the returned items.
    pub scores: Vec<Score>,
    /// Number of matches reported by the search.
    pub total_matched: usize,
    /// Number of files reported by the search.
    pub total_files: usize,
    /// Number of directories reported by the search.
    pub total_dirs: usize,
    /// Location parsed from the query, if any.
    pub location: Option<Location>,
}
698
/// Placeholder `Default` implementation that must never actually run.
impl Default for MixedItemRef<'_> {
    /// # Panics
    ///
    /// Always panics: there is no meaningful default file-or-directory hit.
    fn default() -> Self {
        unreachable!("MixedItemRef::default should not be called")
    }
}
705
/// Largest file size, in bytes, accepted for content caching by the
/// non-zero budgets (10 MiB).
const MAX_MMAP_FILE_SIZE: u64 = 10 << 20;

/// Byte limit of the most generous repository tier (512 MiB).
const MAX_CACHED_CONTENT_BYTES: u64 = 512 << 20;

/// Limits and running totals for the per-file content cache.
///
/// The `max_*` fields are fixed limits; the two atomics track how much is
/// currently cached and can be updated through a shared reference.
#[derive(Debug)]
pub struct ContentCacheBudget {
    /// Maximum number of files that may have cached content.
    pub max_files: usize,
    /// Maximum total number of cached bytes.
    pub max_bytes: u64,
    /// Largest single file, in bytes, that may be cached.
    pub max_file_size: u64,
    /// Number of files currently counted as cached.
    pub cached_count: AtomicUsize,
    /// Number of bytes currently counted as cached.
    pub cached_bytes: AtomicU64,
}

impl ContentCacheBudget {
    /// Budget with no file-count or byte limit; the per-file size limit
    /// still applies.
    pub fn unlimited() -> Self {
        Self {
            cached_count: AtomicUsize::new(0),
            cached_bytes: AtomicU64::new(0),
            max_files: usize::MAX,
            max_bytes: u64::MAX,
            max_file_size: MAX_MMAP_FILE_SIZE,
        }
    }

    /// Budget whose limits are all zero.
    pub fn zero() -> Self {
        Self {
            cached_count: AtomicUsize::new(0),
            cached_bytes: AtomicU64::new(0),
            max_files: 0,
            max_bytes: 0,
            max_file_size: 0,
        }
    }

    /// Picks limits from the repository size: the more files a repository
    /// has, the fewer files and bytes may be cached.
    pub fn new_for_repo(file_count: usize) -> Self {
        const MIB: u64 = 1 << 20;

        let (max_files, max_bytes) = match file_count {
            n if n > 50_000 => (5_000, 128 * MIB),
            n if n > 10_000 => (10_000, 256 * MIB),
            _ => (30_000, MAX_CACHED_CONTENT_BYTES),
        };

        Self {
            cached_count: AtomicUsize::new(0),
            cached_bytes: AtomicU64::new(0),
            max_files,
            max_bytes,
            max_file_size: MAX_MMAP_FILE_SIZE,
        }
    }

    /// Builds a budget from explicit overrides, where `0` means "keep the
    /// default for this limit".
    ///
    /// Returns `None` when all three overrides are `0`.
    pub fn from_overrides(max_files: usize, max_bytes: u64, max_file_size: u64) -> Option<Self> {
        if (max_files, max_bytes, max_file_size) == (0, 0, 0) {
            return None;
        }

        let defaults = Self::default();
        Some(Self {
            max_files: if max_files > 0 {
                max_files
            } else {
                defaults.max_files
            },
            max_bytes: if max_bytes > 0 {
                max_bytes
            } else {
                defaults.max_bytes
            },
            max_file_size: if max_file_size > 0 {
                max_file_size
            } else {
                defaults.max_file_size
            },
            ..defaults
        })
    }

    /// Sets both running totals back to zero; the limits are untouched.
    pub fn reset(&self) {
        self.cached_count.store(0, Ordering::Relaxed);
        self.cached_bytes.store(0, Ordering::Relaxed);
    }
}

impl Default for ContentCacheBudget {
    /// Same limits as a repository of 30,000 files.
    fn default() -> Self {
        Self::new_for_repo(30_000)
    }
}
802
#[cfg(test)]
impl FileItem {
    /// Test helper: builds a `FileItem` for `rel_path` and discards the arena
    /// pointer that `new_for_test_with_arena` returns alongside it.
    pub fn new_for_test(
        rel_path: &str,
        size: u64,
        modified: u64,
        git_status: Option<git2::Status>,
        is_binary: bool,
    ) -> Self {
        Self::new_for_test_with_arena(rel_path, size, modified, git_status, is_binary).0
    }

    /// Test helper: builds a `FileItem` for `rel_path` together with a pointer
    /// to the arena that holds its path bytes.
    ///
    /// The backing store is deliberately leaked with `mem::forget` so the
    /// returned arena pointer stays usable for the rest of the test.
    pub(crate) fn new_for_test_with_arena(
        rel_path: &str,
        size: u64,
        modified: u64,
        git_status: Option<git2::Status>,
        is_binary: bool,
    ) -> (Self, ArenaPtr) {
        // The file name starts right after the last separator, or at 0 when
        // the path has no separator.
        let filename_start = rel_path
            .rfind(std::path::is_separator)
            .map_or(0, |sep| sep + 1) as u16;
        let mut item = Self::new_raw(filename_start, size, modified, git_status, is_binary);

        let paths = [rel_path.to_string()];
        let (store, strings) = crate::simd_path::build_chunked_path_store_from_strings(
            &paths,
            std::slice::from_ref(&item),
        );
        let chunked = strings.into_iter().next().unwrap();
        let arena = store.as_arena_ptr();
        item.set_path(chunked);
        std::mem::forget(store);

        (item, arena)
    }
}