Skip to main content

ffs_search/
types.rs

1use std::io::Read;
2use std::path::{Path, PathBuf};
3use std::sync::OnceLock;
4use std::sync::atomic::{AtomicI32, AtomicU64, AtomicUsize, Ordering};
5
6use crate::constraints::Constrainable;
7use crate::query_tracker::QueryMatchEntry;
8use crate::simd_path::{ArenaPtr, PATH_BUF_SIZE};
9use ffs_query_parser::{FfsQuery, FuzzyQuery, Location};
10
11/// Different sources of the string storage used by ffs
12/// implements as a deduplicated 16-bytes alined heap
13/// can be stored in RAM or on disk
14pub trait FfsStringStorage {
15    /// Resolve the arena for a [`FileItem`] (handles base vs overflow split).
16    fn arena_for(&self, file: &FileItem) -> ArenaPtr;
17
18    /// The base arena (scan-time paths).
19    fn base_arena(&self) -> ArenaPtr;
20    /// The overflow arena (paths added after the last full scan).
21    fn overflow_arena(&self) -> ArenaPtr;
22}
23
24impl FfsStringStorage for ArenaPtr {
25    #[inline]
26    fn arena_for(&self, _file: &FileItem) -> ArenaPtr {
27        *self
28    }
29
30    #[inline]
31    fn base_arena(&self) -> ArenaPtr {
32        *self
33    }
34
35    #[inline]
36    fn overflow_arena(&self) -> ArenaPtr {
37        *self
38    }
39}
40
41/// Cached file contents — mmap on Unix, heap buffer on Windows.
42///
43/// On Windows, memory-mapped files hold the file handle open and prevent
44/// editors from saving (writing/replacing) those files. Reading into a
45/// `Vec<u8>` releases the handle immediately after the read completes.
46///
47/// The `Buffer` variant is also used on Unix for temporary (uncached) reads
48/// where the mmap/munmap syscall overhead exceeds the cost of a heap copy.
49#[derive(Debug)]
50#[allow(dead_code)] // variants are conditionally used per platform
51pub enum FileContent {
52    #[cfg(not(target_os = "windows"))]
53    Mmap(memmap2::Mmap),
54    Buffer(Vec<u8>),
55}
56
57impl std::ops::Deref for FileContent {
58    type Target = [u8];
59    fn deref(&self) -> &[u8] {
60        match self {
61            #[cfg(not(target_os = "windows"))]
62            FileContent::Mmap(m) => m,
63            FileContent::Buffer(b) => b,
64        }
65    }
66}
67
/// Namespace for the bit masks stored in a file item's `flags` byte.
pub struct FileItemFlags;

impl FileItemFlags {
    /// Bit 0: the file is marked as binary.
    pub const BINARY: u8 = 0b0000_0001;
    /// Bit 1: tombstone. The file was deleted but its index slot is kept so
    /// bigram indices for other files stay valid.
    pub const DELETED: u8 = 0b0000_0010;
    /// Bit 2: the file was added after the last full reindex; its indices
    /// point into the overflow builder arena, not the base arena.
    pub const OVERFLOW: u8 = 0b0000_0100;
}
79
/// Namespace for the bit masks stored in a directory item's `flags` byte.
pub struct DirFlags;

impl DirFlags {
    /// Bit 0: overflow marker for a directory.
    pub const OVERFLOW: u8 = 0b0000_0001;
}
85
86/// A directory in the file index. Shares chunk arena with file paths.
87#[derive(Debug)]
88pub struct DirItem {
89    flags: u8,
90    pub(crate) path: crate::simd_path::ChunkedString,
91    /// Byte offset where the last path segment begins (e.g. for `src/components/`
92    /// this is 4, pointing to `components/`). Used for dirname-bonus scoring.
93    last_segment_offset: u16,
94    /// Maximum `access_frecency_score` among direct child files.
95    /// Atomic so parallel frecency updates can write directly without juggling.
96    max_access_frecency: AtomicI32,
97}
98
99impl Clone for DirItem {
100    fn clone(&self) -> Self {
101        Self {
102            flags: self.flags,
103            path: self.path.clone(),
104            last_segment_offset: self.last_segment_offset,
105            max_access_frecency: AtomicI32::new(self.max_access_frecency()),
106        }
107    }
108}
109
110impl DirItem {
111    #[inline(always)]
112    pub fn is_overflow(&self) -> bool {
113        self.flags & DirFlags::OVERFLOW == 0
114    }
115
116    pub(crate) fn new(path: crate::simd_path::ChunkedString, last_segment_offset: u16) -> Self {
117        Self {
118            path,
119            flags: 0,
120            last_segment_offset,
121            max_access_frecency: AtomicI32::new(0),
122        }
123    }
124
125    /// Byte offset of the last path segment within the directory path.
126    #[inline]
127    pub fn last_segment_offset(&self) -> u16 {
128        self.last_segment_offset
129    }
130
131    /// Current max access frecency score.
132    #[inline]
133    pub fn max_access_frecency(&self) -> i32 {
134        self.max_access_frecency.load(Ordering::Relaxed)
135    }
136
137    /// Atomically update the directory's frecency score if the given score is larger.
138    /// Safe to call from parallel threads.
139    #[inline]
140    pub fn update_frecency_if_larger(&self, score: i32) {
141        self.max_access_frecency.fetch_max(score, Ordering::Relaxed);
142    }
143
144    /// Reset frecency to zero (used before full recomputation).
145    #[inline]
146    pub fn reset_frecency(&self) {
147        self.max_access_frecency.store(0, Ordering::Relaxed);
148    }
149
150    pub(crate) fn read_relative_path<'a>(&self, arena: ArenaPtr, buf: &'a mut [u8]) -> &'a str {
151        self.path.read_to_buf(arena, buf)
152    }
153
154    /// Relative dir path as owned String (cold path).
155    pub fn relative_path(&self, arena: impl FfsStringStorage) -> String {
156        let mut out = String::new();
157        let ptr = if self.is_overflow() {
158            arena.overflow_arena()
159        } else {
160            arena.base_arena()
161        };
162
163        self.path.write_to_string(ptr, &mut out);
164        out
165    }
166
167    /// Write the last segment (dirname) of this directory path to `out`.
168    pub fn write_dir_name(&self, arena: ArenaPtr, out: &mut String) {
169        out.clear();
170        let total = self.path.byte_len as usize;
171        let offset = self.last_segment_offset as usize;
172        if offset >= total {
173            return;
174        }
175        // Read the full path, then slice from last_segment_offset
176        let mut buf = [0u8; PATH_BUF_SIZE];
177        let full = self.path.read_to_buf(arena, &mut buf);
178        out.push_str(&full[offset..]);
179    }
180
181    /// The dirname (last segment) as an owned String. Cold path.
182    pub fn dir_name(&self, arena: impl FfsStringStorage) -> String {
183        let mut out = String::new();
184        let ptr = if self.is_overflow() {
185            arena.overflow_arena()
186        } else {
187            arena.base_arena()
188        };
189        self.write_dir_name(ptr, &mut out);
190        out
191    }
192
193    /// A path = base_path + "/" + relative. Cold path, allocates.
194    pub fn absolute_path(&self, arena: impl FfsStringStorage, base_path: &Path) -> PathBuf {
195        let rel = self.relative_path(arena);
196        if rel.is_empty() {
197            base_path.to_path_buf()
198        } else {
199            base_path.join(&rel)
200        }
201    }
202}
203
204impl Constrainable for DirItem {
205    #[inline]
206    fn write_file_name(&self, arena: ArenaPtr, out: &mut String) {
207        // For dirs, the "file name" equivalent is the last path segment
208        self.write_dir_name(arena, out);
209    }
210
211    #[inline]
212    fn write_relative_path(&self, arena: ArenaPtr, out: &mut String) {
213        self.path.write_to_string(arena, out);
214    }
215
216    #[inline]
217    fn git_status(&self) -> Option<git2::Status> {
218        None
219    }
220}
221
222#[derive(Debug)]
223pub struct FileItem {
224    pub size: u64,
225    pub modified: u64,
226    pub access_frecency_score: i16,
227    pub modification_frecency_score: i16,
228    pub git_status: Option<git2::Status>,
229    pub(crate) path: crate::simd_path::ChunkedString,
230    parent_dir: u32,
231    flags: u8,
232    content: OnceLock<FileContent>,
233}
234
235impl Clone for FileItem {
236    fn clone(&self) -> Self {
237        Self {
238            path: self.path.clone(),
239            parent_dir: self.parent_dir,
240            size: self.size,
241            modified: self.modified,
242            access_frecency_score: self.access_frecency_score,
243            modification_frecency_score: self.modification_frecency_score,
244            git_status: self.git_status,
245            flags: self.flags,
246            // on clone we have to reset the content lock
247            content: OnceLock::new(),
248        }
249    }
250}
251
252impl FileItem {
253    pub fn new_raw(
254        filename_start: u16,
255        size: u64,
256        modified: u64,
257        git_status: Option<git2::Status>,
258        is_binary: bool,
259    ) -> Self {
260        let mut flags = 0u8;
261        if is_binary {
262            flags |= FileItemFlags::BINARY;
263        }
264
265        let mut path = crate::simd_path::ChunkedString::empty();
266        path.filename_offset = filename_start;
267
268        Self {
269            path,
270            parent_dir: u32::MAX,
271            size,
272            modified,
273            access_frecency_score: 0,
274            modification_frecency_score: 0,
275            git_status,
276            flags,
277            content: OnceLock::new(),
278        }
279    }
280
281    /// Returns an absolute path of the file
282    pub fn absolute_path(&self, arena: impl FfsStringStorage, base_path: &Path) -> PathBuf {
283        let mut buf = [0u8; PATH_BUF_SIZE];
284        let rel = self.path.read_to_buf(arena.arena_for(self), &mut buf);
285        base_path.join(rel)
286    }
287
288    pub(crate) fn set_path(&mut self, path: crate::simd_path::ChunkedString) {
289        self.path = path;
290    }
291
292    pub(crate) fn parent_dir_index(&self) -> u32 {
293        self.parent_dir
294    }
295
296    pub(crate) fn set_parent_dir(&mut self, idx: u32) {
297        self.parent_dir = idx;
298    }
299
300    pub fn dir_str(&self, arena: impl FfsStringStorage) -> String {
301        let mut s = String::with_capacity(64);
302        self.path.write_dir_to(arena.arena_for(self), &mut s);
303        s
304    }
305
306    pub(crate) fn write_dir_str(&self, arena: ArenaPtr, out: &mut String) {
307        self.path.write_dir_to(arena, out);
308    }
309
310    pub fn file_name(&self, arena: impl FfsStringStorage) -> String {
311        let mut s = String::with_capacity(32);
312        self.path.write_filename_to(arena.arena_for(self), &mut s);
313        s
314    }
315
316    pub(crate) fn write_file_name_from_arena(&self, arena: ArenaPtr, out: &mut String) {
317        self.path.write_filename_to(arena, out);
318    }
319
320    pub fn relative_path(&self, arena: impl FfsStringStorage) -> String {
321        let mut s = String::with_capacity(64);
322        self.path.write_to_string(arena.arena_for(self), &mut s);
323        s
324    }
325
326    pub(crate) fn write_relative_path_from_arena(&self, arena: ArenaPtr, out: &mut String) {
327        self.path.write_to_string(arena, out);
328    }
329
330    pub fn relative_path_len(&self) -> usize {
331        self.path.byte_len as usize
332    }
333
334    pub fn filename_offset_in_relative_path(&self) -> usize {
335        self.path.filename_offset as usize
336    }
337
338    pub(crate) fn relative_path_eq(&self, arena: ArenaPtr, other: &str) -> bool {
339        if other.len() != self.path.byte_len as usize {
340            return false;
341        }
342        let mut buf = [0u8; 512];
343        let mine = self.path.read_to_buf(arena, &mut buf);
344        mine == other
345    }
346
347    pub(crate) fn relative_path_starts_with(&self, arena: ArenaPtr, prefix: &str) -> bool {
348        let mut buf = [0u8; PATH_BUF_SIZE];
349        let path = self.path.read_to_buf(arena, &mut buf);
350        path.starts_with(prefix)
351    }
352
353    /// Write `base_path + '/' + relative_path` into `buf` and return it
354    /// as `&Path`. Takes a fixed-size array so the buffer can live on
355    /// the stack (no heap allocation, no bounds checks in the hot loop).
356    pub(crate) fn write_absolute_path<'a>(
357        &self,
358        arena: ArenaPtr,
359        base_path: &Path,
360        buf: &'a mut [u8; PATH_BUF_SIZE],
361    ) -> &'a Path {
362        let base = base_path.as_os_str().as_encoded_bytes();
363        let base_len = base.len();
364        buf[..base_len].copy_from_slice(base);
365        let sep_len = if base_len > 0 && base[base_len - 1] != std::path::MAIN_SEPARATOR as u8 {
366            buf[base_len] = std::path::MAIN_SEPARATOR as u8;
367            1
368        } else {
369            0
370        };
371
372        let base_end_idx = base_len + sep_len;
373        let relative_portion_str = self.path.read_to_buf(arena, &mut buf[base_end_idx..]);
374        let total = base_end_idx + relative_portion_str.len();
375        Path::new(unsafe { std::str::from_utf8_unchecked(&buf[..total]) })
376    }
377
378    /// Write the relative path into `buf` and NUL-terminate, returning
379    /// a `&CStr`. Fixed-size array so the buffer is stack-allocatable.
380    ///
381    /// Paired with a parent-directory fd this eliminates the per-file
382    /// absolute-path memcpy: `openat(dir_fd, cstr.as_ptr(), O_RDONLY)`
383    /// resolves the name relative to `dir_fd`. Unix-only.
384    #[cfg(unix)]
385    pub(crate) fn write_relative_cstr<'a>(
386        &self,
387        arena: ArenaPtr,
388        buf: &'a mut [u8; PATH_BUF_SIZE],
389    ) -> &'a std::ffi::CStr {
390        // Reserve the last byte for the NUL terminator.
391        let rel = self.path.read_to_buf(arena, &mut buf[..PATH_BUF_SIZE - 1]);
392        let n = rel.len();
393        buf[n] = 0;
394        // SAFETY: `buf[..=n]` ends with the NUL we just wrote and
395        // filesystem paths never contain interior NULs.
396        unsafe { std::ffi::CStr::from_bytes_with_nul_unchecked(&buf[..=n]) }
397    }
398
399    #[inline]
400    pub fn total_frecency_score(&self) -> i32 {
401        self.access_frecency_score as i32 + self.modification_frecency_score as i32
402    }
403
404    #[inline]
405    pub fn is_binary(&self) -> bool {
406        self.flags & FileItemFlags::BINARY != 0
407    }
408
409    #[inline]
410    pub fn set_binary(&mut self, val: bool) {
411        if val {
412            self.flags |= FileItemFlags::BINARY;
413        } else {
414            self.flags &= !FileItemFlags::BINARY;
415        }
416    }
417
418    #[inline]
419    pub fn is_deleted(&self) -> bool {
420        self.flags & FileItemFlags::DELETED != 0
421    }
422
423    #[inline]
424    pub fn set_deleted(&mut self, val: bool) {
425        if val {
426            self.flags |= FileItemFlags::DELETED;
427        } else {
428            self.flags &= !FileItemFlags::DELETED;
429        }
430    }
431
432    #[inline]
433    pub fn is_overflow(&self) -> bool {
434        self.flags & FileItemFlags::OVERFLOW != 0
435    }
436
437    #[inline]
438    pub fn set_overflow(&mut self, val: bool) {
439        if val {
440            self.flags |= FileItemFlags::OVERFLOW;
441        } else {
442            self.flags &= !FileItemFlags::OVERFLOW;
443        }
444    }
445}
446
447impl FileItem {
448    /// Invalidate the cached content so the next `get_content()` call creates a fresh one.
449    ///
450    /// Call this when the background watcher detects that the file has been modified.
451    /// On Unix, a file that is truncated while mapped can cause SIGBUS. On Windows,
452    /// the stale buffer simply won't reflect the new contents. In both cases,
453    /// invalidating ensures a fresh read on the next access.
454    pub fn invalidate_mmap(&mut self, budget: &ContentCacheBudget) {
455        if self.content.get().is_some() {
456            budget.cached_count.fetch_sub(1, Ordering::Relaxed);
457            budget.cached_bytes.fetch_sub(self.size, Ordering::Relaxed);
458        }
459
460        self.content = OnceLock::new();
461    }
462
463    pub fn update_metadata(
464        &mut self,
465        budget: &ContentCacheBudget,
466        modified_secs: Option<u64>,
467        new_size: Option<u64>,
468    ) {
469        if let Some(modified) = modified_secs
470            && self.modified < modified
471        {
472            self.modified = modified;
473        }
474
475        self.invalidate_mmap(budget);
476
477        if let Some(size) = new_size {
478            self.size = size;
479        }
480    }
481
482    /// Get the cached file contents or lazily load and cache them.
483    ///
484    /// Returns `None` if the file is too large, empty, can't be opened, **or
485    /// the cache budget is exhausted**. Callers that need content regardless
486    /// of the budget should use [`get_content_for_search`].
487    ///
488    /// After the first call, this is lock-free (just an atomic load + pointer deref).
489    pub(crate) fn get_content(
490        &self,
491        arena: ArenaPtr,
492        base_path: &Path,
493        budget: &ContentCacheBudget,
494    ) -> Option<&[u8]> {
495        if let Some(content) = self.content.get() {
496            return Some(content);
497        }
498
499        let max_file_size = budget.max_file_size;
500        if self.size == 0 || self.size > max_file_size {
501            return None;
502        }
503
504        // Check cache budget before creating a new persistent cache entry.
505        let count = budget.cached_count.load(Ordering::Relaxed);
506        let bytes = budget.cached_bytes.load(Ordering::Relaxed);
507        let max_files = budget.max_files;
508        let max_bytes = budget.max_bytes;
509        if count >= max_files || bytes + self.size > max_bytes {
510            return None;
511        }
512
513        let content = load_file_content(&self.absolute_path(arena, base_path), self.size)?;
514        let result = self.content.get_or_init(|| content);
515
516        // Bump counters. Slight over-count under races is fine — the budget
517        // is a soft limit and the overshoot is bounded by rayon thread count.
518        budget.cached_count.fetch_add(1, Ordering::Relaxed);
519        budget.cached_bytes.fetch_add(self.size, Ordering::Relaxed);
520
521        Some(result)
522    }
523
524    /// Get file content for searching — **always returns content** for eligible
525    /// files, even when the persistent cache budget is exhausted.
526    ///
527    /// The caller provides a reusable `path_buf` (pre-filled with `base_path/`)
528    /// and its `base_len` to avoid allocations when constructing the absolute path.
529    #[inline]
530    pub(crate) fn get_content_for_search<'a>(
531        &'a self,
532        buf: &'a mut Vec<u8>, // we allow it to grow
533        arena: ArenaPtr,
534        base_path: &Path,
535        budget: &ContentCacheBudget,
536    ) -> Option<&'a [u8]> {
537        // Fast path: persistent cache hit (zero-copy).
538        if let Some(cached) = self.get_content(arena, base_path, budget) {
539            return Some(cached);
540        }
541
542        let max_file_size = budget.max_file_size;
543        if self.is_binary() || self.size == 0 || self.size > max_file_size {
544            return None;
545        }
546
547        // Slow path: read into the reusable buffer — open() + read_exact() + close().
548        // No mmap()/munmap() syscalls, no page table setup/teardown.
549        // We know the exact size so we use read_exact (1 read syscall) instead of
550        // read_to_end (2 read syscalls — one for data, one for EOF confirmation).
551        let abs = self.absolute_path(arena, base_path);
552        let len = self.size as usize;
553        buf.resize(len, 0);
554        let mut file = std::fs::File::open(&abs).ok()?;
555        file.read_exact(buf).ok()?;
556        Some(buf.as_slice())
557    }
558}
559
560/// Files smaller than one page waste the remainder when mmapped.
561/// Unused on Windows where `load_file_content` does not mmap.
562#[cfg(all(not(target_os = "windows"), target_arch = "aarch64"))]
563const MMAP_THRESHOLD: u64 = 16 * 1024;
564#[cfg(all(not(target_os = "windows"), not(target_arch = "aarch64")))]
565const MMAP_THRESHOLD: u64 = 4 * 1024;
566
567fn load_file_content(path: &Path, size: u64) -> Option<FileContent> {
568    #[cfg(not(target_os = "windows"))]
569    {
570        if size < MMAP_THRESHOLD {
571            let data = std::fs::read(path).ok()?;
572            Some(FileContent::Buffer(data))
573        } else {
574            let file = std::fs::File::open(path).ok()?;
575            // SAFETY: The mmap is backed by the kernel page cache and automatically
576            // reflects file modifications. The only risk is SIGBUS if the file is
577            // truncated while mapped.
578            let mmap = unsafe { memmap2::Mmap::map(&file) }.ok()?;
579            Some(FileContent::Mmap(mmap))
580        }
581    }
582
583    #[cfg(target_os = "windows")]
584    {
585        let _ = size;
586        let data = std::fs::read(path).ok()?;
587        Some(FileContent::Buffer(data))
588    }
589}
590
591impl Constrainable for FileItem {
592    #[inline]
593    fn write_file_name(&self, arena: ArenaPtr, out: &mut String) {
594        self.path.write_filename_to(arena, out);
595    }
596
597    #[inline]
598    fn write_relative_path(&self, arena: ArenaPtr, out: &mut String) {
599        self.path.write_to_string(arena, out);
600    }
601
602    #[inline]
603    fn git_status(&self) -> Option<git2::Status> {
604        self.git_status
605    }
606}
607
/// Score breakdown attached to a search result.
///
/// Every numeric component defaults to `0`, `exact_match` to `false` and
/// `match_type` to the empty string. How the components are computed and
/// combined into `total` is decided by the scoring code, not by this type.
#[derive(Debug, Clone, Default)]
pub struct Score {
    /// Overall score of the result.
    pub total: i32,
    pub base_score: i32,
    pub filename_bonus: i32,
    pub special_filename_bonus: i32,
    pub frecency_boost: i32,
    pub git_status_boost: i32,
    pub distance_penalty: i32,
    pub current_file_penalty: i32,
    pub combo_match_boost: i32,
    pub path_alignment_bonus: i32,
    /// Whether the result was an exact match.
    pub exact_match: bool,
    /// Static label naming the kind of match.
    pub match_type: &'static str,
}
623
/// Window into a result list: skip `offset` items, then take up to `limit`.
#[derive(Debug, Clone, Copy)]
pub struct PaginationArgs {
    /// Number of leading results to skip.
    pub offset: usize,
    /// Maximum number of results in the page.
    pub limit: usize,
}

impl Default for PaginationArgs {
    /// First page: offset 0 with a limit of 100 results.
    fn default() -> Self {
        const DEFAULT_PAGE_LIMIT: usize = 100;

        Self {
            offset: 0,
            limit: DEFAULT_PAGE_LIMIT,
        }
    }
}
638
639#[derive(Debug, Clone)]
640pub struct ScoringContext<'a> {
641    pub query: &'a FfsQuery<'a>,
642    pub project_path: Option<&'a Path>,
643    pub current_file: Option<&'a str>,
644    pub max_typos: u16,
645    pub max_threads: usize,
646    pub last_same_query_match: Option<QueryMatchEntry>,
647    pub combo_boost_score_multiplier: i32,
648    pub min_combo_count: u32,
649    pub pagination: PaginationArgs,
650}
651
652impl ScoringContext<'_> {
653    pub fn effective_query(&self) -> &str {
654        match &self.query.fuzzy_query {
655            FuzzyQuery::Text(t) => t,
656            FuzzyQuery::Parts(parts) if !parts.is_empty() => parts[0],
657            _ => self.query.raw_query.trim(),
658        }
659    }
660}
661
662#[derive(Debug, Clone, Default)]
663pub struct SearchResult<'a> {
664    pub items: Vec<&'a FileItem>,
665    pub scores: Vec<Score>,
666    pub total_matched: usize,
667    pub total_files: usize,
668    pub location: Option<Location>,
669}
670
671/// Search result for directory-only fuzzy search.
672#[derive(Debug, Clone, Default)]
673pub struct DirSearchResult<'a> {
674    pub items: Vec<&'a DirItem>,
675    pub scores: Vec<Score>,
676    pub total_matched: usize,
677    pub total_dirs: usize,
678}
679
680/// A single item in a mixed (files + directories) search result.
681#[derive(Debug, Clone)]
682pub enum MixedItemRef<'a> {
683    File(&'a FileItem),
684    Dir(&'a DirItem),
685}
686
687/// Search result for mixed (files + directories) fuzzy search.
688/// Items are interleaved by total score in descending order.
689#[derive(Debug, Clone, Default)]
690pub struct MixedSearchResult<'a> {
691    pub items: Vec<MixedItemRef<'a>>,
692    pub scores: Vec<Score>,
693    pub total_matched: usize,
694    pub total_files: usize,
695    pub total_dirs: usize,
696    pub location: Option<Location>,
697}
698
699impl Default for MixedItemRef<'_> {
700    fn default() -> Self {
701        // Should never be used, exists only for Default derive on MixedSearchResult
702        unreachable!("MixedItemRef::default should not be called")
703    }
704}
705
/// Largest file (10 MiB) that the content cache will load.
const MAX_MMAP_FILE_SIZE: u64 = 10 * 1024 * 1024;

/// Byte cap (512 MiB) used for the smallest repository tier.
const MAX_CACHED_CONTENT_BYTES: u64 = 512 * 1024 * 1024;

/// Soft limits for the persistent file-content cache, plus the running
/// totals that are compared against them.
#[derive(Debug)]
pub struct ContentCacheBudget {
    /// Maximum number of files that may be cached.
    pub max_files: usize,
    /// Maximum total number of cached bytes.
    pub max_bytes: u64,
    /// Files larger than this are never cached.
    pub max_file_size: u64,
    /// Number of files currently counted as cached.
    pub cached_count: AtomicUsize,
    /// Number of bytes currently counted as cached.
    pub cached_bytes: AtomicU64,
}

impl ContentCacheBudget {
    /// Assemble a budget from explicit caps with both counters at zero.
    fn with_caps(max_files: usize, max_bytes: u64, max_file_size: u64) -> Self {
        Self {
            max_files,
            max_bytes,
            max_file_size,
            cached_count: AtomicUsize::new(0),
            cached_bytes: AtomicU64::new(0),
        }
    }

    /// No cap on file count or total bytes; the per-file size cap still applies.
    pub fn unlimited() -> Self {
        Self::with_caps(usize::MAX, u64::MAX, MAX_MMAP_FILE_SIZE)
    }

    /// Every cap is zero.
    pub fn zero() -> Self {
        Self::with_caps(0, 0, 0)
    }

    /// Pick caps from the repository size:
    ///
    /// | `file_count`      | `max_files` | `max_bytes` |
    /// |-------------------|-------------|-------------|
    /// | more than 50 000  | 5 000       | 128 MB      |
    /// | more than 10 000  | 10 000      | 256 MB      |
    /// | otherwise         | 30 000      | 512 MB      |
    pub fn new_for_repo(file_count: usize) -> Self {
        const MIB: u64 = 1024 * 1024;

        let (max_files, max_bytes) = match file_count {
            n if n > 50_000 => (5_000, 128 * MIB),
            n if n > 10_000 => (10_000, 256 * MIB),
            // effectively unlimited for small repos
            _ => (30_000, MAX_CACHED_CONTENT_BYTES),
        };

        Self::with_caps(max_files, max_bytes, MAX_MMAP_FILE_SIZE)
    }

    /// Build a budget from caller-supplied overrides.
    ///
    /// Each argument is a cap; `0` means "use the library default for that
    /// cap" (inherits from [`Self::default`], which is `new_for_repo(30_000)`).
    /// Returns `None` when every cap is `0`, signalling to the picker that it
    /// should auto-size the budget from the final scanned file count rather
    /// than applying an explicit override.
    pub fn from_overrides(max_files: usize, max_bytes: u64, max_file_size: u64) -> Option<Self> {
        let nothing_overridden = max_files == 0 && max_bytes == 0 && max_file_size == 0;
        if nothing_overridden {
            return None;
        }

        let defaults = Self::default();
        Some(Self {
            max_files: if max_files > 0 { max_files } else { defaults.max_files },
            max_bytes: if max_bytes > 0 { max_bytes } else { defaults.max_bytes },
            max_file_size: if max_file_size > 0 {
                max_file_size
            } else {
                defaults.max_file_size
            },
            ..defaults
        })
    }

    /// Zero both running counters; the caps are left untouched.
    pub fn reset(&self) {
        self.cached_bytes.store(0, Ordering::Relaxed);
        self.cached_count.store(0, Ordering::Relaxed);
    }
}

impl Default for ContentCacheBudget {
    /// Same as `new_for_repo(30_000)`.
    fn default() -> Self {
        Self::new_for_repo(30_000)
    }
}
802
#[cfg(test)]
impl FileItem {
    /// Test-only constructor. Leaks a single-file arena so the pointer stays
    /// valid forever; the arena pointer itself is discarded.
    pub fn new_for_test(
        rel_path: &str,
        size: u64,
        modified: u64,
        git_status: Option<git2::Status>,
        is_binary: bool,
    ) -> Self {
        Self::new_for_test_with_arena(rel_path, size, modified, git_status, is_binary).0
    }

    /// Test-only constructor that also returns the pointer to the (leaked)
    /// single-path arena backing the item.
    pub(crate) fn new_for_test_with_arena(
        rel_path: &str,
        size: u64,
        modified: u64,
        git_status: Option<git2::Status>,
        is_binary: bool,
    ) -> (Self, ArenaPtr) {
        // The file name starts right after the last separator, or at 0 when
        // the path has none.
        let filename_start = rel_path
            .rfind(std::path::is_separator)
            .map_or(0, |sep_idx| sep_idx + 1) as u16;

        let mut item = Self::new_raw(filename_start, size, modified, git_status, is_binary);

        let paths = [rel_path.to_string()];
        let (store, strings) = crate::simd_path::build_chunked_path_store_from_strings(
            &paths,
            std::slice::from_ref(&item),
        );
        let chunked = strings.into_iter().next().unwrap();
        let arena = store.as_arena_ptr();
        item.set_path(chunked);

        // Deliberately leak the store so `arena` is never invalidated.
        std::mem::forget(store);
        (item, arena)
    }
}