Skip to main content

fff_search/
types.rs

1use std::path::{Path, PathBuf};
2use std::sync::OnceLock;
3
4use crate::constraints::Constrainable;
5use crate::query_tracker::QueryMatchEntry;
6use fff_query_parser::{FFFQuery, FuzzyQuery, Location};
7
/// Cached file contents — a memory map on non-Windows targets, a heap buffer on Windows.
///
/// On Windows, memory-mapped files hold the file handle open and prevent
/// editors from saving (writing/replacing) those files. Reading into a
/// `Vec<u8>` releases the handle immediately after the read completes.
///
/// Exactly one variant exists per build: the `cfg` attributes below remove the
/// other one at compile time.
#[derive(Debug)]
#[allow(dead_code)] // variants are conditionally used per platform
enum FileContent {
    /// Memory map of the file (every target except Windows).
    #[cfg(not(target_os = "windows"))]
    Mmap(memmap2::Mmap),
    /// Entire file read into an owned buffer (Windows only).
    #[cfg(target_os = "windows")]
    Buffer(Vec<u8>),
}
21
22impl std::ops::Deref for FileContent {
23    type Target = [u8];
24    fn deref(&self) -> &[u8] {
25        match self {
26            #[cfg(not(target_os = "windows"))]
27            FileContent::Mmap(m) => m,
28            #[cfg(target_os = "windows")]
29            FileContent::Buffer(b) => b,
30        }
31    }
32}
33
/// A single indexed file with metadata, frecency scores, and lazy content cache.
///
/// File contents are initialized lazily on the first grep access and cached for
/// subsequent searches. On Unix, uses mmap backed by the kernel page cache. On
/// Windows, reads into a heap buffer to avoid holding file handles open.
///
/// Thread-safety: `OnceLock` provides lock-free reads after initialization.
/// Each file is only searched by one rayon worker at a time via `par_iter`.
#[derive(Debug)]
pub struct FileItem {
    /// Path handed to the filesystem when the content is loaded.
    pub path: PathBuf,
    /// Path relative to some base directory chosen by the caller
    /// (presumably the project root — confirm against the indexer).
    pub relative_path: String,
    /// `relative_path.to_lowercase()`, precomputed by `new_raw`.
    pub relative_path_lower: String,
    /// File name as supplied by the caller.
    pub file_name: String,
    /// `file_name.to_lowercase()`, precomputed by `new_raw`.
    pub file_name_lower: String,
    /// File size as supplied by the caller. `get_content` refuses to load the
    /// file when this is 0 or greater than `MAX_MMAP_FILE_SIZE`.
    pub size: u64,
    /// Modification timestamp as supplied by the caller.
    /// NOTE(review): unit and epoch are not visible in this file — confirm.
    pub modified: u64,
    /// Frecency component for accesses; `new_raw` starts it at 0.
    pub access_frecency_score: i64,
    /// Frecency component for modifications; `new_raw` starts it at 0.
    pub modification_frecency_score: i64,
    /// Overall frecency score; `new_raw` starts it at 0.
    /// NOTE(review): how it is derived from the two components is decided elsewhere.
    pub total_frecency_score: i64,
    /// Git status of the file, or `None` when the caller supplied none.
    pub git_status: Option<git2::Status>,
    /// Binary/text classification as supplied by the caller.
    pub is_binary: bool,
    /// Lazily-initialized file contents for grep.
    /// Initialized on first grep access via `OnceLock`; lock-free on subsequent reads.
    content: OnceLock<FileContent>,
}
60
61impl Clone for FileItem {
62    fn clone(&self) -> Self {
63        Self {
64            path: self.path.clone(),
65            relative_path: self.relative_path.clone(),
66            relative_path_lower: self.relative_path_lower.clone(),
67            file_name: self.file_name.clone(),
68            file_name_lower: self.file_name_lower.clone(),
69            size: self.size,
70            modified: self.modified,
71            access_frecency_score: self.access_frecency_score,
72            modification_frecency_score: self.modification_frecency_score,
73            total_frecency_score: self.total_frecency_score,
74            git_status: self.git_status,
75            is_binary: self.is_binary,
76            // Don't clone the content — the clone lazily re-creates it on demand
77            content: OnceLock::new(),
78        }
79    }
80}
81
/// Upper bound on `FileItem::size` for which `get_content` will load the file;
/// anything larger yields `None`. Equals 10 MiB when `size` is a byte count.
const MAX_MMAP_FILE_SIZE: u64 = 10 * 1024 * 1024;
83
84impl FileItem {
85    /// Create a new `FileItem` with all fields specified and an empty (not yet loaded) mmap.
86    pub fn new_raw(
87        path: PathBuf,
88        relative_path: String,
89        file_name: String,
90        size: u64,
91        modified: u64,
92        git_status: Option<git2::Status>,
93        is_binary: bool,
94    ) -> Self {
95        Self {
96            relative_path_lower: relative_path.to_lowercase(),
97            file_name_lower: file_name.to_lowercase(),
98            path,
99            relative_path,
100            file_name,
101            size,
102            modified,
103            access_frecency_score: 0,
104            modification_frecency_score: 0,
105            total_frecency_score: 0,
106            git_status,
107            is_binary,
108            content: OnceLock::new(),
109        }
110    }
111
112    /// Invalidate the cached content so the next `get_content()` call creates a fresh one.
113    ///
114    /// Call this when the background watcher detects that the file has been modified.
115    /// On Unix, a file that is truncated while mapped can cause SIGBUS. On Windows,
116    /// the stale buffer simply won't reflect the new contents. In both cases,
117    /// invalidating ensures a fresh read on the next access.
118    pub fn invalidate_mmap(&mut self) {
119        self.content = OnceLock::new();
120    }
121
122    /// Get the cached file contents or lazily load them. Returns `None` if the
123    /// file is too large, empty, or can't be opened.
124    ///
125    /// After the first call, this is lock-free (just an atomic load + pointer deref).
126    /// On Unix, uses mmap backed by the kernel page cache. On Windows, reads into
127    /// a heap buffer so the file handle is released immediately.
128    #[inline]
129    pub fn get_content(&self) -> Option<&[u8]> {
130        if let Some(content) = self.content.get() {
131            return Some(content);
132        }
133
134        if self.size == 0 || self.size > MAX_MMAP_FILE_SIZE {
135            return None;
136        }
137
138        let content = load_file_content(&self.path)?;
139
140        // If another thread raced us, OnceLock discards ours and returns theirs.
141        Some(self.content.get_or_init(|| content))
142    }
143
144    /// Backward-compatible alias for `get_content`.
145    #[inline]
146    pub fn get_mmap(&self) -> Option<&[u8]> {
147        self.get_content()
148    }
149}
150
151/// Load file contents: mmap on Unix, heap buffer on Windows.
152fn load_file_content(path: &Path) -> Option<FileContent> {
153    #[cfg(not(target_os = "windows"))]
154    {
155        let file = std::fs::File::open(path).ok()?;
156        // SAFETY: The mmap is backed by the kernel page cache and automatically
157        // reflects file modifications. The only risk is SIGBUS if the file is
158        // truncated while mapped.
159        let mmap = unsafe { memmap2::Mmap::map(&file) }.ok()?;
160        Some(FileContent::Mmap(mmap))
161    }
162
163    #[cfg(target_os = "windows")]
164    {
165        let data = std::fs::read(path).ok()?;
166        Some(FileContent::Buffer(data))
167    }
168}
169
170impl Constrainable for FileItem {
171    #[inline]
172    fn relative_path(&self) -> &str {
173        &self.relative_path
174    }
175
176    #[inline]
177    fn relative_path_lower(&self) -> &str {
178        &self.relative_path_lower
179    }
180
181    #[inline]
182    fn file_name(&self) -> &str {
183        &self.file_name
184    }
185
186    #[inline]
187    fn git_status(&self) -> Option<git2::Status> {
188        self.git_status
189    }
190}
191
/// Score assigned to one file for one query, together with the individual
/// contributions that were recorded for it.
///
/// NOTE(review): the code that fills these fields is not in this file; the
/// per-field notes below are inferred from the field names and should be
/// confirmed against the scorer. `Default` yields all-zero numbers, `false`,
/// and an empty `match_type`.
#[derive(Debug, Clone, Default)]
pub struct Score {
    /// Final score (presumably the combination of the components below).
    pub total: i32,
    /// Score from the match itself, before bonuses and penalties (presumably).
    pub base_score: i32,
    /// Bonus related to the file name (presumably for matching in the name).
    pub filename_bonus: i32,
    /// Bonus for file names the scorer treats as special.
    pub special_filename_bonus: i32,
    /// Contribution derived from frecency.
    pub frecency_boost: i32,
    /// Contribution derived from the file's git status.
    pub git_status_boost: i32,
    /// Penalty related to distance (presumably from the current file — confirm).
    pub distance_penalty: i32,
    /// Penalty related to the current file (presumably applied when the item is that file).
    pub current_file_penalty: i32,
    /// Contribution from combo matching.
    pub combo_match_boost: i32,
    /// Whether the scorer considered this an exact match.
    pub exact_match: bool,
    /// Static label describing the kind of match; the set of values is defined by the scorer.
    pub match_type: &'static str,
}
206
/// Offset/limit pair describing which slice of the results a caller wants.
///
/// NOTE(review): how these are applied is not visible in this file; the usual
/// reading is "skip `offset` results, then return at most `limit`" — confirm.
#[derive(Debug, Clone, Copy)]
pub struct PaginationArgs {
    /// Position of the first requested result.
    pub offset: usize,
    /// Maximum number of results requested.
    pub limit: usize,
}
212
/// Context for scoring files during search.
///
/// The `query` field contains the pre-parsed query with constraints,
/// fuzzy parts, and location information. Parsing is done once at the API
/// boundary and passed through.
///
/// NOTE(review): apart from `query`, the fields are consumed outside this
/// file; their notes below are inferred from names and should be confirmed.
#[derive(Debug, Clone)]
pub struct ScoringContext<'a> {
    /// Parsed query containing raw text, constraints, fuzzy parts, and location
    pub query: &'a FFFQuery<'a>,
    /// Project directory, when known.
    pub project_path: Option<&'a Path>,
    /// The file the user currently has open, when known.
    pub current_file: Option<&'a str>,
    /// Typo budget for fuzzy matching (presumably an upper bound).
    pub max_typos: u16,
    /// Thread budget for the search (presumably an upper bound).
    pub max_threads: usize,
    /// Earlier match recorded for the same query, if any.
    pub last_same_query_match: Option<&'a QueryMatchEntry>,
    /// Multiplier used for the combo-match boost.
    pub combo_boost_score_multiplier: i32,
    /// Combo count threshold (presumably the minimum before the boost applies).
    pub min_combo_count: u32,
    /// Requested result window.
    pub pagination: PaginationArgs,
}
231
232impl ScoringContext<'_> {
233    /// Get the effective fuzzy query string for matching.
234    /// Returns the first fuzzy part, or the raw query if no parsing was done.
235    pub fn effective_query(&self) -> &str {
236        match &self.query.fuzzy_query {
237            FuzzyQuery::Text(t) => t,
238            FuzzyQuery::Parts(parts) if !parts.is_empty() => parts[0],
239            _ => self.query.raw_query.trim(),
240        }
241    }
242}
243
/// Outcome of a search: borrowed matching files plus their scores and counts.
///
/// NOTE(review): this file does not show how the result is assembled; the
/// field notes are inferred from names and types — confirm against the search
/// code. `Default` yields empty vectors, zero counts, and no location.
#[derive(Debug, Clone, Default)]
pub struct SearchResult<'a> {
    /// Matching files, borrowed for `'a`.
    pub items: Vec<&'a FileItem>,
    /// Scores for the matches (presumably `scores[i]` belongs to `items[i]`).
    pub scores: Vec<Score>,
    /// Number of files that matched (presumably before pagination).
    pub total_matched: usize,
    /// Number of files that were considered (presumably the index size).
    pub total_files: usize,
    /// Location parsed from the query, if there was one.
    pub location: Option<Location>,
}
251}