sqry_core/io/
file_reader.rs1use anyhow::{Context, Result, bail};
7use memmap2::Mmap;
8use std::fs::File;
9use std::io::Read;
10use std::path::Path;
11
12use crate::config::buffers::{max_source_file_size, mmap_threshold};
15
/// Strategy that [`FileReader`] uses to load a file's bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReaderPolicy {
    /// Always read the whole file into a heap-allocated buffer.
    Buffered,
    /// Request a memory map of the file. If mapping fails, the reader falls
    /// back to a buffered read instead of returning an error.
    Mmap,
    /// Pick the strategy from the file size: files whose size is at least
    /// `threshold` are handled like [`ReaderPolicy::Mmap`], smaller files
    /// like [`ReaderPolicy::Buffered`].
    Auto {
        /// File size in bytes at or above which memory mapping is attempted.
        threshold: u64,
    },
}
29
30impl Default for ReaderPolicy {
31 fn default() -> Self {
32 Self::Auto {
34 threshold: mmap_threshold(),
35 }
36 }
37}
38
39pub enum FileReader {
41 Mmap {
43 #[allow(dead_code)]
45 file: File,
46 mmap: Mmap,
48 },
49 Buffered {
51 data: Vec<u8>,
53 },
54}
55
56impl FileReader {
57 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
63 Self::open_with_policy(path, ReaderPolicy::default())
64 }
65
66 pub fn open_with_policy<P: AsRef<Path>>(path: P, policy: ReaderPolicy) -> Result<Self> {
73 let path = path.as_ref();
74 let file =
75 File::open(path).with_context(|| format!("Failed to open file: {}", path.display()))?;
76
77 let metadata = file
78 .metadata()
79 .with_context(|| format!("Failed to read file metadata: {}", path.display()))?;
80
81 let file_size = metadata.len();
82
83 let max_size = max_source_file_size();
85 if file_size > max_size {
86 bail!(
87 "File too large to index: {} ({} MB exceeds {} MB limit). \
88 Adjust SQRY_MAX_SOURCE_FILE_SIZE environment variable if needed.",
89 path.display(),
90 file_size / (1024 * 1024),
91 max_size / (1024 * 1024)
92 );
93 }
94
95 let use_mmap = match policy {
97 ReaderPolicy::Buffered => false,
98 ReaderPolicy::Mmap => true,
99 ReaderPolicy::Auto { threshold } => file_size >= threshold,
100 };
101
102 if use_mmap {
103 match Self::try_mmap(file, path) {
105 Ok(reader) => Ok(reader),
106 Err(_e) => {
107 let mut file = File::open(path)?;
110 Self::read_buffered(&mut file, path)
111 }
112 }
113 } else {
114 let mut file_for_read = file;
115 Self::read_buffered(&mut file_for_read, path)
116 }
117 }
118
119 fn try_mmap(file: File, path: &Path) -> Result<Self> {
121 let mmap = unsafe {
124 Mmap::map(&file).with_context(|| format!("Failed to mmap file: {}", path.display()))?
125 };
126
127 Ok(FileReader::Mmap { file, mmap })
128 }
129
130 fn read_buffered(file: &mut File, path: &Path) -> Result<Self> {
132 let mut data = Vec::new();
133 file.read_to_end(&mut data)
134 .with_context(|| format!("Failed to read file: {}", path.display()))?;
135
136 Ok(FileReader::Buffered { data })
137 }
138
139 #[must_use]
141 pub fn as_slice(&self) -> &[u8] {
142 match self {
143 FileReader::Mmap { mmap, .. } => &mmap[..],
144 FileReader::Buffered { data } => &data[..],
145 }
146 }
147
148 #[must_use]
150 pub fn len(&self) -> usize {
151 self.as_slice().len()
152 }
153
154 #[must_use]
156 pub fn is_empty(&self) -> bool {
157 self.len() == 0
158 }
159
160 pub fn chunks(&self, chunk_size: usize) -> impl Iterator<Item = &[u8]> {
162 self.as_slice().chunks(chunk_size)
163 }
164}
165
166impl AsRef<[u8]> for FileReader {
167 fn as_ref(&self) -> &[u8] {
168 self.as_slice()
169 }
170}
171
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temp file holding `size` bytes of a repeating `0..=255`
    /// pattern and returns it together with the bytes that were written.
    fn create_temp_file(size: usize) -> (NamedTempFile, Vec<u8>) {
        let mut file = NamedTempFile::new().expect("Failed to create temp file");
        // `i % 256` always fits in a `u8`, so the truncating cast is exact.
        #[allow(clippy::cast_possible_truncation)]
        let data: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
        file.write_all(&data).expect("Failed to write temp file");
        file.flush().expect("Failed to flush temp file");
        (file, data)
    }

    #[test]
    fn test_buffered_small_file() {
        let (file, expected_data) = create_temp_file(1024);

        let reader = FileReader::open_with_policy(file.path(), ReaderPolicy::Buffered)
            .expect("Failed to open file");

        // The buffered policy must never produce a memory map.
        assert!(matches!(reader, FileReader::Buffered { .. }));
        assert_eq!(reader.as_slice(), &expected_data[..]);
        assert_eq!(reader.len(), 1024);
        assert!(!reader.is_empty());
    }

    #[test]
    fn test_mmap_large_file() {
        let size = 15 * 1024 * 1024;
        let (file, expected_data) = create_temp_file(size);

        // The backend is not asserted here: a failed mapping legitimately
        // falls back to a buffered read, and the contents must match either way.
        let reader = FileReader::open_with_policy(file.path(), ReaderPolicy::Mmap)
            .expect("Failed to open file");

        assert_eq!(reader.as_slice(), &expected_data[..]);
        assert_eq!(reader.len(), size);
    }

    #[test]
    fn test_auto_policy_small_file() {
        let (file, expected_data) = create_temp_file(1024);

        let reader = FileReader::open_with_policy(
            file.path(),
            ReaderPolicy::Auto {
                threshold: 10 * 1024 * 1024,
            },
        )
        .expect("Failed to open file");

        // Below the threshold the auto policy must choose the buffered path.
        assert!(matches!(reader, FileReader::Buffered { .. }));
        assert_eq!(reader.as_slice(), &expected_data[..]);
    }

    #[test]
    fn test_auto_policy_large_file() {
        let size = 15 * 1024 * 1024;
        let (file, expected_data) = create_temp_file(size);

        let reader = FileReader::open_with_policy(
            file.path(),
            ReaderPolicy::Auto {
                threshold: 10 * 1024 * 1024,
            },
        )
        .expect("Failed to open file");

        assert_eq!(reader.as_slice(), &expected_data[..]);
        assert_eq!(reader.len(), size);
    }

    #[test]
    fn test_chunks_iteration() {
        let (file, _) = create_temp_file(1000);

        let reader = FileReader::open(file.path()).expect("Failed to open file");

        // 1000 bytes split into 100-byte chunks: ten full chunks, no remainder.
        let chunks: Vec<_> = reader.chunks(100).collect();
        assert_eq!(chunks.len(), 10);
        assert_eq!(chunks[0].len(), 100);
        assert_eq!(chunks[9].len(), 100);
    }

    #[test]
    fn test_empty_file() {
        let file = NamedTempFile::new().expect("Failed to create temp file");

        let reader = FileReader::open(file.path()).expect("Failed to open file");

        assert!(reader.is_empty());
        assert_eq!(reader.len(), 0);
    }

    #[test]
    fn test_threshold_boundary() {
        let threshold: u64 = 5 * 1024;
        let boundary = usize::try_from(threshold).expect("threshold fits in usize");

        // One byte below the threshold: must take the buffered path.
        let (file_small, data_small) = create_temp_file(boundary - 1);
        let reader_small =
            FileReader::open_with_policy(file_small.path(), ReaderPolicy::Auto { threshold })
                .expect("Failed to open small file");
        assert!(matches!(reader_small, FileReader::Buffered { .. }));
        assert_eq!(reader_small.as_slice(), &data_small[..]);

        // Exactly at the threshold: mapping is attempted (with buffered fallback).
        let (file_exact, data_exact) = create_temp_file(boundary);
        let reader_exact =
            FileReader::open_with_policy(file_exact.path(), ReaderPolicy::Auto { threshold })
                .expect("Failed to open exact file");
        assert_eq!(reader_exact.as_slice(), &data_exact[..]);

        // One byte above the threshold: mapping is attempted (with buffered fallback).
        let (file_large, data_large) = create_temp_file(boundary + 1);
        let reader_large =
            FileReader::open_with_policy(file_large.path(), ReaderPolicy::Auto { threshold })
                .expect("Failed to open large file");
        assert_eq!(reader_large.as_slice(), &data_large[..]);
    }
}