fast_yaml_parallel/io/
reader.rs1use std::fs::File;
4use std::path::Path;
5
6use memmap2::Mmap;
7
8use crate::error::{Error, Result};
9
/// Default size cut-off, in bytes (512 KiB), used by `SmartReader::new`.
///
/// `SmartReader::read` compares the file size against its threshold with `>=`,
/// so a file of exactly this size is memory-mapped rather than read into a
/// `String`.
const MMAP_THRESHOLD: u64 = 512 * 1024;
12
13#[derive(Debug)]
15pub enum FileContent {
16 String(String),
18 Mmap(Mmap),
20}
21
22impl FileContent {
23 pub fn as_str(&self) -> Result<&str> {
28 match self {
29 Self::String(s) => Ok(s),
30 Self::Mmap(mmap) => std::str::from_utf8(mmap).map_err(|source| Error::Utf8 { source }),
31 }
32 }
33
34 pub const fn is_mmap(&self) -> bool {
36 matches!(self, Self::Mmap(_))
37 }
38
39 pub fn len(&self) -> usize {
41 match self {
42 Self::String(s) => s.len(),
43 Self::Mmap(mmap) => mmap.len(),
44 }
45 }
46
47 pub fn is_empty(&self) -> bool {
49 self.len() == 0
50 }
51}
52
/// File reader that chooses between reading a file into a `String` and
/// memory-mapping it, based on the file's size.
///
/// The type is a plain configuration value (a single byte count), so it
/// derives the common value traits: it can be copied freely and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SmartReader {
    /// File size in bytes at or above which `read` tries a memory map first.
    mmap_threshold: u64,
}
61
62impl SmartReader {
63 pub const fn new() -> Self {
80 Self::with_threshold(MMAP_THRESHOLD)
81 }
82
83 pub const fn with_threshold(threshold: u64) -> Self {
94 Self {
95 mmap_threshold: threshold,
96 }
97 }
98
99 pub fn read(&self, path: &Path) -> Result<FileContent> {
114 let metadata = std::fs::metadata(path).map_err(|source| Error::Io {
115 path: path.to_path_buf(),
116 source,
117 })?;
118
119 if metadata.is_dir() {
120 return Err(Error::Io {
121 path: path.to_path_buf(),
122 source: std::io::Error::new(
123 std::io::ErrorKind::InvalidInput,
124 "path is a directory, not a file",
125 ),
126 });
127 }
128
129 let size = metadata.len();
130
131 if size >= self.mmap_threshold {
132 Self::read_mmap(path).or_else(|_| {
133 Self::read_string(path)
135 })
136 } else {
137 Self::read_string(path)
138 }
139 }
140
141 fn read_string(path: &Path) -> Result<FileContent> {
143 let content = std::fs::read_to_string(path).map_err(|source| Error::Io {
144 path: path.to_path_buf(),
145 source,
146 })?;
147 Ok(FileContent::String(content))
148 }
149
150 #[allow(unsafe_code)]
152 fn read_mmap(path: &Path) -> Result<FileContent> {
153 let file = File::open(path).map_err(|source| Error::Io {
154 path: path.to_path_buf(),
155 source,
156 })?;
157
158 let mmap = unsafe {
167 Mmap::map(&file).map_err(|source| Error::Io {
168 path: path.to_path_buf(),
169 source,
170 })?
171 };
172
173 Ok(FileContent::Mmap(mmap))
174 }
175}
176
177impl Default for SmartReader {
178 fn default() -> Self {
179 Self::new()
180 }
181}
182
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Byte count used by the boundary tests (512 KiB).
    const HALF_MIB: usize = 512 * 1024;

    /// Creates a named temporary file containing exactly `body`.
    ///
    /// The file is returned so the caller keeps it alive (and on disk) for as
    /// long as any content read from it is in use.
    fn temp_file_with(body: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        write!(file, "{body}").unwrap();
        file
    }

    #[test]
    fn test_file_content_as_str_string() {
        let content = FileContent::String(String::from("test content"));

        assert_eq!(content.as_str().unwrap(), "test content");
        assert!(!content.is_mmap());
        assert_eq!(content.len(), 12);
        assert!(!content.is_empty());
    }

    #[test]
    fn test_file_content_is_empty() {
        assert!(FileContent::String(String::new()).is_empty());
    }

    #[test]
    fn test_reader_small_file_uses_string() {
        let file = temp_file_with("small: content");

        let content = SmartReader::new().read(file.path()).unwrap();

        assert!(!content.is_mmap());
        assert_eq!(content.as_str().unwrap(), "small: content");
    }

    #[test]
    fn test_reader_large_file_uses_mmap() {
        let large_content = "x".repeat(600 * 1024);
        let file = temp_file_with(&large_content);

        let content = SmartReader::new().read(file.path()).unwrap();

        assert!(content.is_mmap());
        assert_eq!(content.len(), large_content.len());
    }

    #[test]
    fn test_reader_custom_threshold() {
        let file = temp_file_with("test content");

        // 12 bytes of content against a 5-byte threshold.
        let content = SmartReader::with_threshold(5).read(file.path()).unwrap();

        assert!(content.is_mmap());
    }

    #[test]
    fn test_reader_default_equals_new() {
        let from_new = SmartReader::new();
        let from_default = SmartReader::default();

        assert_eq!(from_new.mmap_threshold, from_default.mmap_threshold);
    }

    #[test]
    fn test_read_nonexistent_file() {
        let outcome = SmartReader::new().read(Path::new("/nonexistent/file.yaml"));

        assert!(outcome.is_err());
    }

    #[test]
    fn test_file_content_len() {
        assert_eq!(FileContent::String(String::from("hello")).len(), 5);
    }

    #[test]
    fn test_read_utf8_validation_with_mmap() {
        // 20 bytes x 30_000 lines = 600_000 bytes, above the default threshold.
        let file = temp_file_with(&"valid: utf8 content\n".repeat(30_000));

        let file_content = SmartReader::new().read(file.path()).unwrap();

        assert!(file_content.is_mmap());
        assert!(file_content.as_str().is_ok());
    }

    #[test]
    #[cfg(unix)]
    fn test_symlink_handling() {
        use std::os::unix::fs::symlink;

        let temp_dir = tempfile::tempdir().unwrap();
        let target = temp_dir.path().join("target.yaml");
        let link = temp_dir.path().join("link.yaml");
        std::fs::write(&target, "key: value\n").unwrap();
        symlink(&target, &link).unwrap();

        let content = SmartReader::new().read(&link).unwrap();

        assert_eq!(content.as_str().unwrap(), "key: value\n");
    }

    #[test]
    #[cfg(unix)]
    fn test_broken_symlink_error() {
        use std::os::unix::fs::symlink;

        let temp_dir = tempfile::tempdir().unwrap();
        let nonexistent = temp_dir.path().join("nonexistent.yaml");
        let link = temp_dir.path().join("broken_link.yaml");
        symlink(&nonexistent, &link).unwrap();

        let outcome = SmartReader::new().read(&link);

        assert!(outcome.is_err());
    }

    #[test]
    fn test_file_exactly_at_threshold() {
        let file = temp_file_with(&"x".repeat(HALF_MIB));

        let file_content = SmartReader::new().read(file.path()).unwrap();

        assert!(file_content.is_mmap());
        assert_eq!(file_content.len(), HALF_MIB);
    }

    #[test]
    fn test_file_just_below_threshold() {
        let file = temp_file_with(&"x".repeat(HALF_MIB - 1));

        let file_content = SmartReader::new().read(file.path()).unwrap();

        assert!(!file_content.is_mmap());
        assert_eq!(file_content.len(), HALF_MIB - 1);
    }

    #[test]
    fn test_file_just_above_threshold() {
        let file = temp_file_with(&"x".repeat(HALF_MIB + 1));

        let file_content = SmartReader::new().read(file.path()).unwrap();

        assert!(file_content.is_mmap());
        assert_eq!(file_content.len(), HALF_MIB + 1);
    }

    #[test]
    fn test_zero_length_file() {
        let file = NamedTempFile::new().unwrap();

        let content = SmartReader::new().read(file.path()).unwrap();

        assert!(content.is_empty());
        assert_eq!(content.len(), 0);
        assert_eq!(content.as_str().unwrap(), "");
    }

    #[test]
    fn test_directory_instead_of_file() {
        let temp_dir = tempfile::tempdir().unwrap();

        let outcome = SmartReader::new().read(temp_dir.path());

        assert!(outcome.is_err());
    }

    #[test]
    fn test_invalid_utf8_with_string() {
        let temp_dir = tempfile::tempdir().unwrap();
        let path = temp_dir.path().join("invalid.bin");
        std::fs::write(&path, b"\xFF\xFE invalid utf8").unwrap();

        // Small file: the string read itself rejects the invalid bytes.
        let outcome = SmartReader::new().read(&path);

        assert!(outcome.is_err());
    }

    #[test]
    fn test_invalid_utf8_with_mmap() {
        let temp_dir = tempfile::tempdir().unwrap();
        let path = temp_dir.path().join("invalid_large.bin");

        let mut invalid_content = vec![0xFF_u8; 600 * 1024];
        invalid_content.extend_from_slice(b" invalid utf8");
        std::fs::write(&path, invalid_content).unwrap();

        // Large file: mapping succeeds, validation is deferred to `as_str`.
        let file_content = SmartReader::new().read(&path).unwrap();
        assert!(file_content.is_mmap());

        assert!(file_content.as_str().is_err());
    }

    #[test]
    fn test_empty_mmap_file() {
        let temp_dir = tempfile::tempdir().unwrap();
        let path = temp_dir.path().join("empty.yaml");
        std::fs::write(&path, "").unwrap();

        // Threshold 0 sends even an empty file down the mmap-first path.
        let content = SmartReader::with_threshold(0).read(&path).unwrap();

        assert!(content.is_empty());
        assert_eq!(content.as_str().unwrap(), "");
    }

    #[test]
    fn test_file_content_mmap_is_mmap() {
        let file = temp_file_with(&"x".repeat(600 * 1024));

        let file_content = SmartReader::new().read(file.path()).unwrap();

        assert!(file_content.is_mmap());
        assert_eq!(file_content.len(), 600 * 1024);
    }

    #[test]
    #[cfg(unix)]
    fn test_directory_symlink_rejection() {
        use std::os::unix::fs::symlink;

        let temp_dir = tempfile::tempdir().unwrap();
        let target_dir = temp_dir.path().join("target_dir");
        let link = temp_dir.path().join("dir_link");
        std::fs::create_dir(&target_dir).unwrap();
        symlink(&target_dir, &link).unwrap();

        let outcome = SmartReader::new().read(&link);

        assert!(outcome.is_err());
        match outcome {
            Err(Error::Io { source, .. }) => {
                assert_eq!(source.kind(), std::io::ErrorKind::InvalidInput);
            }
            _ => panic!("expected Io error"),
        }
    }
}