1#![allow(unsafe_code)]
8
9use crate::error::{IoError, Result};
10use memmap2::Mmap;
11use std::fs::File;
12use std::io::Read;
13use std::path::Path;
14
/// Size in bytes (1 MiB) from which `FileReader` reads a file through a
/// memory map instead of a plain buffered read.
const MMAP_THRESHOLD: u64 = 1 << 20;

/// Largest file size in bytes (1 GiB) that `FileReader::open` accepts.
const MAX_FILE_SIZE: u64 = 1 << 30;
20
/// An opened file together with the size and path captured when it was opened.
///
/// Instances are created with `FileReader::open`; the contents can then be
/// read into memory or exposed as a memory map.
#[derive(Debug)]
pub struct FileReader {
    /// Open handle used for both direct reads and memory mapping.
    file: File,
    /// Length in bytes reported by the file's metadata at open time.
    size: u64,
    /// Lossy UTF-8 rendering of the path, used in error messages.
    path: String,
}
43
44impl FileReader {
45 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
55 let path_ref = path.as_ref();
56 let path_str = path_ref.to_string_lossy().to_string();
57
58 if !path_ref.exists() {
59 return Err(IoError::FileNotFound { path: path_str }.into());
60 }
61
62 let file = File::open(path_ref).map_err(|e| IoError::ReadFailed {
63 path: path_str.clone(),
64 reason: e.to_string(),
65 })?;
66
67 let metadata = file.metadata().map_err(|e| IoError::ReadFailed {
68 path: path_str.clone(),
69 reason: e.to_string(),
70 })?;
71
72 let size = metadata.len();
73
74 if size > MAX_FILE_SIZE {
75 return Err(IoError::ReadFailed {
76 path: path_str,
77 reason: format!("file too large: {size} bytes (max: {MAX_FILE_SIZE} bytes)"),
78 }
79 .into());
80 }
81
82 Ok(Self {
83 file,
84 size,
85 path: path_str,
86 })
87 }
88
89 #[must_use]
91 pub const fn size(&self) -> u64 {
92 self.size
93 }
94
95 #[must_use]
97 pub fn path(&self) -> &str {
98 &self.path
99 }
100
101 pub fn read_to_string(&self) -> Result<String> {
109 if self.size >= MMAP_THRESHOLD {
110 self.read_mmap()
111 } else {
112 self.read_direct()
113 }
114 }
115
116 pub fn read_to_bytes(&self) -> Result<Vec<u8>> {
122 if self.size >= MMAP_THRESHOLD {
123 self.read_mmap_bytes()
124 } else {
125 self.read_direct_bytes()
126 }
127 }
128
129 fn read_mmap(&self) -> Result<String> {
131 let bytes = self.read_mmap_bytes()?;
132 String::from_utf8(bytes).map_err(|e| {
133 IoError::ReadFailed {
134 path: self.path.clone(),
135 reason: format!("invalid UTF-8: {e}"),
136 }
137 .into()
138 })
139 }
140
141 fn read_mmap_bytes(&self) -> Result<Vec<u8>> {
143 let mmap = unsafe {
145 Mmap::map(&self.file).map_err(|e| IoError::MmapFailed {
146 path: self.path.clone(),
147 reason: e.to_string(),
148 })?
149 };
150
151 Ok(mmap.to_vec())
152 }
153
154 fn read_direct(&self) -> Result<String> {
156 let bytes = self.read_direct_bytes()?;
157 String::from_utf8(bytes).map_err(|e| {
158 IoError::ReadFailed {
159 path: self.path.clone(),
160 reason: format!("invalid UTF-8: {e}"),
161 }
162 .into()
163 })
164 }
165
166 #[allow(clippy::cast_possible_truncation)]
168 fn read_direct_bytes(&self) -> Result<Vec<u8>> {
169 let mut file = &self.file;
170 let mut buffer = Vec::with_capacity(self.size as usize);
171 file.read_to_end(&mut buffer)
172 .map_err(|e| IoError::ReadFailed {
173 path: self.path.clone(),
174 reason: e.to_string(),
175 })?;
176 Ok(buffer)
177 }
178
179 pub fn mmap(&self) -> Result<Mmap> {
188 unsafe {
190 Mmap::map(&self.file).map_err(|e| {
191 IoError::MmapFailed {
192 path: self.path.clone(),
193 reason: e.to_string(),
194 }
195 .into()
196 })
197 }
198 }
199}
200
201pub fn read_file<P: AsRef<Path>>(path: P) -> Result<String> {
219 FileReader::open(path)?.read_to_string()
220}
221
222pub fn read_file_mmap<P: AsRef<Path>>(path: P) -> Result<Mmap> {
235 FileReader::open(path)?.mmap()
236}
237
238pub fn write_file<P: AsRef<Path>>(path: P, content: &str) -> Result<()> {
249 let path_ref = path.as_ref();
250 let path_str = path_ref.to_string_lossy().to_string();
251
252 if let Some(parent) = path_ref.parent()
254 && !parent.exists()
255 {
256 std::fs::create_dir_all(parent).map_err(|e| IoError::DirectoryFailed {
257 path: parent.to_string_lossy().to_string(),
258 reason: e.to_string(),
259 })?;
260 }
261
262 std::fs::write(path_ref, content).map_err(|e| IoError::WriteFailed {
263 path: path_str,
264 reason: e.to_string(),
265 })?;
266
267 Ok(())
268}
269
270pub fn write_chunks<'a, P, I>(out_dir: P, chunks: I, prefix: &str) -> Result<Vec<String>>
286where
287 P: AsRef<Path>,
288 I: Iterator<Item = (usize, &'a str)>,
289{
290 let out_path = out_dir.as_ref();
291 let out_str = out_path.to_string_lossy().to_string();
292
293 if !out_path.exists() {
295 std::fs::create_dir_all(out_path).map_err(|e| IoError::DirectoryFailed {
296 path: out_str.clone(),
297 reason: e.to_string(),
298 })?;
299 }
300
301 let mut paths = Vec::new();
302
303 for (index, content) in chunks {
304 let filename = format!("{prefix}_{index:04}.txt");
305 let file_path = out_path.join(&filename);
306 let file_str = file_path.to_string_lossy().to_string();
307
308 std::fs::write(&file_path, content).map_err(|e| IoError::WriteFailed {
309 path: file_str.clone(),
310 reason: e.to_string(),
311 })?;
312
313 paths.push(file_str);
314 }
315
316 Ok(paths)
317}
318
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates a fresh temporary directory holding one file `name` filled with
    /// `contents`. The directory guard is returned so the file outlives the
    /// call; dropping it removes everything.
    fn scratch_file(name: &str, contents: impl AsRef<[u8]>) -> (TempDir, std::path::PathBuf) {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join(name);
        std::fs::write(&path, contents).unwrap();
        (dir, path)
    }

    #[test]
    fn test_read_small_file() {
        let (_dir, path) = scratch_file("small.txt", "Hello, world!");

        assert_eq!(read_file(&path).unwrap(), "Hello, world!");
    }

    #[test]
    fn test_read_nonexistent_file() {
        assert!(read_file("/nonexistent/path/file.txt").is_err());
    }

    #[test]
    fn test_file_reader_size() {
        let (_dir, path) = scratch_file("test.txt", "Hello");

        let reader = FileReader::open(&path).unwrap();
        assert_eq!(reader.size(), 5);
    }

    #[test]
    fn test_file_reader_path() {
        let (_dir, path) = scratch_file("test.txt", "Hello");

        let reader = FileReader::open(&path).unwrap();
        assert!(reader.path().contains("test.txt"));
    }

    #[test]
    fn test_write_file() {
        let dir = TempDir::new().unwrap();
        let target = dir.path().join("subdir/output.txt");

        write_file(&target, "Test content").unwrap();

        assert_eq!(std::fs::read_to_string(&target).unwrap(), "Test content");
    }

    #[test]
    fn test_write_file_existing_dir() {
        let dir = TempDir::new().unwrap();
        let target = dir.path().join("output.txt");

        write_file(&target, "Test content").unwrap();

        assert_eq!(std::fs::read_to_string(&target).unwrap(), "Test content");
    }

    #[test]
    fn test_write_chunks() {
        let dir = TempDir::new().unwrap();
        let out_dir = dir.path().join("chunks");

        let chunks = vec![(0, "First chunk"), (1, "Second chunk")];
        let written = write_chunks(&out_dir, chunks.into_iter(), "chunk").unwrap();

        assert_eq!(written.len(), 2);
        assert_eq!(std::fs::read_to_string(&written[0]).unwrap(), "First chunk");
        assert_eq!(std::fs::read_to_string(&written[1]).unwrap(), "Second chunk");
    }

    #[test]
    fn test_write_chunks_existing_dir() {
        let dir = TempDir::new().unwrap();
        let out_dir = dir.path().join("existing");
        std::fs::create_dir_all(&out_dir).unwrap();

        let chunks = vec![(0, "Content")];
        let written = write_chunks(&out_dir, chunks.into_iter(), "data").unwrap();

        assert_eq!(written.len(), 1);
        assert!(written[0].contains("data_0000.txt"));
    }

    #[test]
    fn test_read_utf8_file() {
        // NOTE(review): the literal looks like mis-decoded text; it is kept
        // verbatim and still exercises multi-byte UTF-8 round-tripping.
        let text = "Hello, δΈη! π";
        let (_dir, path) = scratch_file("unicode.txt", text);

        assert_eq!(read_file(&path).unwrap(), text);
    }

    #[test]
    fn test_read_to_bytes() {
        let (_dir, path) = scratch_file("bytes.bin", b"binary\x00data");

        let bytes = FileReader::open(&path).unwrap().read_to_bytes().unwrap();
        assert_eq!(bytes, b"binary\x00data");
    }

    #[test]
    fn test_read_file_mmap() {
        let (_dir, path) = scratch_file("mmap.txt", "Memory mapped content");

        let map = read_file_mmap(&path).unwrap();
        assert_eq!(&map[..], b"Memory mapped content");
    }

    #[test]
    fn test_file_reader_mmap() {
        let (_dir, path) = scratch_file("mmap.txt", "Test content for mmap");

        let reader = FileReader::open(&path).unwrap();
        let map = reader.mmap().unwrap();
        assert_eq!(&map[..], b"Test content for mmap");
    }

    #[test]
    fn test_read_empty_file() {
        let (_dir, path) = scratch_file("empty.txt", "");

        assert!(read_file(&path).unwrap().is_empty());
    }

    #[test]
    fn test_read_large_file_mmap_path() {
        // 1.5 MiB, i.e. above the threshold that selects the mmap path.
        let large_content = "x".repeat(1024 * 1024 + 512 * 1024);
        let (_dir, path) = scratch_file("large.txt", &large_content);

        let reader = FileReader::open(&path).unwrap();
        assert!(reader.size() >= MMAP_THRESHOLD);

        let text = reader.read_to_string().unwrap();
        assert_eq!(text.len(), large_content.len());

        let bytes = FileReader::open(&path).unwrap().read_to_bytes().unwrap();
        assert_eq!(bytes.len(), large_content.len());
    }

    #[test]
    fn test_read_invalid_utf8() {
        let (_dir, path) = scratch_file("invalid.bin", [0xff_u8, 0xfe, 0x00, 0x01]);

        let reader = FileReader::open(&path).unwrap();
        assert!(reader.read_to_string().is_err());
    }

    #[test]
    fn test_read_invalid_utf8_via_mmap() {
        // Large enough for the mmap path, with an invalid leading byte.
        let mut content = vec![0x78u8; 1024 * 1024 + 100];
        content[0] = 0xff;
        let (_dir, path) = scratch_file("large_invalid.bin", &content);

        let reader = FileReader::open(&path).unwrap();
        assert!(reader.read_to_string().is_err());
    }

    #[test]
    fn test_write_chunks_empty() {
        let dir = TempDir::new().unwrap();
        let out_dir = dir.path().join("empty_chunks");

        let chunks: Vec<(usize, &str)> = Vec::new();
        let written = write_chunks(&out_dir, chunks.into_iter(), "chunk").unwrap();

        assert!(written.is_empty());
    }

    #[test]
    fn test_file_reader_read_to_string_small() {
        let content = "Small file content for direct read";
        let (_dir, path) = scratch_file("small_string.txt", content);

        let reader = FileReader::open(&path).unwrap();
        assert!(reader.size() < MMAP_THRESHOLD);
        assert_eq!(reader.read_to_string().unwrap(), content);
    }

    #[test]
    fn test_file_reader_read_to_bytes_small() {
        let content = b"Small binary content";
        let (_dir, path) = scratch_file("small_bytes.bin", content);

        let reader = FileReader::open(&path).unwrap();
        assert!(reader.size() < MMAP_THRESHOLD);
        assert_eq!(reader.read_to_bytes().unwrap(), content);
    }

    #[test]
    fn test_write_file_to_nested_dirs() {
        let dir = TempDir::new().unwrap();
        let target = dir.path().join("a/b/c/deep.txt");

        write_file(&target, "Deep content").unwrap();

        assert_eq!(std::fs::read_to_string(&target).unwrap(), "Deep content");
    }

    #[test]
    fn test_write_chunks_creates_directory() {
        let dir = TempDir::new().unwrap();
        let out_dir = dir.path().join("new_chunks_dir");
        assert!(!out_dir.exists());

        let chunks = vec![(0, "Chunk content")];
        let written = write_chunks(&out_dir, chunks.into_iter(), "test").unwrap();

        assert!(out_dir.exists());
        assert_eq!(written.len(), 1);
    }

    #[test]
    fn test_file_reader_read_to_bytes_binary() {
        let content = b"\x00\x01\x02\x03\x04";
        let (_dir, path) = scratch_file("bytes.bin", content);

        let reader = FileReader::open(&path).unwrap();
        assert_eq!(reader.read_to_bytes().unwrap(), content);
    }

    #[test]
    fn test_file_reader_read_to_bytes_large() {
        let large_content: Vec<u8> = (0..255u8).cycle().take(1024 * 1024 + 100).collect();
        let (_dir, path) = scratch_file("large_bytes.bin", &large_content);

        let reader = FileReader::open(&path).unwrap();
        assert_eq!(reader.read_to_bytes().unwrap().len(), large_content.len());
    }

    #[test]
    fn test_read_file_mmap_nonexistent() {
        assert!(read_file_mmap("/nonexistent/path/file.txt").is_err());
    }

    #[test]
    fn test_file_reader_open_nonexistent() {
        assert!(FileReader::open("/nonexistent/path/file.bin").is_err());
    }
}