Skip to main content

coreutils_rs/common/
io.rs

1use std::fs::{self, File};
2use std::io::{self, Read};
3use std::ops::Deref;
4use std::path::Path;
5
6use memmap2::Mmap;
7
/// Minimum file size, in bytes, at which `read_file` memory-maps a file
/// rather than reading it into an owned buffer.
///
/// Setting up a mapping has a fixed cost (page tables), so files below this
/// size are cheaper to read with a plain buffered read.
const MMAP_THRESHOLD: u64 = 1 << 16; // 64 KiB
11
12/// Holds file data — either zero-copy mmap or an owned Vec.
13/// Dereferences to `&[u8]` for transparent use.
14pub enum FileData {
15    Mmap(Mmap),
16    Owned(Vec<u8>),
17}
18
19impl Deref for FileData {
20    type Target = [u8];
21
22    fn deref(&self) -> &[u8] {
23        match self {
24            FileData::Mmap(m) => m,
25            FileData::Owned(v) => v,
26        }
27    }
28}
29
30/// Read a file with zero-copy mmap for large files, buffered read for small ones.
31pub fn read_file(path: &Path) -> io::Result<FileData> {
32    let metadata = fs::metadata(path)?;
33
34    if metadata.len() >= MMAP_THRESHOLD {
35        let file = File::open(path)?;
36        // SAFETY: Read-only mapping. File must not be truncated during use.
37        let mmap = unsafe { Mmap::map(&file)? };
38        Ok(FileData::Mmap(mmap))
39    } else {
40        Ok(FileData::Owned(fs::read(path)?))
41    }
42}
43
/// Report the length in bytes of the file at `path` without reading its
/// contents (useful when only a byte count is needed).
///
/// # Errors
///
/// Returns the `io::Error` from the metadata query, e.g. when the path does
/// not exist.
pub fn file_size(path: &Path) -> io::Result<u64> {
    fs::metadata(path).map(|meta| meta.len())
}
48
/// Drain standard input until end-of-file and return everything that was read.
///
/// # Errors
///
/// Returns the `io::Error` reported by the underlying read.
pub fn read_stdin() -> io::Result<Vec<u8>> {
    let stdin = io::stdin();
    // Lock once so the whole read runs against a single handle.
    let mut handle = stdin.lock();

    let mut bytes = Vec::new();
    handle.read_to_end(&mut bytes)?;
    Ok(bytes)
}
54}