Skip to main content

coreutils_rs/common/
io.rs

1use std::fs::{self, File};
2use std::io::{self, Read};
3use std::ops::Deref;
4use std::path::Path;
5
6use memmap2::{Mmap, MmapOptions};
7
8/// Holds file data — either zero-copy mmap or an owned Vec.
9/// Dereferences to `&[u8]` for transparent use.
10pub enum FileData {
11    Mmap(Mmap),
12    Owned(Vec<u8>),
13}
14
15impl Deref for FileData {
16    type Target = [u8];
17
18    fn deref(&self) -> &[u8] {
19        match self {
20            FileData::Mmap(m) => m,
21            FileData::Owned(v) => v,
22        }
23    }
24}
25
/// File size (in bytes) below which `read()` is used instead of `mmap`.
///
/// Mapping a file has fixed setup/teardown costs (page-table creation, TLB
/// flush on unmap); for small files those costs outweigh the benefit of
/// avoiding a copy, so a plain read is faster.
const MMAP_THRESHOLD: u64 = 256 << 10; // 256 KiB
30
31/// Open a file with O_NOATIME on Linux to avoid atime inode writes.
32/// Falls back to normal open if O_NOATIME fails (e.g., not file owner).
33#[cfg(target_os = "linux")]
34fn open_noatime(path: &Path) -> io::Result<File> {
35    use std::os::unix::fs::OpenOptionsExt;
36    // O_NOATIME = 0o1000000 on Linux
37    match fs::OpenOptions::new()
38        .read(true)
39        .custom_flags(libc::O_NOATIME)
40        .open(path)
41    {
42        Ok(f) => Ok(f),
43        Err(_) => File::open(path),
44    }
45}
46
/// Open `path` read-only.
///
/// Non-Linux counterpart of the `O_NOATIME` opener: no special flags are
/// passed here, so this is an ordinary read-only open.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    fs::OpenOptions::new().read(true).open(path)
}
51
52/// Read a file with zero-copy mmap for large files or read() for small files.
53/// Opens once with O_NOATIME, uses fstat for metadata to save a syscall.
54pub fn read_file(path: &Path) -> io::Result<FileData> {
55    let file = open_noatime(path)?;
56    let metadata = file.metadata()?;
57    let len = metadata.len();
58
59    if len > 0 && metadata.file_type().is_file() {
60        // Small files: read from already-open fd (avoids double open + page table overhead)
61        if len < MMAP_THRESHOLD {
62            let mut buf = Vec::with_capacity(len as usize);
63            let mut reader = file;
64            reader.read_to_end(&mut buf)?;
65            return Ok(FileData::Owned(buf));
66        }
67
68        // SAFETY: Read-only mapping. File must not be truncated during use.
69        // Don't use populate() — it blocks until all pages are loaded.
70        // Instead, MADV_SEQUENTIAL triggers async readahead which overlaps with processing.
71        match unsafe { MmapOptions::new().map(&file) } {
72            Ok(mmap) => {
73                #[cfg(target_os = "linux")]
74                {
75                    let _ = mmap.advise(memmap2::Advice::Sequential);
76                    // WILLNEED triggers immediate async readahead
77                    unsafe {
78                        libc::madvise(
79                            mmap.as_ptr() as *mut libc::c_void,
80                            mmap.len(),
81                            libc::MADV_WILLNEED,
82                        );
83                    }
84                    if len >= 2 * 1024 * 1024 {
85                        unsafe {
86                            libc::madvise(
87                                mmap.as_ptr() as *mut libc::c_void,
88                                mmap.len(),
89                                libc::MADV_HUGEPAGE,
90                            );
91                        }
92                    }
93                }
94                Ok(FileData::Mmap(mmap))
95            }
96            Err(_) => {
97                // mmap failed — fall back to read
98                let mut buf = Vec::with_capacity(len as usize);
99                let mut reader = file;
100                reader.read_to_end(&mut buf)?;
101                Ok(FileData::Owned(buf))
102            }
103        }
104    } else if len > 0 {
105        // Non-regular file (special files) — read from open fd
106        let mut buf = Vec::new();
107        let mut reader = file;
108        reader.read_to_end(&mut buf)?;
109        Ok(FileData::Owned(buf))
110    } else {
111        Ok(FileData::Owned(Vec::new()))
112    }
113}
114
/// Return the size in bytes that the filesystem reports for `path`, without
/// opening or reading the file (used for byte-count-only fast paths).
///
/// # Errors
///
/// Returns the underlying error if the metadata lookup fails, e.g. when the
/// path does not exist.
pub fn file_size(path: &Path) -> io::Result<u64> {
    fs::metadata(path).map(|meta| meta.len())
}
119
/// Drain standard input to end-of-file and return every byte read.
///
/// # Errors
///
/// Returns the underlying I/O error if reading from stdin fails.
pub fn read_stdin() -> io::Result<Vec<u8>> {
    let stdin = io::stdin();
    let mut handle = stdin.lock();
    let mut data = Vec::new();
    handle.read_to_end(&mut data)?;
    Ok(data)
}