Skip to main content

coreutils_rs/common/
io.rs

1use std::fs::{self, File};
2use std::io::{self, Read};
3use std::ops::Deref;
4use std::path::Path;
5
6#[cfg(target_os = "linux")]
7use std::sync::atomic::{AtomicBool, Ordering};
8
9use memmap2::{Mmap, MmapOptions};
10
11/// Holds file data — either zero-copy mmap or an owned Vec.
12/// Dereferences to `&[u8]` for transparent use.
13pub enum FileData {
14    Mmap(Mmap),
15    Owned(Vec<u8>),
16}
17
18impl Deref for FileData {
19    type Target = [u8];
20
21    fn deref(&self) -> &[u8] {
22        match self {
23            FileData::Mmap(m) => m,
24            FileData::Owned(v) => v,
25        }
26    }
27}
28
/// Minimum file size, in bytes, at which mmap is preferred over read().
///
/// Below 1 MiB a plain read() wins: mapping has fixed setup/teardown costs
/// (page-table entries for up to 256 pages, a TLB flush on munmap) that
/// outweigh the benefit of avoiding the copy.
const MMAP_THRESHOLD: u64 = 1 << 20;
34
/// Process-wide flag recording whether opening with O_NOATIME is still worth trying.
///
/// Starts out `true`. The first open() that fails with EPERM clears it, after
/// which O_NOATIME is never attempted again — this avoids paying for a failed
/// open() on every subsequent file.
#[cfg(target_os = "linux")]
static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
39
40/// Open a file with O_NOATIME on Linux to avoid atime inode writes.
41/// Caches whether O_NOATIME works to avoid double-open on every file.
42#[cfg(target_os = "linux")]
43fn open_noatime(path: &Path) -> io::Result<File> {
44    use std::os::unix::fs::OpenOptionsExt;
45    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
46        match fs::OpenOptions::new()
47            .read(true)
48            .custom_flags(libc::O_NOATIME)
49            .open(path)
50        {
51            Ok(f) => return Ok(f),
52            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
53                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
54                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
55            }
56            Err(e) => return Err(e), // Real error, propagate
57        }
58    }
59    File::open(path)
60}
61
/// Open `path` read-only. Platforms other than Linux get a plain open with no
/// access-time flag.
#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    fs::OpenOptions::new().read(true).open(path)
}
66
67/// Read a file with zero-copy mmap for large files or read() for small files.
68/// Opens once with O_NOATIME, uses fstat for metadata to save a syscall.
69pub fn read_file(path: &Path) -> io::Result<FileData> {
70    let file = open_noatime(path)?;
71    let metadata = file.metadata()?;
72    let len = metadata.len();
73
74    if len > 0 && metadata.file_type().is_file() {
75        // Small files: read from already-open fd (avoids double open + page table overhead)
76        if len < MMAP_THRESHOLD {
77            let mut buf = Vec::with_capacity(len as usize);
78            let mut reader = file;
79            reader.read_to_end(&mut buf)?;
80            return Ok(FileData::Owned(buf));
81        }
82
83        // SAFETY: Read-only mapping. File must not be truncated during use.
84        // Don't use populate() — it blocks until all pages are loaded.
85        // Instead, MADV_SEQUENTIAL triggers async readahead which overlaps with processing.
86        match unsafe { MmapOptions::new().map(&file) } {
87            Ok(mmap) => {
88                #[cfg(target_os = "linux")]
89                {
90                    let _ = mmap.advise(memmap2::Advice::Sequential);
91                    // WILLNEED triggers immediate async readahead
92                    unsafe {
93                        libc::madvise(
94                            mmap.as_ptr() as *mut libc::c_void,
95                            mmap.len(),
96                            libc::MADV_WILLNEED,
97                        );
98                    }
99                    if len >= 2 * 1024 * 1024 {
100                        unsafe {
101                            libc::madvise(
102                                mmap.as_ptr() as *mut libc::c_void,
103                                mmap.len(),
104                                libc::MADV_HUGEPAGE,
105                            );
106                        }
107                    }
108                }
109                Ok(FileData::Mmap(mmap))
110            }
111            Err(_) => {
112                // mmap failed — fall back to read
113                let mut buf = Vec::with_capacity(len as usize);
114                let mut reader = file;
115                reader.read_to_end(&mut buf)?;
116                Ok(FileData::Owned(buf))
117            }
118        }
119    } else if len > 0 {
120        // Non-regular file (special files) — read from open fd
121        let mut buf = Vec::new();
122        let mut reader = file;
123        reader.read_to_end(&mut buf)?;
124        Ok(FileData::Owned(buf))
125    } else {
126        Ok(FileData::Owned(Vec::new()))
127    }
128}
129
/// Report the size of the file at `path` in bytes, using only its metadata.
///
/// The file contents are never read, which makes this suitable for
/// byte-count-only fast paths.
///
/// # Errors
///
/// Returns the error from the metadata lookup (for example when `path` does
/// not exist).
pub fn file_size(path: &Path) -> io::Result<u64> {
    fs::metadata(path).map(|meta| meta.len())
}
134
/// Collect everything available on standard input, up to EOF, into a byte vector.
///
/// # Errors
///
/// Returns the first I/O error hit while reading.
pub fn read_stdin() -> io::Result<Vec<u8>> {
    let stdin = io::stdin();
    let mut handle = stdin.lock();
    let mut bytes = Vec::new();
    handle.read_to_end(&mut bytes).map(|_| bytes)
}