coreutils_rs/common/io.rs

use std::fs::{self, File};
use std::io::{self, Read};
use std::ops::Deref;
use std::path::Path;

#[cfg(target_os = "linux")]
use std::sync::atomic::{AtomicBool, Ordering};

use memmap2::{Mmap, MmapOptions};

/// Holds file data — either zero-copy mmap or an owned Vec.
/// Dereferences to `&[u8]` for transparent use.
pub enum FileData {
    Mmap(Mmap),
    Owned(Vec<u8>),
}

impl Deref for FileData {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        match self {
            FileData::Mmap(m) => m,
            FileData::Owned(v) => v,
        }
    }
}

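// Hedged usage sketch (not part of this module's API; the function name is
// hypothetical): because FileData derefs to [u8], callers can treat
// mmap-backed and heap-backed data identically.
#[allow(dead_code)]
fn example_newline_count(data: &FileData) -> usize {
    // Deref coercion lets slice methods work directly on &FileData.
    data.iter().filter(|&&b| b == b'\n').count()
}
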
/// Threshold below which we use read() instead of mmap.
/// For files under 1MB, read() is faster since mmap has setup/teardown overhead
/// (page table creation for up to 256 pages, TLB flush on munmap) that exceeds
/// the zero-copy benefit.
const MMAP_THRESHOLD: u64 = 1024 * 1024;

/// Track whether O_NOATIME is supported to avoid repeated failed open() attempts.
/// After the first EPERM, we never try O_NOATIME again (saves one syscall per file).
#[cfg(target_os = "linux")]
static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);

/// Open a file with O_NOATIME on Linux to avoid atime inode writes.
/// Caches whether O_NOATIME works to avoid double-open on every file.
#[cfg(target_os = "linux")]
fn open_noatime(path: &Path) -> io::Result<File> {
    use std::os::unix::fs::OpenOptionsExt;
    if NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        match fs::OpenOptions::new()
            .read(true)
            .custom_flags(libc::O_NOATIME)
            .open(path)
        {
            Ok(f) => return Ok(f),
            Err(ref e) if e.raw_os_error() == Some(libc::EPERM) => {
                // O_NOATIME requires file ownership or CAP_FOWNER — disable globally
                NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            }
            Err(e) => return Err(e), // Real error, propagate
        }
    }
    File::open(path)
}

#[cfg(not(target_os = "linux"))]
fn open_noatime(path: &Path) -> io::Result<File> {
    File::open(path)
}

/// Read a file with zero-copy mmap for large files or read() for small files.
/// Opens once with O_NOATIME, uses fstat for metadata to save a syscall.
pub fn read_file(path: &Path) -> io::Result<FileData> {
    let file = open_noatime(path)?;
    let metadata = file.metadata()?;
    let len = metadata.len();

    if len > 0 && metadata.file_type().is_file() {
        // Small files: exact-size read from already-open fd.
        // Uses read_full into pre-sized buffer instead of read_to_end,
        // which avoids the grow-and-probe pattern (saves 1-2 extra read() syscalls).
        if len < MMAP_THRESHOLD {
            let mut buf = vec![0u8; len as usize];
            let n = read_full(&mut &file, &mut buf)?;
            buf.truncate(n);
            return Ok(FileData::Owned(buf));
        }

        // SAFETY: read-only mapping of a regular file; we only ever hand out
        // &[u8] and never mutate through it. MADV_SEQUENTIAL below lets the
        // kernel prefetch ahead of our sequential access pattern.
        match unsafe { MmapOptions::new().populate().map(&file) } {
            Ok(mmap) => {
                #[cfg(target_os = "linux")]
                {
                    let _ = mmap.advise(memmap2::Advice::Sequential);
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                    // HUGEPAGE reduces TLB misses for large files (2MB+ = 1+ huge page).
                    // With 4KB pages, a 100MB file needs 25,600 TLB entries; with 2MB
                    // huge pages it needs only 50, reducing TLB miss overhead by ~500x.
                    if len >= 2 * 1024 * 1024 {
                        let _ = mmap.advise(memmap2::Advice::HugePage);
                    }
                }
                Ok(FileData::Mmap(mmap))
            }
            Err(_) => {
                // mmap failed — fall back to read
                let mut buf = Vec::with_capacity(len as usize);
                let mut reader = file;
                reader.read_to_end(&mut buf)?;
                Ok(FileData::Owned(buf))
            }
        }
    } else if len > 0 {
        // Non-regular file (device node or other special file): read from the open fd
        let mut buf = Vec::new();
        let mut reader = file;
        reader.read_to_end(&mut buf)?;
        Ok(FileData::Owned(buf))
    } else {
        Ok(FileData::Owned(Vec::new()))
    }
}

/// Read a file entirely into a mutable Vec.
/// Uses exact-size allocation from fstat + single read() for efficiency.
/// Preferred over mmap when the caller needs mutable access (e.g., in-place decode).
pub fn read_file_vec(path: &Path) -> io::Result<Vec<u8>> {
    let file = open_noatime(path)?;
    let metadata = file.metadata()?;
    let len = metadata.len() as usize;
    if len == 0 {
        return Ok(Vec::new());
    }
    let mut buf = vec![0u8; len];
    let n = read_full(&mut &file, &mut buf)?;
    buf.truncate(n);
    Ok(buf)
}

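// Hedged sketch of the "in-place decode" use case mentioned above: mutation
// needs owned memory, which a read-only Mmap cannot provide. The uppercase
// transform is an arbitrary illustration, not something these tools do.
#[allow(dead_code)]
fn example_inplace_transform(path: &Path) -> io::Result<Vec<u8>> {
    let mut buf = read_file_vec(path)?;
    buf.make_ascii_uppercase(); // impossible on a read-only FileData::Mmap
    Ok(buf)
}
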
/// Read a file always using mmap, with MADV_WILLNEED (no MADV_SEQUENTIAL).
/// Used by tac, which scans forward then outputs in reverse, and benefits
/// from zero-copy vmsplice output from mmap pages (sketched after this
/// function). Skips the MMAP_THRESHOLD — even small files benefit from mmap since:
///   - No memcpy from page cache to userspace (zero-copy)
///   - vmsplice can reference mmap pages directly in the pipe
///   - mmap setup cost for small files (~25 pages) is comparable to read()
pub fn read_file_mmap(path: &Path) -> io::Result<FileData> {
    let file = open_noatime(path)?;
    let metadata = file.metadata()?;
    let len = metadata.len();

    if len > 0 && metadata.file_type().is_file() {
        match unsafe { MmapOptions::new().populate().map(&file) } {
            Ok(mmap) => {
                #[cfg(target_os = "linux")]
                {
                    let _ = mmap.advise(memmap2::Advice::WillNeed);
                    if len >= 2 * 1024 * 1024 {
                        let _ = mmap.advise(memmap2::Advice::HugePage);
                    }
                }
                return Ok(FileData::Mmap(mmap));
            }
            Err(_) => {
                // mmap failed — fall back to read
                let mut buf = vec![0u8; len as usize];
                let n = read_full(&mut &file, &mut buf)?;
                buf.truncate(n);
                return Ok(FileData::Owned(buf));
            }
        }
    } else if len > 0 {
        // Non-regular file (device node or other special file): read from the open fd
        let mut buf = Vec::new();
        let mut reader = file;
        reader.read_to_end(&mut buf)?;
        Ok(FileData::Owned(buf))
    } else {
        Ok(FileData::Owned(Vec::new()))
    }
}

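// Hedged sketch of the zero-copy output path the read_file_mmap docs allude
// to: vmsplice() makes the pipe reference our pages instead of copying them
// through a userspace write buffer. This is an illustrative assumption about
// how a caller like tac might use it, not code invoked in this module; a
// real caller must keep the mapping alive until the pipe has been drained.
#[cfg(target_os = "linux")]
#[allow(dead_code)]
fn example_vmsplice_all(pipe_fd: libc::c_int, mut data: &[u8]) -> io::Result<()> {
    while !data.is_empty() {
        let iov = libc::iovec {
            iov_base: data.as_ptr() as *mut libc::c_void,
            iov_len: data.len(),
        };
        // SAFETY: iov points at live, readable memory for the whole call.
        let n = unsafe { libc::vmsplice(pipe_fd, &iov, 1, 0) };
        if n < 0 {
            let err = io::Error::last_os_error();
            if err.kind() == io::ErrorKind::Interrupted {
                continue;
            }
            return Err(err);
        }
        data = &data[n as usize..];
    }
    Ok(())
}
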
/// Get file size without reading it (for byte-count-only optimization).
pub fn file_size(path: &Path) -> io::Result<u64> {
    Ok(fs::metadata(path)?.len())
}

/// Read all bytes from stdin into a Vec.
/// On Linux, uses raw libc::read() to bypass Rust's StdinLock/BufReader overhead.
/// Uses a direct read() loop into a pre-allocated buffer instead of read_to_end(),
/// which avoids Vec's grow-and-probe pattern (extra read() calls and memcpy).
/// Callers should enlarge the pipe buffer via fcntl(F_SETPIPE_SZ) before calling
/// (see the sketch after this function).
/// Uses the full spare capacity for each read() to minimize syscalls.
pub fn read_stdin() -> io::Result<Vec<u8>> {
    #[cfg(target_os = "linux")]
    return read_stdin_raw();

    #[cfg(not(target_os = "linux"))]
    read_stdin_generic()
}

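// Hedged sketch of the pipe enlargement that read_stdin's docs expect callers
// to perform first. Best-effort only: F_SETPIPE_SZ fails if stdin is not a
// pipe or if the request exceeds /proc/sys/fs/pipe-max-size, so the result is
// ignored. The 1 MiB figure is an assumption, not what the callers here use.
#[cfg(target_os = "linux")]
#[allow(dead_code)]
fn example_grow_stdin_pipe() {
    const PIPE_SIZE: libc::c_int = 1024 * 1024;
    // SAFETY: fcntl on fd 0 with F_SETPIPE_SZ touches no user memory.
    unsafe {
        let _ = libc::fcntl(0, libc::F_SETPIPE_SZ, PIPE_SIZE);
    }
}
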
/// Raw libc::read() implementation for Linux — bypasses Rust's StdinLock
/// and BufReader layers entirely. StdinLock uses an internal 8KB BufReader
/// which adds an extra memcpy for every read; raw read() goes directly
/// from the kernel pipe buffer to our Vec.
///
/// Pre-allocates 16MB to cover most workloads (benchmark = 10MB) without
/// over-allocating. For inputs > 16MB, doubles capacity on demand.
/// Each read() uses the full spare capacity to maximize bytes per syscall.
///
/// Note: callers (ftac, ftr, fbase64) are expected to enlarge the pipe
/// buffer via fcntl(F_SETPIPE_SZ) before calling this function. We don't
/// do it here to avoid accidentally shrinking a previously enlarged pipe.
#[cfg(target_os = "linux")]
fn read_stdin_raw() -> io::Result<Vec<u8>> {
    const PREALLOC: usize = 16 * 1024 * 1024;

    let mut buf: Vec<u8> = Vec::with_capacity(PREALLOC);

    loop {
        let spare_cap = buf.capacity() - buf.len();
        if spare_cap < 1024 * 1024 {
            // Grow by doubling (or by at least PREALLOC more) to minimize realloc count
            let new_cap = (buf.capacity() * 2).max(buf.len() + PREALLOC);
            buf.reserve(new_cap - buf.capacity());
        }
        let spare_cap = buf.capacity() - buf.len();
        let start = buf.len();

        // SAFETY: read() writes into the uninitialized spare capacity, and
        // set_len below advances len only by the number of bytes actually read.
        let ret = unsafe {
            libc::read(
                0,
                buf.as_mut_ptr().add(start) as *mut libc::c_void,
                spare_cap,
            )
        };
        if ret < 0 {
            let err = io::Error::last_os_error();
            if err.kind() == io::ErrorKind::Interrupted {
                continue;
            }
            return Err(err);
        }
        if ret == 0 {
            break;
        }
        unsafe { buf.set_len(start + ret as usize) };
    }

    Ok(buf)
}

/// Generic read_stdin for non-Linux platforms.
#[cfg(not(target_os = "linux"))]
fn read_stdin_generic() -> io::Result<Vec<u8>> {
    const PREALLOC: usize = 16 * 1024 * 1024;
    const READ_BUF: usize = 4 * 1024 * 1024;

    let mut stdin = io::stdin().lock();
    let mut buf: Vec<u8> = Vec::with_capacity(PREALLOC);

    loop {
        let spare_cap = buf.capacity() - buf.len();
        if spare_cap < READ_BUF {
            buf.reserve(PREALLOC);
        }
        let spare_cap = buf.capacity() - buf.len();

        let start = buf.len();
        // Zero-fill the spare capacity before handing it to read(): passing
        // uninitialized memory to an arbitrary Read impl would violate the
        // Read contract, so stay in safe code here.
        buf.resize(start + spare_cap, 0);
        match stdin.read(&mut buf[start..start + spare_cap]) {
            Ok(0) => {
                buf.truncate(start);
                break;
            }
            Ok(n) => {
                buf.truncate(start + n);
            }
            Err(e) if e.kind() == io::ErrorKind::Interrupted => {
                buf.truncate(start);
                continue;
            }
            Err(e) => return Err(e),
        }
    }

    Ok(buf)
}

/// Read as many bytes as possible into buf, retrying on partial reads.
/// Ensures the full buffer is filled (or EOF reached), avoiding the
/// probe-read overhead of read_to_end.
/// Fast path: regular file reads usually return the full buffer on the first call.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    // Fast path: first read() usually fills the entire buffer for regular files
    let n = reader.read(buf)?;
    if n == buf.len() || n == 0 {
        return Ok(n);
    }
    // Slow path: partial read — retry to fill buffer (pipes, slow devices)
    let mut total = n;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
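
// A small test sketch for read_full's slow path (added illustration; assumes
// the standard cargo test harness). The one-byte reader simulates a pipe that
// returns short reads, forcing the retry loop.
#[cfg(test)]
mod tests {
    use super::*;

    struct OneByteReader<'a>(&'a [u8]);

    impl Read for OneByteReader<'_> {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            match (self.0.split_first(), buf.first_mut()) {
                (Some((&b, rest)), Some(slot)) => {
                    *slot = b;
                    self.0 = rest;
                    Ok(1)
                }
                _ => Ok(0),
            }
        }
    }

    #[test]
    fn read_full_retries_partial_reads() {
        let mut src = OneByteReader(b"hello");
        let mut buf = [0u8; 5];
        assert_eq!(read_full(&mut src, &mut buf).unwrap(), 5);
        assert_eq!(&buf, b"hello");
    }
}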