Skip to main content

coreutils_rs/common/
io.rs

1use std::fs::{self, File};
2use std::io::{self, Read};
3use std::ops::Deref;
4use std::path::Path;
5
6use memmap2::{Mmap, MmapOptions};
7
8/// Holds file data — either zero-copy mmap or an owned Vec.
9/// Dereferences to `&[u8]` for transparent use.
10pub enum FileData {
11    Mmap(Mmap),
12    Owned(Vec<u8>),
13}
14
15impl Deref for FileData {
16    type Target = [u8];
17
18    fn deref(&self) -> &[u8] {
19        match self {
20            FileData::Mmap(m) => m,
21            FileData::Owned(v) => v,
22        }
23    }
24}
25
26/// Read a file with zero-copy mmap. Uses populate() for eager page table setup
27/// and MADV_HUGEPAGE for TLB efficiency on large files.
28pub fn read_file(path: &Path) -> io::Result<FileData> {
29    let metadata = fs::metadata(path)?;
30    let len = metadata.len();
31
32    if len > 0 && metadata.file_type().is_file() {
33        let file = File::open(path)?;
34        // SAFETY: Read-only mapping. File must not be truncated during use.
35        match unsafe { MmapOptions::new().populate().map(&file) } {
36            Ok(mmap) => {
37                #[cfg(target_os = "linux")]
38                {
39                    let _ = mmap.advise(memmap2::Advice::Sequential);
40                    if len >= 2 * 1024 * 1024 {
41                        unsafe {
42                            libc::madvise(
43                                mmap.as_ptr() as *mut libc::c_void,
44                                mmap.len(),
45                                libc::MADV_HUGEPAGE,
46                            );
47                        }
48                    }
49                }
50                Ok(FileData::Mmap(mmap))
51            }
52            Err(_) => Ok(FileData::Owned(fs::read(path)?)),
53        }
54    } else if len > 0 {
55        // Non-regular file (special files)
56        Ok(FileData::Owned(fs::read(path)?))
57    } else {
58        Ok(FileData::Owned(Vec::new()))
59    }
60}
61
/// Report a file's length in bytes from its metadata alone.
///
/// The contents are never read, which makes this suitable for the
/// byte-count-only fast path. Any error from the metadata lookup is returned
/// unchanged.
pub fn file_size(path: &Path) -> io::Result<u64> {
    fs::metadata(path).map(|meta| meta.len())
}
66
/// Drain standard input to EOF and return everything that was read.
///
/// Stdin is locked for the duration of the read. An I/O error from the read
/// is returned to the caller.
pub fn read_stdin() -> io::Result<Vec<u8>> {
    let stdin = io::stdin();
    let mut handle = stdin.lock();
    let mut bytes = Vec::new();
    handle.read_to_end(&mut bytes).map(|_| bytes)
}
72}