use std::fs::{self, File};
use std::io::{self, Read};
use std::ops::Deref;
use std::path::Path;
#[cfg(target_os = "linux")]
use std::sync::atomic::{AtomicBool, Ordering};
use memmap2::{Mmap, MmapOptions};
/// Bytes loaded from a file, held either as a memory map or as an owned buffer.
///
/// The `Deref` implementation in this file exposes both variants as `[u8]`.
pub enum FileData {
/// Contents backed by a `memmap2::Mmap` of the file.
Mmap(Mmap),
/// Contents copied into a heap-allocated vector.
Owned(Vec<u8>),
}
/// Lets a `FileData` be used wherever a byte slice is expected.
impl Deref for FileData {
    type Target = [u8];

    /// Borrows the underlying bytes, whichever way they are stored.
    fn deref(&self) -> &Self::Target {
        match self {
            FileData::Mmap(mapping) => &mapping[..],
            FileData::Owned(bytes) => bytes.as_slice(),
        }
    }
}
/// Size cut-off (1 MiB) used by `read_file_with_hints`: regular files shorter
/// than this are read into an owned buffer instead of being memory-mapped.
pub const MMAP_THRESHOLD: u64 = 1 << 20;
/// Process-wide flag recording whether `open_noatime` should keep trying
/// `O_NOATIME`. Starts as `true`; `open_noatime` clears it the first time such
/// an open fails with `EPERM`, after which plain `File::open` is used.
#[cfg(target_os = "linux")]
static NOATIME_SUPPORTED: AtomicBool = AtomicBool::new(true);
/// Opens `path` read-only, preferring `O_NOATIME` so the read does not update
/// the file's access time.
///
/// If an `O_NOATIME` open is rejected with `EPERM`, the global
/// `NOATIME_SUPPORTED` flag is cleared and this call (and all later ones) fall
/// back to a plain `File::open`.
///
/// # Errors
/// Returns any other error from the `O_NOATIME` open as-is, or the error from
/// the fallback `File::open`.
#[cfg(target_os = "linux")]
pub fn open_noatime(path: &Path) -> io::Result<File> {
    use std::os::unix::fs::OpenOptionsExt;

    // A previous EPERM disabled the fast path: skip straight to a normal open.
    if !NOATIME_SUPPORTED.load(Ordering::Relaxed) {
        return File::open(path);
    }

    let attempt = fs::OpenOptions::new()
        .read(true)
        .custom_flags(libc::O_NOATIME)
        .open(path);

    match attempt {
        Err(err) if err.raw_os_error() == Some(libc::EPERM) => {
            // Remember the refusal so later calls do not pay for a doomed attempt.
            NOATIME_SUPPORTED.store(false, Ordering::Relaxed);
            File::open(path)
        }
        outcome => outcome,
    }
}
/// Non-Linux counterpart of `open_noatime`: simply opens `path` read-only with
/// `File::open`, without any access-time flag.
#[cfg(not(target_os = "linux"))]
pub fn open_noatime(path: &Path) -> io::Result<File> {
File::open(path)
}
/// Prefetch policy applied by `read_file_with_hints` to a memory-mapped file.
///
/// The hints are only consulted inside that function's Linux-specific block.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MmapHints {
/// Ask the kernel to bring pages in up front: `PopulateRead` (falling back to
/// `WillNeed`) for maps of at least 4 MiB, `WillNeed` for smaller ones.
Eager,
/// Only issue `WillNeed` for maps smaller than 4 MiB; larger maps get no
/// prefetch advice.
Lazy,
}
/// Reads `path` via `read_file_with_hints` using `MmapHints::Eager`.
///
/// # Errors
/// Propagates whatever `read_file_with_hints` returns.
pub fn read_file(path: &Path) -> io::Result<FileData> {
read_file_with_hints(path, MmapHints::Eager)
}
/// Loads `path`, choosing between an owned buffer and a memory map by size.
///
/// * Non-regular files are drained with `read_to_end`.
/// * Empty regular files yield an empty owned buffer.
/// * Regular files shorter than `MMAP_THRESHOLD` are read into an owned buffer.
/// * Larger regular files are memory-mapped; if mapping fails the file is read
///   into an owned buffer instead.
///
/// On Linux the mapping additionally receives `madvise` hints (all
/// best-effort, failures ignored) according to `hints`.
///
/// # Errors
/// Returns errors from opening the file, querying its metadata, or reading it.
pub fn read_file_with_hints(path: &Path, hints: MmapHints) -> io::Result<FileData> {
    // `hints` is only consulted on Linux; reference it so other targets stay warning-free.
    let _ = &hints;

    let mut file = open_noatime(path)?;
    let metadata = file.metadata()?;
    let len = metadata.len();

    if !metadata.file_type().is_file() {
        let mut contents = Vec::new();
        file.read_to_end(&mut contents)?;
        return Ok(FileData::Owned(contents));
    }
    if len == 0 {
        return Ok(FileData::Owned(Vec::new()));
    }
    if len < MMAP_THRESHOLD {
        let mut contents = vec![0u8; len as usize];
        let filled = read_full(&mut &file, &mut contents)?;
        contents.truncate(filled);
        return Ok(FileData::Owned(contents));
    }

    let mapping = match unsafe { MmapOptions::new().map(&file) } {
        Ok(mapping) => mapping,
        Err(_) => {
            // Mapping is an optimisation only; fall back to a plain read.
            let mut contents = Vec::with_capacity(len as usize);
            file.read_to_end(&mut contents)?;
            return Ok(FileData::Owned(contents));
        }
    };

    #[cfg(target_os = "linux")]
    {
        const MIB: u64 = 1024 * 1024;

        if len >= 2 * MIB {
            let _ = mapping.advise(memmap2::Advice::HugePage);
        }
        let _ = mapping.advise(memmap2::Advice::Sequential);

        if len < 4 * MIB {
            // Small maps get read-ahead under either policy.
            let _ = mapping.advise(memmap2::Advice::WillNeed);
        } else if hints == MmapHints::Eager
            && mapping.advise(memmap2::Advice::PopulateRead).is_err()
        {
            // PopulateRead was rejected; WillNeed is the fallback.
            let _ = mapping.advise(memmap2::Advice::WillNeed);
        }
    }

    Ok(FileData::Mmap(mapping))
}
pub fn read_file_vec(path: &Path) -> io::Result<Vec<u8>> {
let file = open_noatime(path)?;
let metadata = file.metadata()?;
let len = metadata.len() as usize;
if len == 0 {
return Ok(Vec::new());
}
let mut buf = vec![0u8; len];
let n = read_full(&mut &file, &mut buf)?;
buf.truncate(n);
Ok(buf)
}
/// Loads `path`, memory-mapping it whenever it is a non-empty regular file.
///
/// * Non-regular files are drained with `read_to_end`.
/// * Empty regular files yield an empty owned buffer.
/// * Otherwise the file is mapped; if mapping fails it is read into an owned
///   buffer sized from the metadata length.
///
/// On Linux the mapping receives best-effort `madvise` hints: `HugePage` from
/// 2 MiB, and `PopulateRead` from 4 MiB with `WillNeed` as the fallback and as
/// the choice for smaller maps.
///
/// # Errors
/// Returns errors from opening the file, querying its metadata, or reading it.
pub fn read_file_mmap(path: &Path) -> io::Result<FileData> {
    let mut file = open_noatime(path)?;
    let metadata = file.metadata()?;
    let len = metadata.len();

    if !metadata.file_type().is_file() {
        let mut contents = Vec::new();
        file.read_to_end(&mut contents)?;
        return Ok(FileData::Owned(contents));
    }
    if len == 0 {
        return Ok(FileData::Owned(Vec::new()));
    }

    let mapping = match unsafe { MmapOptions::new().map(&file) } {
        Ok(mapping) => mapping,
        Err(_) => {
            // Mapping failed; read the file into memory instead.
            let mut contents = vec![0u8; len as usize];
            let filled = read_full(&mut &file, &mut contents)?;
            contents.truncate(filled);
            return Ok(FileData::Owned(contents));
        }
    };

    #[cfg(target_os = "linux")]
    {
        const MIB: u64 = 1024 * 1024;

        if len >= 2 * MIB {
            let _ = mapping.advise(memmap2::Advice::HugePage);
        }
        // PopulateRead is only attempted for large maps; everything else, and
        // any large map where it is rejected, gets WillNeed.
        let populated =
            len >= 4 * MIB && mapping.advise(memmap2::Advice::PopulateRead).is_ok();
        if !populated {
            let _ = mapping.advise(memmap2::Advice::WillNeed);
        }
    }

    Ok(FileData::Mmap(mapping))
}
/// Reads `path` into an owned buffer with `read_to_end`, never memory-mapping.
///
/// On Linux, files whose metadata length is at least 64 KiB first get a
/// `POSIX_FADV_SEQUENTIAL` hint (its result is ignored). Regular files
/// pre-size the buffer from the metadata length; empty regular files return an
/// empty buffer without reading; non-regular files are read until EOF.
///
/// # Errors
/// Returns errors from opening the file, querying its metadata, or reading it.
pub fn read_file_direct(path: &Path) -> io::Result<FileData> {
    let file = open_noatime(path)?;
    let metadata = file.metadata()?;
    let len = metadata.len();
    let is_regular = metadata.file_type().is_file();

    #[cfg(target_os = "linux")]
    {
        use std::os::unix::io::AsRawFd;

        if len >= 65536 {
            // Best-effort read-ahead hint covering the whole file (offset 0, len 0).
            unsafe {
                libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
            }
        }
    }

    if is_regular && len == 0 {
        return Ok(FileData::Owned(Vec::new()));
    }

    // Only regular files report a length worth pre-allocating for.
    let mut contents = if is_regular {
        Vec::with_capacity(len as usize)
    } else {
        Vec::new()
    };
    (&file).read_to_end(&mut contents)?;
    Ok(FileData::Owned(contents))
}
/// Returns the length in bytes that the filesystem metadata reports for `path`.
///
/// # Errors
/// Returns the error from `fs::metadata` (for example when `path` does not exist).
pub fn file_size(path: &Path) -> io::Result<u64> {
    fs::metadata(path).map(|meta| meta.len())
}
/// Reads all of standard input into memory.
///
/// On Linux this delegates to `read_stdin_raw`; on every other target it
/// delegates to `read_stdin_generic`.
///
/// # Errors
/// Propagates the I/O error returned by the selected implementation.
pub fn read_stdin() -> io::Result<Vec<u8>> {
// Exactly one of the two cfg-gated statements below survives compilation.
#[cfg(target_os = "linux")]
return read_stdin_raw();
#[cfg(not(target_os = "linux"))]
read_stdin_generic()
}
/// Drains file descriptor 0 into a vector using raw `read(2)` calls that write
/// straight into the vector's spare capacity.
///
/// The buffer starts with 16 MiB of capacity and is grown whenever less than
/// 1 MiB of spare room remains. Reads interrupted by a signal are retried.
///
/// # Errors
/// Returns the OS error from `read(2)` for any failure other than an interrupt.
#[cfg(target_os = "linux")]
fn read_stdin_raw() -> io::Result<Vec<u8>> {
    const PREALLOC: usize = 16 * 1024 * 1024;
    const MIN_SPARE: usize = 1024 * 1024;

    let mut data: Vec<u8> = Vec::with_capacity(PREALLOC);
    loop {
        if data.capacity() - data.len() < MIN_SPARE {
            let target = (data.capacity() * 2).max(data.len() + PREALLOC);
            data.reserve(target - data.capacity());
        }

        let filled = data.len();
        let room = data.capacity() - filled;
        // SAFETY: `filled <= capacity`, so the pointer stays inside the
        // allocation, and exactly `room` bytes after it are owned by `data`
        // and writable.
        let got = unsafe {
            libc::read(
                0,
                data.as_mut_ptr().add(filled) as *mut libc::c_void,
                room,
            )
        };

        match got {
            0 => return Ok(data),
            n if n > 0 => {
                // SAFETY: the kernel initialised `n <= room` bytes past `filled`.
                unsafe { data.set_len(filled + n as usize) };
            }
            _ => {
                let err = io::Error::last_os_error();
                if err.kind() != io::ErrorKind::Interrupted {
                    return Err(err);
                }
                // Interrupted: go around and issue the read again.
            }
        }
    }
}
/// Moves the contents of a piped stdin into an anonymous memfd with
/// `splice(2)` and returns a mutable memory map of it.
///
/// # Returns
/// * `Ok(Some(map))` — stdin was a FIFO and all of its data is now in `map`.
/// * `Ok(None)` — this fast path does not apply **and no input was consumed**
///   (stdin is not a FIFO, the memfd could not be created, the first splice
///   failed, or the pipe was empty). The caller can safely fall back to
///   reading stdin normally.
///
/// # Errors
/// Once at least one byte has been spliced out of the pipe, any later failure
/// (`splice`, `ftruncate`, or `mmap`) is returned as `Err`. Those bytes are no
/// longer readable from stdin, so reporting "not applicable" would make a
/// fallback read silently lose the start of the input.
#[cfg(target_os = "linux")]
pub fn splice_stdin_to_mmap() -> io::Result<Option<memmap2::MmapMut>> {
    use std::os::unix::io::FromRawFd;

    // SAFETY: `libc::stat` is plain data, so the all-zero pattern is a valid value.
    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
    // SAFETY: `stat` is a valid, writable `libc::stat` for the duration of the call.
    if unsafe { libc::fstat(0, &mut stat) } != 0 {
        return Ok(None);
    }
    if (stat.st_mode & libc::S_IFMT) != libc::S_IFIFO {
        return Ok(None);
    }

    // SAFETY: the name is a valid NUL-terminated C string and the flags are zero.
    let memfd =
        unsafe { libc::syscall(libc::SYS_memfd_create, c"stdin_splice".as_ptr(), 0u32) as i32 };
    if memfd < 0 {
        return Ok(None);
    }
    // SAFETY: `memfd` is a freshly created descriptor owned by nobody else.
    // Wrapping it right away closes it on every return path below.
    let file = unsafe { File::from_raw_fd(memfd) };

    let mut total: usize = 0;
    loop {
        // SAFETY: both descriptors are open; null offsets make the kernel use
        // (and advance) each descriptor's own file position.
        let n = unsafe {
            libc::splice(
                0,
                std::ptr::null_mut(),
                memfd,
                std::ptr::null_mut(),
                1024 * 1024 * 1024,
                libc::SPLICE_F_MOVE,
            )
        };
        if n > 0 {
            total += n as usize;
            continue;
        }
        if n == 0 {
            break;
        }

        let err = io::Error::last_os_error();
        if err.kind() == io::ErrorKind::Interrupted {
            continue;
        }
        if total > 0 {
            // Part of stdin is already gone from the pipe: a silent fallback
            // would drop it, so surface the failure instead.
            return Err(err);
        }
        return Ok(None);
    }

    if total == 0 {
        // Empty pipe: nothing was consumed, let the caller handle it.
        return Ok(None);
    }

    // SAFETY: `memfd` is kept open by `file`.
    if unsafe { libc::ftruncate(memfd, total as libc::off_t) } != 0 {
        return Err(io::Error::last_os_error());
    }

    // SAFETY: the memfd is private to this process, so nothing else can
    // resize or modify the backing object behind the mapping.
    let mut mm = unsafe { MmapOptions::new().populate().map_mut(&file) }?;
    // The mapping keeps the memory alive; the descriptor is no longer needed.
    drop(file);

    // Best-effort hints; their results are deliberately ignored.
    // SAFETY: the pointer and length describe the live mapping owned by `mm`.
    unsafe {
        libc::madvise(
            mm.as_mut_ptr() as *mut libc::c_void,
            total,
            libc::MADV_SEQUENTIAL,
        );
        if total >= 2 * 1024 * 1024 {
            libc::madvise(
                mm.as_mut_ptr() as *mut libc::c_void,
                total,
                libc::MADV_HUGEPAGE,
            );
        }
    }

    Ok(Some(mm))
}
/// Reads all of standard input into a vector (portable implementation).
///
/// The buffer is pre-allocated with 16 MiB of capacity and then filled by
/// `Read::read_to_end`, which grows it as needed and retries reads that fail
/// with `ErrorKind::Interrupted`. This avoids calling `Vec::set_len` over
/// uninitialized capacity and then handing that memory out as `&mut [u8]`,
/// which the `set_len` contract does not allow.
///
/// # Errors
/// Returns the first non-interrupt I/O error reported while reading stdin.
#[cfg(not(target_os = "linux"))]
fn read_stdin_generic() -> io::Result<Vec<u8>> {
    const PREALLOC: usize = 16 * 1024 * 1024;

    let mut buf: Vec<u8> = Vec::with_capacity(PREALLOC);
    io::stdin().lock().read_to_end(&mut buf)?;
    Ok(buf)
}
/// Reads from `reader` until `buf` is full or the reader reports end of input.
///
/// Every read that fails with `ErrorKind::Interrupted` is retried, including
/// the very first one.
///
/// # Returns
/// The number of bytes written to the front of `buf`; this is less than
/// `buf.len()` only if end of input was reached first.
///
/// # Errors
/// Returns the first error from `reader` that is not `ErrorKind::Interrupted`.
#[inline]
pub fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
/// Attempts to memory-map stdin by calling `try_mmap_stdin_with_hints` with
/// `sequential` set to `true`.
///
/// Returns `None` whenever that function does.
#[cfg(unix)]
pub fn try_mmap_stdin(min_size: u64) -> Option<Mmap> {
try_mmap_stdin_with_hints(min_size, true)
}
#[cfg(unix)]
pub fn try_mmap_stdin_with_hints(min_size: u64, sequential: bool) -> Option<Mmap> {
use std::os::unix::io::{AsRawFd, FromRawFd};
let stdin = std::io::stdin();
let fd = stdin.as_raw_fd();
let mut stat: libc::stat = unsafe { std::mem::zeroed() };
if unsafe { libc::fstat(fd, &mut stat) } != 0 {
return None;
}
if (stat.st_mode & libc::S_IFMT) != libc::S_IFREG || stat.st_size <= 0 {
return None;
}
if (stat.st_size as u64) < min_size {
return None;
}
let file = unsafe { std::fs::File::from_raw_fd(fd) };
let mmap = unsafe { MmapOptions::new().map(&file) }.ok();
std::mem::forget(file); #[cfg(target_os = "linux")]
if let Some(ref m) = mmap {
unsafe {
if m.len() >= 2 * 1024 * 1024 {
libc::madvise(
m.as_ptr() as *mut libc::c_void,
m.len(),
libc::MADV_HUGEPAGE,
);
}
if sequential {
libc::madvise(
m.as_ptr() as *mut libc::c_void,
m.len(),
libc::MADV_SEQUENTIAL,
);
}
if m.len() >= 4 * 1024 * 1024 {
libc::madvise(
m.as_ptr() as *mut libc::c_void,
m.len(),
libc::MADV_WILLNEED,
);
}
}
}
mmap
}