use memmap2::{MmapMut, MmapOptions};
use std::fs::{File, OpenOptions};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
/// Growable buffer of plain `Copy` elements that is stored either in a heap
/// `Vec` or in a writable, file-backed memory map.
#[derive(Debug)]
pub enum MmapOrVec<T: Copy + Default + 'static> {
/// Elements are held in an ordinary `Vec<T>`.
Heap {
/// The stored elements.
data: Vec<T>,
},
/// Elements are held in a writable mapping created from `file`.
Mapped {
/// Writable mapping; element `i` is read and written at byte offset
/// `i * size_of::<T>()`.
mmap: MmapMut,
/// Number of elements in use; indexed accesses are checked against it.
len: usize,
/// `capacity` is the number of element slots available before `push` grows
/// the file; `file` (declared on the same line) is the open handle the
/// mapping was created from.
capacity: usize, file: File,
/// Path recorded at construction; returned by `file_path` and quoted in
/// remap panic messages.
path: PathBuf,
/// Ties the untyped byte mapping to the element type `T`.
_phantom: std::marker::PhantomData<T>,
},
}
impl<T: Copy + Default + 'static> MmapOrVec<T> {
pub fn new() -> Self {
MmapOrVec::Heap { data: Vec::new() }
}
pub fn with_capacity(cap: usize) -> Self {
MmapOrVec::Heap {
data: Vec::with_capacity(cap),
}
}
pub fn from_vec(data: Vec<T>) -> Self {
MmapOrVec::Heap { data }
}
/// Creates (or truncates) the file at `path` and maps it as a buffer of
/// exactly `count` elements backed by freshly extended, zero-filled file bytes.
///
/// The file is sized for `max(count, 64)` elements so small buffers have
/// room to grow, but the logical length is `count`, matching
/// `mapped_prefilled`. Previously the length was set to the rounded-up
/// capacity, so asking for fewer than 64 elements yielded 64.
///
/// NOTE(review): assumes the all-zero bit pattern is a valid `T` — confirm
/// for every element type used with this constructor.
///
/// # Errors
/// Returns any I/O error from opening, resizing or mapping the file.
pub fn mapped_zeroed(path: &Path, count: usize) -> io::Result<Self> {
    let cap = count.max(64);
    let byte_len = cap * std::mem::size_of::<T>();
    let file = OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .truncate(true)
        .open(path)?;
    // Extending a truncated file fills the new range with zero bytes.
    file.set_len(byte_len as u64)?;
    // SAFETY: the file was just sized to `byte_len`; soundness additionally
    // relies on nothing else truncating the file while it is mapped.
    let mmap = unsafe { MmapOptions::new().len(byte_len).map_mut(&file)? };
    Ok(MmapOrVec::Mapped {
        mmap,
        // Logical length is what the caller asked for, not the padded capacity.
        len: count,
        capacity: cap,
        file,
        path: path.to_path_buf(),
        _phantom: std::marker::PhantomData,
    })
}
pub fn mapped(path: &Path, initial_cap: usize) -> io::Result<Self> {
let cap = initial_cap.max(64); let byte_len = cap * std::mem::size_of::<T>();
let file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.truncate(true)
.open(path)?;
file.set_len(byte_len as u64)?;
let mmap = unsafe { MmapOptions::new().len(byte_len).map_mut(&file)? };
Ok(MmapOrVec::Mapped {
mmap,
len: 0,
capacity: cap,
file,
path: path.to_path_buf(),
_phantom: std::marker::PhantomData,
})
}
pub fn mapped_prefilled(path: &Path, count: usize) -> io::Result<Self> {
let cap = count.max(64);
let byte_len = cap * std::mem::size_of::<T>();
let file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.truncate(true)
.open(path)?;
file.set_len(byte_len as u64)?;
let mmap = unsafe { MmapOptions::new().len(byte_len).map_mut(&file)? };
Ok(MmapOrVec::Mapped {
mmap,
len: count, capacity: cap,
file,
path: path.to_path_buf(),
_phantom: std::marker::PhantomData,
})
}
pub fn load_mapped(path: &Path, len: usize) -> io::Result<Self> {
let file = OpenOptions::new().read(true).write(true).open(path)?;
let file_len = file.metadata()?.len() as usize;
let elem_size = std::mem::size_of::<T>();
let capacity = file_len.checked_div(elem_size).unwrap_or(len);
if capacity < len {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!(
"File too small: {} bytes for {} elements of size {}",
file_len, len, elem_size
),
));
}
let mmap = unsafe { MmapOptions::new().len(file_len).map_mut(&file)? };
Ok(MmapOrVec::Mapped {
mmap,
len,
capacity,
file,
path: path.to_path_buf(),
_phantom: std::marker::PhantomData,
})
}
pub fn len(&self) -> usize {
match self {
MmapOrVec::Heap { data } => data.len(),
MmapOrVec::Mapped { len, .. } => *len,
}
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[cfg(unix)]
pub fn advise_willneed(&self) {
if let MmapOrVec::Mapped { mmap, .. } = self {
let _ = mmap.advise(memmap2::Advice::WillNeed);
}
}
#[cfg(not(unix))]
pub fn advise_willneed(&self) {}
#[cfg(unix)]
pub fn advise_sequential(&self) {
if let MmapOrVec::Mapped { mmap, len, .. } = self {
let byte_len = *len * std::mem::size_of::<T>();
if byte_len > 0 {
let _ = mmap.advise(memmap2::Advice::Sequential);
}
}
}
#[cfg(not(unix))]
pub fn advise_sequential(&self) {}
#[cfg(unix)]
pub fn advise_dontneed(&self) {
if let MmapOrVec::Mapped { mmap, len, .. } = self {
let byte_len = *len * std::mem::size_of::<T>();
if byte_len > 0 {
unsafe {
let _ = mmap.unchecked_advise(memmap2::UncheckedAdvice::DontNeed);
}
}
}
}
#[cfg(not(unix))]
pub fn advise_dontneed(&self) {}
#[cfg(target_os = "linux")]
pub fn fadvise_dontneed(&self) {
if let MmapOrVec::Mapped { file, len, .. } = self {
let byte_len = (*len * std::mem::size_of::<T>()) as libc::off_t;
if byte_len > 0 {
use std::os::unix::io::AsRawFd;
unsafe {
let _ = libc::posix_fadvise(
file.as_raw_fd(),
0,
byte_len,
libc::POSIX_FADV_DONTNEED,
);
}
}
}
}
#[cfg(target_os = "macos")]
pub fn fadvise_dontneed(&self) {
if let MmapOrVec::Mapped { file, .. } = self {
use std::os::unix::io::AsRawFd;
unsafe {
let _ = libc::fcntl(file.as_raw_fd(), libc::F_NOCACHE, 1);
}
}
}
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
pub fn fadvise_dontneed(&self) {}
#[allow(dead_code)]
#[cfg(unix)]
pub fn flush_and_release_pages(&self) -> std::io::Result<()> {
if let MmapOrVec::Mapped { mmap, len, .. } = self {
let byte_len = *len * std::mem::size_of::<T>();
if byte_len > 0 {
mmap.flush()?;
unsafe {
let _ = mmap.unchecked_advise(memmap2::UncheckedAdvice::DontNeed);
}
}
}
Ok(())
}
#[allow(dead_code)]
#[cfg(not(unix))]
pub fn flush_and_release_pages(&self) -> std::io::Result<()> {
Ok(())
}
pub fn get(&self, index: usize) -> T {
match self {
MmapOrVec::Heap { data } => data[index],
MmapOrVec::Mapped { mmap, len, .. } => {
assert!(index < *len, "MmapOrVec index out of bounds");
let offset = index * std::mem::size_of::<T>();
unsafe { std::ptr::read(mmap.as_ptr().add(offset) as *const T) }
}
}
}
pub fn set(&mut self, index: usize, value: T) {
match self {
MmapOrVec::Heap { data } => data[index] = value,
MmapOrVec::Mapped { mmap, len, .. } => {
assert!(index < *len, "MmapOrVec index out of bounds");
let offset = index * std::mem::size_of::<T>();
unsafe {
std::ptr::write(mmap.as_mut_ptr().add(offset) as *mut T, value);
}
}
}
}
pub fn push(&mut self, value: T) {
match self {
MmapOrVec::Heap { data } => data.push(value),
MmapOrVec::Mapped {
mmap,
len,
capacity,
file,
path,
..
} => {
if *len >= *capacity {
let new_cap = (*capacity * 2).max(64);
let new_byte_len = new_cap * std::mem::size_of::<T>();
file.set_len(new_byte_len as u64).expect("ftruncate failed");
*mmap = unsafe {
MmapOptions::new()
.len(new_byte_len)
.map_mut(&*file)
.unwrap_or_else(|e| {
panic!("mmap remap failed for {}: {}", path.display(), e)
})
};
*capacity = new_cap;
}
let offset = *len * std::mem::size_of::<T>();
unsafe {
std::ptr::write(mmap.as_mut_ptr().add(offset) as *mut T, value);
}
*len += 1;
}
}
}
pub fn as_mut_slice(&mut self) -> &mut [T] {
match self {
MmapOrVec::Heap { data } => data.as_mut_slice(),
MmapOrVec::Mapped { mmap, len, .. } => unsafe {
std::slice::from_raw_parts_mut(mmap.as_mut_ptr() as *mut T, *len)
},
}
}
pub fn materialize_to_file(&mut self, path: &Path) -> io::Result<()> {
if matches!(self, MmapOrVec::Mapped { .. }) {
return Ok(()); }
let MmapOrVec::Heap { data } = self else {
unreachable!()
};
let len = data.len();
let cap = len.max(64);
let byte_len = cap * std::mem::size_of::<T>();
let file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.truncate(true)
.open(path)?;
file.set_len(byte_len as u64)?;
let mut mmap = unsafe { MmapOptions::new().len(byte_len).map_mut(&file)? };
let src_bytes = unsafe {
std::slice::from_raw_parts(data.as_ptr() as *const u8, len * std::mem::size_of::<T>())
};
mmap[..src_bytes.len()].copy_from_slice(src_bytes);
mmap.flush_async()?;
*self = MmapOrVec::Mapped {
mmap,
len,
capacity: cap,
file,
path: path.to_path_buf(),
_phantom: std::marker::PhantomData,
};
Ok(())
}
#[allow(dead_code)] pub fn materialize_to_heap(&mut self) {
if matches!(self, MmapOrVec::Heap { .. }) {
return;
}
let data = match self {
MmapOrVec::Mapped { mmap, len, .. } => {
unsafe { std::slice::from_raw_parts(mmap.as_ptr() as *const T, *len) }.to_vec()
}
_ => unreachable!(),
};
*self = MmapOrVec::Heap { data };
}
pub fn is_mapped(&self) -> bool {
matches!(self, MmapOrVec::Mapped { .. })
}
pub fn heap_bytes(&self) -> usize {
match self {
MmapOrVec::Heap { data } => data.len() * std::mem::size_of::<T>(),
MmapOrVec::Mapped { .. } => 0,
}
}
pub fn as_raw_bytes(&self) -> &[u8] {
match self {
MmapOrVec::Heap { data } => unsafe {
std::slice::from_raw_parts(
data.as_ptr() as *const u8,
data.len() * std::mem::size_of::<T>(),
)
},
MmapOrVec::Mapped { mmap, len, .. } => &mmap[..*len * std::mem::size_of::<T>()],
}
}
/// Writes the live elements' raw bytes to `writer`.
///
/// # Errors
/// Returns whatever error `writer.write_all` reports.
pub fn write_to(&self, writer: &mut impl Write) -> io::Result<()> {
    let bytes = self.as_raw_bytes();
    writer.write_all(bytes)
}
/// Shrinks a mapped buffer's backing file to exactly the live elements and
/// remaps it, leaving `capacity == len`. Heap buffers are left untouched.
///
/// # Errors
/// Returns any I/O error from flushing, truncating or remapping. A remap
/// failure used to panic even though the function returns `io::Result`; it
/// is now reported as an error that names the backing path.
pub fn trim_to_logical_length(&mut self) -> io::Result<()> {
    match self {
        MmapOrVec::Heap { .. } => Ok(()),
        MmapOrVec::Mapped {
            mmap,
            len,
            capacity,
            file,
            path,
            ..
        } => {
            mmap.flush()?;
            let byte_len = *len * std::mem::size_of::<T>();
            file.set_len(byte_len as u64)?;
            // The file has no slack any more. Record that before remapping so
            // that, if the remap fails, a later `push` still grows the file
            // instead of writing past its end through the stale mapping.
            *capacity = *len;
            // Keep the requested mapping length non-zero for an empty buffer.
            let map_len = byte_len.max(1);
            // SAFETY: the file was just truncated to `byte_len`; for an empty
            // buffer the single mapped byte is never accessed because
            // `len == 0`.
            let remapped = unsafe { MmapOptions::new().len(map_len).map_mut(&*file) }
                .map_err(|e| {
                    io::Error::new(
                        e.kind(),
                        format!("trim remap failed for {}: {}", path.display(), e),
                    )
                })?;
            *mmap = remapped;
            Ok(())
        }
    }
}
pub fn save_to_file(&self, path: &Path) -> io::Result<()> {
match self {
MmapOrVec::Heap { data } => {
let bytes = unsafe {
std::slice::from_raw_parts(
data.as_ptr() as *const u8,
data.len() * std::mem::size_of::<T>(),
)
};
std::fs::write(path, bytes)
}
MmapOrVec::Mapped {
mmap, len, file, ..
} => {
mmap.flush()?;
let byte_len = *len * std::mem::size_of::<T>();
let src_path = self.file_path();
if let Some(sp) = src_path {
if sp == path {
file.set_len(byte_len as u64)?;
return Ok(());
}
}
std::fs::write(path, &mmap[..byte_len])
}
}
}
pub fn file_path(&self) -> Option<&Path> {
match self {
MmapOrVec::Heap { .. } => None,
MmapOrVec::Mapped { path, .. } => Some(path.as_path()),
}
}
pub fn to_vec(&self) -> Vec<T> {
match self {
MmapOrVec::Heap { data } => data.clone(),
MmapOrVec::Mapped { mmap, len, .. } => {
unsafe { std::slice::from_raw_parts(mmap.as_ptr() as *const T, *len) }.to_vec()
}
}
}
}
impl<T: Copy + Default + 'static> Clone for MmapOrVec<T> {
fn clone(&self) -> Self {
MmapOrVec::Heap {
data: self.to_vec(),
}
}
}
impl<T: Copy + Default + 'static> Default for MmapOrVec<T> {
fn default() -> Self {
MmapOrVec::new()
}
}
/// Growable byte buffer that is stored either in a heap `Vec<u8>` or in a
/// writable, file-backed memory map.
#[derive(Debug)]
pub enum MmapBytes {
/// Bytes are held in an ordinary `Vec<u8>`.
Heap {
/// The stored bytes.
data: Vec<u8>,
},
/// Bytes are held in a writable mapping created from `file`.
Mapped {
/// Writable mapping over the backing file.
mmap: MmapMut,
/// Number of bytes in use.
len: usize,
/// Number of mapped bytes available before `extend` grows the file.
capacity: usize,
/// Open handle the mapping was created from.
file: File,
/// Path recorded at construction; quoted in remap panic messages.
path: PathBuf,
},
}
impl MmapBytes {
pub fn new() -> Self {
MmapBytes::Heap { data: Vec::new() }
}
pub fn mapped(path: &Path, initial_cap: usize) -> io::Result<Self> {
let cap = initial_cap.max(4096); let file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.truncate(true)
.open(path)?;
file.set_len(cap as u64)?;
let mmap = unsafe { MmapOptions::new().len(cap).map_mut(&file)? };
Ok(MmapBytes::Mapped {
mmap,
len: 0,
capacity: cap,
file,
path: path.to_path_buf(),
})
}
pub fn load_mapped(path: &Path, len: usize) -> io::Result<Self> {
let file = OpenOptions::new().read(true).write(true).open(path)?;
let capacity = file.metadata()?.len() as usize;
if capacity < len {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"File too small for byte buffer",
));
}
let mmap = unsafe { MmapOptions::new().len(capacity).map_mut(&file)? };
Ok(MmapBytes::Mapped {
mmap,
len,
capacity,
file,
path: path.to_path_buf(),
})
}
pub fn len(&self) -> usize {
match self {
MmapBytes::Heap { data } => data.len(),
MmapBytes::Mapped { len, .. } => *len,
}
}
pub fn extend(&mut self, bytes: &[u8]) -> usize {
let start = self.len();
match self {
MmapBytes::Heap { data } => data.extend_from_slice(bytes),
MmapBytes::Mapped {
mmap,
len,
capacity,
file,
path,
} => {
let needed = *len + bytes.len();
if needed > *capacity {
let new_cap = (needed * 2).max(*capacity * 2);
file.set_len(new_cap as u64).expect("ftruncate failed");
*mmap = unsafe {
MmapOptions::new()
.len(new_cap)
.map_mut(&*file)
.unwrap_or_else(|e| {
panic!("mmap remap failed for {}: {}", path.display(), e)
})
};
*capacity = new_cap;
}
mmap[*len..*len + bytes.len()].copy_from_slice(bytes);
*len += bytes.len();
}
}
start
}
/// Returns the bytes in `start..end`.
///
/// Both representations are bounded by the logical length. Previously the
/// mapped variant indexed the whole mapping, so a range reaching past `len`
/// silently returned unused capacity bytes, whereas the heap variant
/// panicked.
///
/// # Panics
/// Panics when `start > end` or `end` exceeds the buffer's length.
pub fn slice(&self, start: usize, end: usize) -> &[u8] {
    match self {
        MmapBytes::Heap { data } => &data[start..end],
        // Restrict to the live prefix before applying the caller's range.
        MmapBytes::Mapped { mmap, len, .. } => &mmap[..*len][start..end],
    }
}
#[allow(dead_code)]
#[cfg(target_os = "linux")]
pub fn fadvise_dontneed(&self) {
if let MmapBytes::Mapped { file, len, .. } = self {
if *len > 0 {
use std::os::unix::io::AsRawFd;
unsafe {
let _ = libc::posix_fadvise(
file.as_raw_fd(),
0,
*len as libc::off_t,
libc::POSIX_FADV_DONTNEED,
);
}
}
}
}
#[allow(dead_code)]
#[cfg(target_os = "macos")]
pub fn fadvise_dontneed(&self) {
if let MmapBytes::Mapped { file, .. } = self {
use std::os::unix::io::AsRawFd;
unsafe {
let _ = libc::fcntl(file.as_raw_fd(), libc::F_NOCACHE, 1);
}
}
}
#[allow(dead_code)]
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
pub fn fadvise_dontneed(&self) {}
#[allow(dead_code)]
#[cfg(unix)]
pub fn flush_and_release_pages(&self) -> io::Result<()> {
if let MmapBytes::Mapped { mmap, len, .. } = self {
if *len > 0 {
mmap.flush()?;
unsafe {
let _ = mmap.unchecked_advise(memmap2::UncheckedAdvice::DontNeed);
}
}
}
Ok(())
}
#[allow(dead_code)]
#[cfg(not(unix))]
pub fn flush_and_release_pages(&self) -> io::Result<()> {
Ok(())
}
pub fn materialize_to_file(&mut self, path: &Path) -> io::Result<()> {
if matches!(self, MmapBytes::Mapped { .. }) {
return Ok(());
}
let MmapBytes::Heap { data } = self else {
unreachable!()
};
let len = data.len();
let cap = len.max(4096);
let file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.truncate(true)
.open(path)?;
file.set_len(cap as u64)?;
let mut mmap = unsafe { MmapOptions::new().len(cap).map_mut(&file)? };
mmap[..len].copy_from_slice(data);
mmap.flush_async()?;
*self = MmapBytes::Mapped {
mmap,
len,
capacity: cap,
file,
path: path.to_path_buf(),
};
Ok(())
}
#[allow(dead_code)] pub fn materialize_to_heap(&mut self) {
if matches!(self, MmapBytes::Heap { .. }) {
return;
}
let len = self.len();
let data = match self {
MmapBytes::Mapped { mmap, .. } => mmap[..len].to_vec(),
_ => unreachable!(),
};
*self = MmapBytes::Heap { data };
}
pub fn is_mapped(&self) -> bool {
matches!(self, MmapBytes::Mapped { .. })
}
pub fn heap_bytes(&self) -> usize {
match self {
MmapBytes::Heap { data } => data.len(),
MmapBytes::Mapped { .. } => 0,
}
}
pub fn as_raw_bytes(&self) -> &[u8] {
match self {
MmapBytes::Heap { data } => data,
MmapBytes::Mapped { mmap, len, .. } => &mmap[..*len],
}
}
/// Writes the live bytes to `writer`.
///
/// # Errors
/// Returns whatever error `writer.write_all` reports.
pub fn write_to(&self, writer: &mut impl Write) -> io::Result<()> {
    let bytes = self.as_raw_bytes();
    writer.write_all(bytes)
}
/// Persists the live bytes at `path`.
///
/// Heap buffers, and mapped buffers saved to a different path, are written
/// with `std::fs::write`. When `path` equals a mapped buffer's own recorded
/// path, the mapping is flushed and the backing file is truncated to the
/// live length instead. Going through `std::fs::write` in that case would
/// truncate the very file the mapping is being read from.
///
/// Note: the same-path branch truncates the file but, taking `&self`, does
/// not update the stored `capacity`.
///
/// # Errors
/// Returns any I/O error from flushing, truncating or writing.
#[allow(dead_code)]
pub fn save_to_file(&self, path: &Path) -> io::Result<()> {
    match self {
        MmapBytes::Heap { data } => std::fs::write(path, data),
        MmapBytes::Mapped {
            mmap,
            len,
            file,
            path: own_path,
            ..
        } => {
            if own_path.as_path() == path {
                // Data already lives in the target file: persist it and cut
                // off the unused tail rather than rewriting it from itself.
                mmap.flush()?;
                file.set_len(*len as u64)?;
                return Ok(());
            }
            std::fs::write(path, &mmap[..*len])
        }
    }
}
pub fn to_vec(&self) -> Vec<u8> {
match self {
MmapBytes::Heap { data } => data.clone(),
MmapBytes::Mapped { mmap, len, .. } => mmap[..*len].to_vec(),
}
}
}
impl Clone for MmapBytes {
fn clone(&self) -> Self {
MmapBytes::Heap {
data: self.to_vec(),
}
}
}
impl Default for MmapBytes {
fn default() -> Self {
MmapBytes::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// Builds the path of `name` inside the temporary directory.
    fn tmp_path(dir: &TempDir, name: &str) -> PathBuf {
        dir.path().join(name)
    }

    /// Push, indexed read and overwrite on a heap-backed buffer.
    #[test]
    fn test_heap_basic() {
        let mut buf: MmapOrVec<i64> = MmapOrVec::new();
        for value in [10, 20, 30] {
            buf.push(value);
        }
        assert_eq!(buf.len(), 3);
        for (index, expected) in [10i64, 20, 30].iter().enumerate() {
            assert_eq!(buf.get(index), *expected);
        }
        buf.set(1, 99);
        assert_eq!(buf.get(1), 99);
        assert!(!buf.is_mapped());
    }

    /// Push, indexed read and overwrite on a freshly mapped buffer.
    #[test]
    fn test_mapped_basic() {
        let dir = TempDir::new().unwrap();
        let path = tmp_path(&dir, "col.bin");
        let mut buf: MmapOrVec<i64> = MmapOrVec::mapped(&path, 4).unwrap();
        assert!(buf.is_mapped());
        assert_eq!(buf.len(), 0);
        for value in [100, 200] {
            buf.push(value);
        }
        assert_eq!(buf.len(), 2);
        assert_eq!(buf.get(0), 100);
        assert_eq!(buf.get(1), 200);
        buf.set(0, 999);
        assert_eq!(buf.get(0), 999);
    }

    /// Pushing past the initial capacity grows the mapping and keeps the data.
    #[test]
    fn test_mapped_grow() {
        let dir = TempDir::new().unwrap();
        let path = tmp_path(&dir, "grow.bin");
        let mut buf: MmapOrVec<u32> = MmapOrVec::mapped(&path, 2).unwrap();
        (0..200).for_each(|value| buf.push(value));
        assert_eq!(buf.len(), 200);
        for expected in 0..200u32 {
            assert_eq!(buf.get(expected as usize), expected);
        }
    }

    /// A heap buffer keeps its contents after being moved into a file mapping.
    #[test]
    fn test_heap_to_mapped() {
        let dir = TempDir::new().unwrap();
        let path = tmp_path(&dir, "convert.bin");
        let values = [1.5, 2.7, 3.9];
        let mut buf: MmapOrVec<f64> = MmapOrVec::new();
        for value in values {
            buf.push(value);
        }
        assert!(!buf.is_mapped());
        buf.materialize_to_file(&path).unwrap();
        assert!(buf.is_mapped());
        assert_eq!(buf.len(), 3);
        for (index, expected) in values.iter().enumerate() {
            assert_eq!(buf.get(index), *expected);
        }
    }

    /// A mapped buffer keeps its contents after being copied back to the heap.
    #[test]
    fn test_mapped_to_heap() {
        let dir = TempDir::new().unwrap();
        let path = tmp_path(&dir, "to_heap.bin");
        let mut buf: MmapOrVec<i32> = MmapOrVec::mapped(&path, 4).unwrap();
        for value in [10, 20] {
            buf.push(value);
        }
        buf.materialize_to_heap();
        assert!(!buf.is_mapped());
        assert_eq!(buf.len(), 2);
        assert_eq!(buf.get(0), 10);
        assert_eq!(buf.get(1), 20);
    }

    /// Cloning a mapped buffer yields a heap-backed copy.
    #[test]
    fn test_clone_always_heap() {
        let dir = TempDir::new().unwrap();
        let path = tmp_path(&dir, "clone.bin");
        let mut buf: MmapOrVec<i64> = MmapOrVec::mapped(&path, 4).unwrap();
        buf.push(42);
        let cloned = buf.clone();
        assert!(!cloned.is_mapped());
        assert_eq!(cloned.get(0), 42);
    }

    /// Data saved from a heap buffer can be loaded back through a mapping.
    #[test]
    fn test_save_load_mapped() {
        let dir = TempDir::new().unwrap();
        let path = tmp_path(&dir, "save.bin");
        let mut buf: MmapOrVec<i64> = MmapOrVec::new();
        for value in [1, 2, 3] {
            buf.push(value);
        }
        buf.save_to_file(&path).unwrap();
        let loaded: MmapOrVec<i64> = MmapOrVec::load_mapped(&path, 3).unwrap();
        assert!(loaded.is_mapped());
        for (index, expected) in [1i64, 2, 3].iter().enumerate() {
            assert_eq!(loaded.get(index), *expected);
        }
    }

    /// `extend` returns start offsets and `slice` reads them back (heap).
    #[test]
    fn test_bytes_heap_basic() {
        let mut buf = MmapBytes::new();
        let hello_at = buf.extend(b"hello");
        let world_at = buf.extend(b"world");
        assert_eq!(hello_at, 0);
        assert_eq!(world_at, 5);
        assert_eq!(buf.slice(0, 5), b"hello");
        assert_eq!(buf.slice(5, 10), b"world");
        assert_eq!(buf.len(), 10);
    }

    /// `extend` returns start offsets and `slice` reads them back (mapped).
    #[test]
    fn test_bytes_mapped() {
        let dir = TempDir::new().unwrap();
        let path = tmp_path(&dir, "bytes.bin");
        let mut buf = MmapBytes::mapped(&path, 16).unwrap();
        assert!(buf.is_mapped());
        let hello_at = buf.extend(b"hello");
        let world_at = buf.extend(b"world");
        assert_eq!(hello_at, 0);
        assert_eq!(world_at, 5);
        assert_eq!(buf.slice(0, 5), b"hello");
        assert_eq!(buf.slice(5, 10), b"world");
    }

    /// Extending past the initial capacity grows the mapping.
    #[test]
    fn test_bytes_mapped_grow() {
        let dir = TempDir::new().unwrap();
        let path = tmp_path(&dir, "bytes_grow.bin");
        let mut buf = MmapBytes::mapped(&path, 16).unwrap();
        let payload = vec![b'x'; 5000];
        buf.extend(&payload);
        assert_eq!(buf.len(), 5000);
        assert_eq!(buf.slice(0, 3), b"xxx");
    }

    /// Bytes saved from a heap buffer can be loaded back through a mapping.
    #[test]
    fn test_bytes_save_load() {
        let dir = TempDir::new().unwrap();
        let path = tmp_path(&dir, "bytes_save.bin");
        let mut buf = MmapBytes::new();
        buf.extend(b"test data here");
        buf.save_to_file(&path).unwrap();
        let loaded = MmapBytes::load_mapped(&path, 14).unwrap();
        assert_eq!(loaded.slice(0, 14), b"test data here");
    }

    /// Cloning a mapped byte buffer yields a heap-backed copy.
    #[test]
    fn test_bytes_clone_always_heap() {
        let dir = TempDir::new().unwrap();
        let path = tmp_path(&dir, "bytes_clone.bin");
        let mut buf = MmapBytes::mapped(&path, 16).unwrap();
        buf.extend(b"data");
        let cloned = buf.clone();
        assert!(!cloned.is_mapped());
        assert_eq!(cloned.slice(0, 4), b"data");
    }
}