use std::os::fd::AsRawFd;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use crate::engine::EngineId;
use crate::error::{MemError, Result};
/// Bookkeeping record describing one blob stored in an [`OverflowRegion`].
#[derive(Debug, Clone)]
pub struct OverflowSlot {
/// Byte offset of the slot's data from the start of the mapped region.
pub offset: usize,
/// Number of bytes the slot exposes when it is read.
pub size: usize,
/// Engine identifier supplied by the writer of the slot's current contents.
pub engine: EngineId,
/// `true` while the slot holds live data; cleared when the slot is freed.
pub occupied: bool,
}
/// File-backed, memory-mapped region that stores byte blobs in numbered slots.
///
/// New data is appended at `cursor`; freed slots are remembered in `free_list`
/// so that later writes can reuse them.
pub struct OverflowRegion {
/// Path the backing file was opened from.
path: PathBuf,
/// Handle to the backing file; also used to resize the file when growing.
_fd: Arc<std::fs::File>,
/// Start address of the current mapping.
base: *mut u8,
/// Length of the current mapping in bytes.
capacity: usize,
/// Offset at which the next appended blob will be written.
cursor: usize,
/// Slot table; a slot index is a position in this vector.
slots: Vec<OverflowSlot>,
/// Indices into `slots` of freed slots that are available for reuse.
free_list: Vec<usize>,
/// Upper bound on `capacity` enforced when the region grows.
max_capacity: usize,
}
impl OverflowRegion {
/// Size in bytes given to a freshly created backing file by [`Self::open`] (64 MiB).
pub const DEFAULT_INITIAL_CAPACITY: usize = 64 << 20;
/// Growth limit in bytes applied unless overridden via `open_with_config` (1 GiB).
pub const DEFAULT_MAX_CAPACITY: usize = 1 << 30;
/// Opens the overflow file at `path`, sizing a new (empty) file to
/// [`Self::DEFAULT_INITIAL_CAPACITY`] bytes.
///
/// # Errors
/// Propagates every error reported by [`Self::open_with_capacity`].
pub fn open(path: &Path) -> Result<Self> {
    let initial_capacity = Self::DEFAULT_INITIAL_CAPACITY;
    Self::open_with_capacity(path, initial_capacity)
}
/// Opens the overflow file at `path` with an explicit initial size and growth limit.
///
/// The region is opened exactly as by [`Self::open_with_capacity`]; afterwards its
/// growth limit is replaced with `max_capacity`.
///
/// # Errors
/// Propagates every error reported by [`Self::open_with_capacity`].
pub fn open_with_config(
    path: &Path,
    initial_capacity: usize,
    max_capacity: usize,
) -> Result<Self> {
    Self::open_with_capacity(path, initial_capacity).map(|mut opened| {
        opened.max_capacity = max_capacity;
        opened
    })
}
/// Opens the overflow file at `path` and maps it read/write into memory.
///
/// The file is created if it does not exist. An empty file is extended to
/// `initial_capacity` bytes; a non-empty file keeps its current length and
/// `initial_capacity` is ignored. The write cursor, slot table and free list
/// always start empty, and the growth limit is [`Self::DEFAULT_MAX_CAPACITY`].
///
/// # Errors
/// Returns [`MemError::Overflow`] when the file cannot be opened, inspected or
/// resized, when an empty file is requested with `initial_capacity == 0`, when
/// the file length does not fit in `usize`, or when `mmap` fails.
pub fn open_with_capacity(path: &Path, initial_capacity: usize) -> Result<Self> {
    let fd = std::fs::OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .truncate(false)
        .open(path)
        .map_err(|e| MemError::Overflow(format!("failed to open overflow file: {e}")))?;

    let file_len = fd
        .metadata()
        .map_err(|e| MemError::Overflow(format!("failed to get file metadata: {e}")))?
        .len();
    // A plain `as usize` would silently truncate on targets where usize is
    // narrower than 64 bits and map only part of the file.
    let current_size = usize::try_from(file_len).map_err(|_| {
        MemError::Overflow(format!(
            "overflow file is too large to map: {file_len} bytes"
        ))
    })?;

    let capacity = if current_size == 0 {
        // mmap rejects zero-length mappings; fail early with a clear reason.
        if initial_capacity == 0 {
            return Err(MemError::Overflow(
                "initial capacity must be non-zero".to_string(),
            ));
        }
        fd.set_len(initial_capacity as u64)
            .map_err(|e| MemError::Overflow(format!("failed to truncate file: {e}")))?;
        initial_capacity
    } else {
        current_size
    };

    // SAFETY: `fd` is a valid descriptor opened read/write, `capacity` is non-zero
    // and no larger than the file length established above, and a null hint lets
    // the kernel choose the address. The result is checked against MAP_FAILED
    // before it is used.
    let base = unsafe {
        libc::mmap(
            std::ptr::null_mut(),
            capacity,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_SHARED,
            fd.as_raw_fd(),
            0,
        )
    };
    if base == libc::MAP_FAILED {
        // Capture errno right away so the caller learns why the mapping failed.
        let os_err = std::io::Error::last_os_error();
        return Err(MemError::Overflow(format!(
            "failed to mmap overflow region: {os_err}"
        )));
    }

    Ok(Self {
        path: path.to_path_buf(),
        _fd: Arc::new(fd),
        base: base as *mut u8,
        capacity,
        cursor: 0,
        slots: Vec::new(),
        free_list: Vec::new(),
        max_capacity: Self::DEFAULT_MAX_CAPACITY,
    })
}
/// Stores `data` in the region on behalf of `engine` and returns its slot index.
///
/// A freed slot that is large enough is reused when one exists; otherwise the
/// bytes are appended at the cursor, growing the mapping first if they would not
/// fit in the current capacity.
///
/// # Errors
/// Returns the error from growing the region when the data does not fit.
pub fn write(&mut self, data: &[u8], engine: EngineId) -> Result<usize> {
    if let Some(recycled) = self.try_reuse_slot(data, engine) {
        return Ok(recycled);
    }

    let len = data.len();
    let offset = self.cursor;
    let end = offset + len;
    if end > self.capacity {
        self.grow(end)?;
    }

    // SAFETY: the mapping is at least `end` bytes long (grown above when needed),
    // so `offset..end` lies inside it, and `data` is a shared borrow that cannot
    // overlap the mapping while `self` is mutably borrowed.
    unsafe {
        std::ptr::copy_nonoverlapping(data.as_ptr(), self.base.add(offset), len);
    }

    self.slots.push(OverflowSlot {
        offset,
        size: len,
        engine,
        occupied: true,
    });
    self.cursor = end;
    Ok(self.slots.len() - 1)
}
/// Tries to place `data` into a previously freed slot instead of appending.
///
/// Picks the smallest free slot that can hold `data` (best fit; the earliest
/// free-list entry wins ties), copies the bytes in and marks the slot as
/// occupied by `engine`. Returns the slot index, or `None` when no free slot is
/// large enough.
///
/// The slot's `size` is set to `data.len()` so that a later `read` returns
/// exactly the bytes written here and never stale bytes left behind by the
/// previous occupant. The trade-off is that any bytes beyond the new length are
/// no longer tracked by this slot.
fn try_reuse_slot(&mut self, data: &[u8], engine: EngineId) -> Option<usize> {
    let needed = data.len();
    let slots = &self.slots;

    // Smallest adequate size == least waste; `min_by_key` keeps the first
    // minimum, matching a strict `<` comparison over the free list.
    let fl_idx = self
        .free_list
        .iter()
        .enumerate()
        .filter(|&(_, &slot_idx)| slots[slot_idx].size >= needed)
        .min_by_key(|&(_, &slot_idx)| slots[slot_idx].size)
        .map(|(fl_idx, _)| fl_idx)?;

    let slot_index = self.free_list.swap_remove(fl_idx);
    let slot = &mut self.slots[slot_index];

    // SAFETY: the slot was selected with `size >= needed`, so the destination
    // range stays inside the slot's bytes within the mapping; `data` is a shared
    // borrow that cannot overlap the mapping while `self` is mutably borrowed.
    unsafe {
        std::ptr::copy_nonoverlapping(data.as_ptr(), self.base.add(slot.offset), needed);
    }

    slot.size = needed;
    slot.occupied = true;
    slot.engine = engine;
    Some(slot_index)
}
/// Returns the bytes stored in slot `slot_index`, borrowed from the mapping.
///
/// # Errors
/// Returns [`MemError::Overflow`] when the index is outside the slot table or
/// the slot is not currently occupied.
pub fn read(&self, slot_index: usize) -> Result<&[u8]> {
    match self.slots.get(slot_index) {
        None => Err(MemError::Overflow(format!(
            "invalid slot index: {slot_index}"
        ))),
        Some(entry) if !entry.occupied => Err(MemError::Overflow(format!(
            "slot {slot_index} is not occupied"
        ))),
        Some(entry) => {
            // SAFETY: the slot's offset and size were recorded when its bytes were
            // copied into the mapping, so the range lies inside the mapped
            // region; the returned slice borrows `self`, which keeps the mapping
            // from being remapped or unmapped while the slice is alive.
            let bytes =
                unsafe { std::slice::from_raw_parts(self.base.add(entry.offset), entry.size) };
            Ok(bytes)
        }
    }
}
/// Marks slot `slot_index` as unoccupied and queues it for reuse by later writes.
///
/// # Errors
/// Returns [`MemError::Overflow`] when the index is outside the slot table or
/// the slot has already been freed.
pub fn free(&mut self, slot_index: usize) -> Result<()> {
    let Some(entry) = self.slots.get_mut(slot_index) else {
        return Err(MemError::Overflow(format!(
            "invalid slot index: {slot_index}"
        )));
    };

    if entry.occupied {
        entry.occupied = false;
        self.free_list.push(slot_index);
        Ok(())
    } else {
        Err(MemError::Overflow(format!(
            "slot {slot_index} is already freed"
        )))
    }
}
pub fn used_bytes(&self) -> usize {
self.cursor
}
pub fn capacity(&self) -> usize {
self.capacity
}
/// Returns the path the backing file was opened from.
pub fn path(&self) -> &Path {
    self.path.as_path()
}
pub fn slot_count(&self) -> usize {
self.slots.len()
}
/// Enlarges the backing file and its mapping so that at least `required` bytes fit.
///
/// The capacity is doubled when possible, raised to `required` when doubling is
/// not enough, and clamped to `max_capacity`, so a request that fits under the
/// limit is never rejected merely because doubling would overshoot it. A request
/// that already fits in the current capacity is a no-op.
///
/// # Errors
/// Returns [`MemError::Overflow`] if `required` exceeds `max_capacity`, or if
/// resizing the file or remapping the region fails. On error `base` and
/// `capacity` are left untouched (the file itself may already have been
/// extended).
fn grow(&mut self, required: usize) -> Result<()> {
    if required <= self.capacity {
        return Ok(());
    }
    if required > self.max_capacity {
        return Err(MemError::Overflow(format!(
            "overflow region would exceed max capacity: {} > {}",
            required, self.max_capacity
        )));
    }

    // `required` lies in (capacity, max_capacity], so the clamped value is always
    // strictly larger than the current capacity and never above the limit.
    let new_capacity = self
        .capacity
        .saturating_mul(2)
        .max(required)
        .min(self.max_capacity);

    // Extend the file before remapping so the new pages are backed by it.
    // `set_len` reports the OS error and range-checks the length itself.
    self._fd
        .set_len(new_capacity as u64)
        .map_err(|e| MemError::Overflow(format!("failed to truncate file for growth: {e}")))?;

    // SAFETY: `base`/`capacity` describe the live mapping created by mmap (or a
    // previous mremap). `&mut self` guarantees no slice borrowed from the mapping
    // is outstanding, so the kernel may move it. The result is checked against
    // MAP_FAILED before it replaces `base`.
    let new_base = unsafe {
        libc::mremap(
            self.base as *mut libc::c_void,
            self.capacity,
            new_capacity,
            libc::MREMAP_MAYMOVE,
        )
    };
    if new_base == libc::MAP_FAILED {
        // Capture errno right away so the caller learns why the remap failed.
        let os_err = std::io::Error::last_os_error();
        return Err(MemError::Overflow(format!(
            "failed to remap overflow region: {os_err}"
        )));
    }

    self.base = new_base as *mut u8;
    self.capacity = new_capacity;
    Ok(())
}
}
/// Unmaps the region when the value is dropped.
impl Drop for OverflowRegion {
    fn drop(&mut self) {
        if self.base.is_null() {
            return;
        }
        // SAFETY: `base` and `capacity` describe the mapping owned by this value,
        // and nothing can borrow from it once `drop` runs. The return value is
        // deliberately ignored: there is no way to report a failure from `drop`.
        unsafe {
            let _ = libc::munmap(self.base as *mut libc::c_void, self.capacity);
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
/// A fresh region starts empty; the first write lands in slot 0 and advances the cursor.
#[test]
fn create_and_write() {
    let tmp = tempfile::tempdir().expect("failed to create temp dir");
    let file = tmp.path().join("overflow.mmap");
    let mut region = OverflowRegion::open(&file).expect("failed to open region");
    assert_eq!(region.used_bytes(), 0);
    assert!(region.capacity() > 0);

    let payload = b"hello, world!";
    let index = region
        .write(payload, EngineId::Vector)
        .expect("failed to write");
    assert_eq!(region.used_bytes(), payload.len());
    assert_eq!(index, 0);
    assert_eq!(region.slot_count(), 1);
}
/// Two consecutive writes get consecutive slot indices and read back unchanged.
#[test]
fn write_and_read_roundtrip() {
    let tmp = tempfile::tempdir().expect("failed to create temp dir");
    let file = tmp.path().join("overflow.mmap");
    let mut region = OverflowRegion::open(&file).expect("failed to open region");

    let (first, second) = (b"first", b"second");
    let first_idx = region
        .write(first, EngineId::Vector)
        .expect("failed to write slot 1");
    let second_idx = region
        .write(second, EngineId::Sparse)
        .expect("failed to write slot 2");
    assert_eq!(first_idx, 0);
    assert_eq!(second_idx, 1);

    let first_back = region.read(first_idx).expect("failed to read slot 1");
    let second_back = region.read(second_idx).expect("failed to read slot 2");
    assert_eq!(first_back, first);
    assert_eq!(second_back, second);
}
/// A slot is readable until it is freed and rejected by `read` afterwards.
#[test]
fn free_slot() {
    let tmp = tempfile::tempdir().expect("failed to create temp dir");
    let file = tmp.path().join("overflow.mmap");
    let mut region = OverflowRegion::open(&file).expect("failed to open region");

    let index = region
        .write(b"data", EngineId::Vector)
        .expect("failed to write");
    assert!(region.read(index).is_ok());

    region.free(index).expect("failed to free slot");
    assert!(region.read(index).is_err());
}
/// Writing more than the initial capacity grows the region and keeps the data intact.
#[test]
fn grow_region() {
    const INITIAL: usize = 1024;

    let tmp = tempfile::tempdir().expect("failed to create temp dir");
    let file = tmp.path().join("overflow.mmap");
    let mut region =
        OverflowRegion::open_with_capacity(&file, INITIAL).expect("failed to open region");
    assert_eq!(region.capacity(), INITIAL);

    // Twice the initial capacity cannot fit without growing.
    let payload = vec![0u8; INITIAL * 2];
    let index = region
        .write(&payload, EngineId::Vector)
        .expect("failed to write large data");
    assert!(region.capacity() > INITIAL);

    let echoed = region.read(index).expect("failed to read after growth");
    assert_eq!(echoed.len(), payload.len());
    assert_eq!(echoed, payload.as_slice());
}
/// Both `read` and `free` reject an index that is outside the slot table.
#[test]
fn invalid_slot_index() {
    let tmp = tempfile::tempdir().expect("failed to create temp dir");
    let file = tmp.path().join("overflow.mmap");
    let mut region = OverflowRegion::open(&file).expect("failed to open region");

    let missing = 999;
    assert!(region.read(missing).is_err());
    assert!(region.free(missing).is_err());
}
/// A write that fits into a freed slot reuses it instead of advancing the cursor.
#[test]
fn free_list_reuse() {
    let tmp = tempfile::tempdir().expect("failed to create temp dir");
    let file = tmp.path().join("overflow.mmap");
    let mut region = OverflowRegion::open(&file).expect("failed to open region");

    let first = region.write(b"aaaa", EngineId::Vector).expect("write s0");
    let second = region.write(b"bbbb", EngineId::Sparse).expect("write s1");
    let _third = region.write(b"cccc", EngineId::Vector).expect("write s2");
    let used_before = region.used_bytes();

    region.free(first).expect("free s0");
    region.free(second).expect("free s1");

    let recycled = region.write(b"dd", EngineId::Sparse).expect("write s3");
    assert_eq!(region.used_bytes(), used_before);
    assert!([first, second].contains(&recycled));

    let bytes = region.read(recycled).expect("read s3");
    assert_eq!(&bytes[..2], b"dd");
}
/// Freeing the same slot twice succeeds once and fails the second time.
#[test]
fn double_free_is_error() {
    let tmp = tempfile::tempdir().expect("failed to create temp dir");
    let file = tmp.path().join("overflow.mmap");
    let mut region = OverflowRegion::open(&file).expect("failed to open region");

    let index = region.write(b"data", EngineId::Vector).expect("write");
    region.free(index).expect("first free");

    let second_attempt = region.free(index);
    assert!(second_attempt.is_err());
}
/// Slot records carry the written length and engine, and reflect `free` via `occupied`.
#[test]
fn slot_metadata() {
    let dir = tempfile::tempdir().expect("failed to create temp dir");
    let path = dir.path().join("overflow.mmap");
    let mut region = OverflowRegion::open(&path).expect("failed to open region");

    let slot1 = region
        .write(b"abc", EngineId::Vector)
        .expect("failed to write");
    let slot2 = region
        .write(b"defgh", EngineId::Sparse)
        .expect("failed to write");

    // The test lives in a child module of the region, so it may inspect the
    // private slot table directly.
    let s1 = &region.slots[slot1];
    let s2 = &region.slots[slot2];
    assert_eq!(s1.size, 3);
    assert_eq!(s1.engine, EngineId::Vector);
    assert!(s1.occupied);
    assert_eq!(s2.size, 5);
    assert_eq!(s2.engine, EngineId::Sparse);
    assert!(s2.occupied);

    // The shared borrows above end here, so the region can be mutated again.
    region.free(slot1).expect("failed to free");
    assert!(!region.slots[slot1].occupied);
}
}