use crate::error::{Result, ZiporaError};
use crate::memory::mmap::{MemoryMappedAllocator, MmapAllocation};
use crate::memory::cache_layout::{CacheOptimizedAllocator, CacheLayoutConfig, align_to_cache_line, AccessPattern, PrefetchHint};
use crate::memory::cache::{get_optimal_numa_node, numa_alloc_aligned};
use crate::memory::simd_ops::{fast_fill, fast_copy_cache_optimized, fast_compare, fast_prefetch_range};
use crate::simd::{AdaptiveSimdSelector, Operation};
use bytemuck::Pod;
use std::fs::OpenOptions;
use std::io::{Seek, SeekFrom, Write};
use std::marker::PhantomData;
use std::path::{Path, PathBuf};
use std::ptr::NonNull;
use std::slice;
#[repr(C)]
#[derive(Debug, Clone, Copy)]
struct MmapVecHeader {
magic: u64,
version: u32,
element_size: u32,
length: u64,
capacity: u64,
reserved: [u64; 6],
}
const MMAP_VEC_MAGIC: u64 = 0x4D4D41505F564543; const MMAP_VEC_VERSION: u32 = 1;
const HEADER_SIZE: usize = std::mem::size_of::<MmapVecHeader>();
impl MmapVecHeader {
fn new<T>() -> Self {
Self {
magic: MMAP_VEC_MAGIC,
version: MMAP_VEC_VERSION,
element_size: std::mem::size_of::<T>() as u32,
length: 0,
capacity: 0,
reserved: [0; 6],
}
}
fn validate<T>(&self) -> Result<()> {
if self.magic != MMAP_VEC_MAGIC {
return Err(ZiporaError::invalid_data("Invalid magic number"));
}
if self.version != MMAP_VEC_VERSION {
return Err(ZiporaError::invalid_data("Unsupported version"));
}
if self.element_size != std::mem::size_of::<T>() as u32 {
return Err(ZiporaError::invalid_data("Element size mismatch"));
}
if self.length > self.capacity {
return Err(ZiporaError::invalid_data("Length exceeds capacity"));
}
Ok(())
}
}
#[derive(Debug, Clone)]
pub struct MmapVecConfig {
pub initial_capacity: usize,
pub growth_factor: f64,
pub read_only: bool,
pub populate_pages: bool,
pub use_huge_pages: bool,
pub sync_on_write: bool,
pub enable_cache_alignment: bool,
pub cache_config: Option<CacheLayoutConfig>,
pub enable_numa_awareness: bool,
pub access_pattern: AccessPattern,
pub enable_prefetching: bool,
pub prefetch_distance: usize,
}
impl Default for MmapVecConfig {
fn default() -> Self {
Self {
initial_capacity: 1024,
growth_factor: 1.618, read_only: false,
populate_pages: false,
use_huge_pages: false,
sync_on_write: false,
enable_cache_alignment: true,
cache_config: Some(CacheLayoutConfig::new()),
enable_numa_awareness: true,
access_pattern: AccessPattern::Mixed,
enable_prefetching: true,
prefetch_distance: 64, }
}
}
impl MmapVecConfig {
pub fn builder() -> Self { Self::default() }
pub fn with_initial_capacity(mut self, v: usize) -> Self { self.initial_capacity = v; self }
pub fn with_growth_factor(mut self, v: f64) -> Self { self.growth_factor = v; self }
pub fn with_populate_pages(mut self, v: bool) -> Self { self.populate_pages = v; self }
pub fn with_sync_on_write(mut self, v: bool) -> Self { self.sync_on_write = v; self }
pub fn build(self) -> Self { self }
pub fn read_only() -> Self {
Self {
read_only: true,
populate_pages: true, ..Self::default()
}
}
pub fn large_dataset() -> Self {
Self {
initial_capacity: 1024 * 1024, growth_factor: 1.5, populate_pages: false, use_huge_pages: true,
..Self::default()
}
}
pub fn persistent_cache() -> Self {
Self {
initial_capacity: 16384,
growth_factor: 2.0, sync_on_write: true, ..Self::default()
}
}
pub fn performance_optimized() -> Self {
Self {
initial_capacity: 8192,
growth_factor: 1.618, populate_pages: true,
use_huge_pages: cfg!(target_os = "linux"),
sync_on_write: false,
..Self::default()
}
}
pub fn memory_optimized() -> Self {
Self {
initial_capacity: 256,
growth_factor: 1.4, populate_pages: false,
use_huge_pages: false,
sync_on_write: false,
..Self::default()
}
}
pub fn realtime() -> Self {
Self {
initial_capacity: 1024,
growth_factor: 1.5, populate_pages: true, use_huge_pages: true, sync_on_write: false, ..Self::default()
}
}
}
#[derive(Debug, Clone)]
pub struct MmapVecStats {
pub len: usize,
pub capacity: usize,
pub element_size: usize,
pub header_size: usize,
pub data_size: usize,
pub total_size: usize,
pub utilization: f64,
pub file_path: PathBuf,
pub read_only: bool,
pub growth_factor: f64,
}
impl MmapVecStats {
pub fn memory_efficiency(&self) -> f64 {
self.utilization * 100.0
}
pub fn wasted_space(&self) -> usize {
(self.capacity - self.len) * self.element_size
}
pub fn needs_compaction(&self, threshold: f64) -> bool {
self.utilization < threshold
}
}
pub struct MmapVec<T> {
file_path: PathBuf,
mmap: Option<MmapAllocation>,
config: MmapVecConfig,
header: Option<NonNull<MmapVecHeader>>,
data: Option<NonNull<T>>,
is_temp_file: bool,
_phantom: PhantomData<T>,
}
impl<T> MmapVec<T>
where
T: Pod + 'static, {
pub fn create<P: AsRef<Path>>(path: P, config: MmapVecConfig) -> Result<Self> {
let file_path = path.as_ref().to_path_buf();
let initial_file_size = Self::calculate_file_size(config.initial_capacity);
Self::create_backing_file(&file_path, initial_file_size)?;
let mmap = Self::create_mmap(&file_path, &config)?;
let mut vec = Self {
file_path,
mmap: Some(mmap),
config,
header: None,
data: None,
is_temp_file: false,
_phantom: PhantomData,
};
vec.update_pointers()?;
vec.initialize_header()?;
Ok(vec)
}
pub fn open<P: AsRef<Path>>(path: P, config: MmapVecConfig) -> Result<Self> {
let file_path = path.as_ref().to_path_buf();
if !file_path.exists() {
return Err(ZiporaError::invalid_data("File does not exist"));
}
let mmap = Self::create_mmap(&file_path, &config)?;
let mut vec = Self {
file_path,
mmap: Some(mmap),
config,
header: None,
data: None,
is_temp_file: false,
_phantom: PhantomData,
};
vec.update_pointers()?;
vec.validate_header()?;
Ok(vec)
}
#[inline]
pub fn len(&self) -> usize {
self.header()
.map(|h| unsafe { h.as_ref().length as usize })
.unwrap_or(0)
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline]
pub fn capacity(&self) -> usize {
self.header()
.map(|h| unsafe { h.as_ref().capacity as usize })
.unwrap_or(0)
}
pub fn push(&mut self, value: T) -> Result<()> {
if self.config.read_only {
return Err(ZiporaError::invalid_data("Vector is read-only"));
}
let current_len = self.len();
let current_capacity = self.capacity();
if current_len >= current_capacity {
self.grow()?;
}
unsafe {
let data_ptr = self.data_ptr()?.as_ptr();
std::ptr::write(data_ptr.add(current_len), value);
}
self.set_length(current_len + 1)?;
if self.config.sync_on_write {
self.sync()?;
}
Ok(())
}
pub fn pop(&mut self) -> Option<T> {
if self.config.read_only {
return None;
}
let current_len = self.len();
if current_len == 0 {
return None;
}
let value = unsafe {
let data_ptr = self.data_ptr().ok()?.as_ptr();
std::ptr::read(data_ptr.add(current_len - 1))
};
if self.set_length(current_len - 1).is_err() {
return None;
}
if self.config.sync_on_write {
let _ = self.sync();
}
Some(value)
}
#[inline]
pub fn get(&self, index: usize) -> Option<&T> {
if index >= self.len() {
return None;
}
unsafe {
let data_ptr = self.data_ptr().ok()?.as_ptr();
Some(&*data_ptr.add(index))
}
}
pub fn get_mut(&mut self, index: usize) -> Option<&mut T> {
if self.config.read_only || index >= self.len() {
return None;
}
unsafe {
let data_ptr = self.data_ptr().ok()?.as_ptr();
Some(&mut *data_ptr.add(index))
}
}
pub fn as_slice(&self) -> &[T] {
let len = self.len();
if len == 0 {
return &[];
}
unsafe {
let data_ptr = self.data_ptr().expect("mmap data pointer valid").as_ptr();
slice::from_raw_parts(data_ptr, len)
}
}
pub fn as_mut_slice(&mut self) -> &mut [T] {
if self.config.read_only {
return &mut [];
}
let len = self.len();
if len == 0 {
return &mut [];
}
unsafe {
let data_ptr = self.data_ptr().expect("mmap data pointer valid").as_ptr();
slice::from_raw_parts_mut(data_ptr, len)
}
}
pub fn clear(&mut self) -> Result<()> {
if self.config.read_only {
return Err(ZiporaError::invalid_data("Vector is read-only"));
}
self.set_length(0)?;
if self.config.sync_on_write {
self.sync()?;
}
Ok(())
}
pub fn reserve(&mut self, additional: usize) -> Result<()> {
if self.config.read_only {
return Err(ZiporaError::invalid_data("Vector is read-only"));
}
let current_len = self.len();
let current_capacity = self.capacity();
let required_capacity = current_len + additional;
if required_capacity > current_capacity {
let new_capacity = required_capacity.max(
(current_capacity as f64 * self.config.growth_factor) as usize
);
self.resize_to_capacity(new_capacity)?;
}
Ok(())
}
pub fn sync(&self) -> Result<()> {
if let Some(mmap) = &self.mmap {
let file_size = self.file_size_from_header()?;
let content = unsafe {
std::slice::from_raw_parts(mmap.as_ptr(), file_size)
};
std::fs::write(&self.file_path, content)
.map_err(|e| ZiporaError::io_error(&format!("Failed to sync to file: {}", e)))?;
}
Ok(())
}
fn file_size_from_header(&self) -> Result<usize> {
let capacity = self.capacity();
Ok(HEADER_SIZE + capacity * std::mem::size_of::<T>())
}
pub fn path(&self) -> &Path {
&self.file_path
}
#[inline]
pub fn memory_usage(&self) -> usize {
self.capacity() * std::mem::size_of::<T>() + HEADER_SIZE
}
pub fn stats(&self) -> MmapVecStats {
let len = self.len();
let capacity = self.capacity();
let element_size = std::mem::size_of::<T>();
let header_size = HEADER_SIZE;
let data_size = capacity * element_size;
let total_size = header_size + data_size;
let utilization = if capacity > 0 {
len as f64 / capacity as f64
} else {
0.0
};
MmapVecStats {
len,
capacity,
element_size,
header_size,
data_size,
total_size,
utilization,
file_path: self.file_path.clone(),
read_only: self.config.read_only,
growth_factor: self.config.growth_factor,
}
}
pub fn shrink_to_fit(&mut self) -> Result<()> {
if self.config.read_only {
return Err(ZiporaError::invalid_data("Vector is read-only"));
}
let current_len = self.len();
let current_capacity = self.capacity();
if current_len < current_capacity {
let new_capacity = current_len.max(1); self.resize_to_capacity(new_capacity)?;
}
Ok(())
}
pub fn extend<I>(&mut self, iter: I) -> Result<()>
where
I: IntoIterator<Item = T>,
{
if self.config.read_only {
return Err(ZiporaError::invalid_data("Vector is read-only"));
}
let iter = iter.into_iter();
let (lower, _) = iter.size_hint();
if lower > 0 {
self.reserve(lower)?;
}
for item in iter {
self.push(item)?;
}
Ok(())
}
pub fn truncate(&mut self, len: usize) -> Result<()> {
if self.config.read_only {
return Err(ZiporaError::invalid_data("Vector is read-only"));
}
let current_len = self.len();
if len < current_len {
self.set_length(len)?;
if self.config.sync_on_write {
self.sync()?;
}
}
Ok(())
}
pub fn resize(&mut self, len: usize, value: T) -> Result<()> {
if self.config.read_only {
return Err(ZiporaError::invalid_data("Vector is read-only"));
}
let current_len = self.len();
if len > current_len {
let additional = len - current_len;
self.reserve(additional)?;
unsafe {
let data_ptr = self.data_ptr()?.as_ptr();
for i in current_len..len {
std::ptr::write(data_ptr.add(i), value);
}
}
self.set_length(len)?;
} else if len < current_len {
self.truncate(len)?;
}
if self.config.sync_on_write {
self.sync()?;
}
Ok(())
}
fn create_backing_file(path: &Path, size: u64) -> Result<()> {
let mut file = OpenOptions::new()
.create(true)
.write(true)
.truncate(true)
.open(path)
.map_err(|e| ZiporaError::io_error(&format!("Failed to create file: {}", e)))?;
file.seek(SeekFrom::Start(size - 1))
.map_err(|e| ZiporaError::io_error(&format!("Failed to seek: {}", e)))?;
file.write_all(&[0])
.map_err(|e| ZiporaError::io_error(&format!("Failed to write: {}", e)))?;
file.sync_all()
.map_err(|e| ZiporaError::io_error(&format!("Failed to sync: {}", e)))?;
Ok(())
}
fn create_mmap(path: &Path, _config: &MmapVecConfig) -> Result<MmapAllocation> {
let file_size = std::fs::metadata(path)
.map_err(|e| ZiporaError::io_error(&format!("Failed to get file size: {}", e)))?
.len() as usize;
let min_mmap_size = 64 * 1024; let allocation_size = file_size.max(min_mmap_size);
let allocator = MemoryMappedAllocator::new(min_mmap_size);
let mut allocation = allocator.allocate(allocation_size)
.map_err(|e| ZiporaError::io_error(&format!("Failed to create mmap: {}", e)))?;
if file_size > 0 {
let file_content = std::fs::read(path)
.map_err(|e| ZiporaError::io_error(&format!("Failed to read file: {}", e)))?;
if file_content.len() <= allocation.size() {
unsafe {
std::ptr::copy_nonoverlapping(
file_content.as_ptr(),
allocation.as_mut_ptr(),
file_content.len()
);
}
}
}
Ok(allocation)
}
fn calculate_file_size(capacity: usize) -> u64 {
(HEADER_SIZE + capacity * std::mem::size_of::<T>()) as u64
}
fn update_pointers(&mut self) -> Result<()> {
let mmap = self.mmap.as_ref()
.ok_or_else(|| ZiporaError::invalid_data("No memory mapping"))?;
let base_ptr = mmap.as_ptr() as *mut u8;
self.header = Some(NonNull::new(base_ptr as *mut MmapVecHeader)
.ok_or_else(|| ZiporaError::invalid_data("Invalid header pointer"))?);
let data_ptr = unsafe { base_ptr.add(HEADER_SIZE) } as *mut T;
self.data = Some(NonNull::new(data_ptr)
.ok_or_else(|| ZiporaError::invalid_data("Invalid data pointer"))?);
Ok(())
}
fn initialize_header(&mut self) -> Result<()> {
let header_ptr = self.header()?;
let header = MmapVecHeader::new::<T>();
unsafe {
std::ptr::write(header_ptr.as_ptr(), header);
}
self.set_capacity(self.config.initial_capacity)?;
Ok(())
}
fn validate_header(&self) -> Result<()> {
let header_ptr = self.header()?;
let header = unsafe { header_ptr.as_ref() };
header.validate::<T>()
}
fn header(&self) -> Result<NonNull<MmapVecHeader>> {
self.header.ok_or_else(|| ZiporaError::invalid_data("Header not initialized"))
}
fn data_ptr(&self) -> Result<NonNull<T>> {
self.data.ok_or_else(|| ZiporaError::invalid_data("Data not initialized"))
}
fn set_length(&mut self, length: usize) -> Result<()> {
let header_ptr = self.header()?;
unsafe {
(*header_ptr.as_ptr()).length = length as u64;
}
Ok(())
}
fn set_capacity(&mut self, capacity: usize) -> Result<()> {
let header_ptr = self.header()?;
unsafe {
(*header_ptr.as_ptr()).capacity = capacity as u64;
}
Ok(())
}
fn grow(&mut self) -> Result<()> {
let current_capacity = self.capacity();
let new_capacity = std::cmp::max(
(current_capacity as f64 * self.config.growth_factor) as usize,
current_capacity + 1,
);
self.resize_to_capacity(new_capacity)
}
fn resize_to_capacity(&mut self, new_capacity: usize) -> Result<()> {
self.sync()?;
let new_file_size = Self::calculate_file_size(new_capacity);
let file = OpenOptions::new()
.write(true)
.open(&self.file_path)
.map_err(|e| ZiporaError::io_error(&format!("Failed to open file: {}", e)))?;
file.set_len(new_file_size)
.map_err(|e| ZiporaError::io_error(&format!("Failed to resize file: {}", e)))?;
drop(self.mmap.take()); self.mmap = Some(Self::create_mmap(&self.file_path, &self.config)?);
self.update_pointers()?;
self.set_capacity(new_capacity)?;
self.sync()?;
Ok(())
}
pub fn with_capacity_simd(capacity: usize) -> Result<Self>
where
T: Copy + 'static
{
use std::sync::atomic::{AtomicU64, Ordering};
static COUNTER: AtomicU64 = AtomicU64::new(0);
let temp_dir = std::env::temp_dir();
let thread_id = std::thread::current().id();
let counter = COUNTER.fetch_add(1, Ordering::Relaxed);
let file_path = temp_dir.join(format!(
"mmap_vec_simd_{}_{:?}_{}.dat",
std::process::id(),
thread_id,
counter
));
let config = MmapVecConfig::default();
let mut vec = Self::create(&file_path, config)?;
vec.is_temp_file = true;
if capacity > 0 && !std::mem::needs_drop::<T>() {
let size_bytes = capacity * std::mem::size_of::<T>();
if size_bytes >= 64 { let slice = unsafe {
std::slice::from_raw_parts_mut(
vec.data_ptr()?.as_ptr() as *mut u8,
size_bytes
)
};
fast_fill(slice, 0);
}
}
Ok(vec)
}
pub fn push_bulk_simd(&mut self, items: &[T]) -> Result<()>
where
T: Copy,
{
if self.config.read_only {
return Err(ZiporaError::invalid_data("Vector is read-only"));
}
if items.is_empty() {
return Ok(());
}
let start = std::time::Instant::now();
let new_len = self.len() + items.len();
if new_len > self.capacity() {
self.reserve(items.len())?;
}
let dst_ptr = unsafe { self.data_ptr()?.as_ptr().add(self.len()) };
let size_bytes = items.len() * std::mem::size_of::<T>();
if size_bytes >= 64 { let src_slice = unsafe {
std::slice::from_raw_parts(items.as_ptr() as *const u8, size_bytes)
};
let dst_slice = unsafe {
std::slice::from_raw_parts_mut(dst_ptr as *mut u8, size_bytes)
};
fast_copy_cache_optimized(src_slice, dst_slice)?;
} else {
unsafe {
std::ptr::copy_nonoverlapping(
items.as_ptr(),
dst_ptr,
items.len(),
);
}
}
self.set_length(new_len)?;
Self::monitor_simd_perf(Operation::Copy, start.elapsed(), items.len());
if self.config.sync_on_write {
self.sync()?;
}
Ok(())
}
pub fn pop_bulk_simd(&mut self, count: usize) -> Result<Vec<T>>
where
T: Copy,
{
if self.config.read_only {
return Err(ZiporaError::invalid_data("Vector is read-only"));
}
if count == 0 {
return Ok(Vec::new());
}
if count > self.len() {
return Err(ZiporaError::invalid_data("Count exceeds length"));
}
let start = std::time::Instant::now();
let new_len = self.len() - count;
let src_ptr = unsafe { self.data_ptr()?.as_ptr().add(new_len) };
let mut result = Vec::with_capacity(count);
let size_bytes = count * std::mem::size_of::<T>();
if size_bytes >= 64 {
let src_slice = unsafe {
std::slice::from_raw_parts(src_ptr as *const u8, size_bytes)
};
let dst_slice = unsafe {
std::slice::from_raw_parts_mut(
result.as_mut_ptr() as *mut u8,
size_bytes,
)
};
fast_copy_cache_optimized(src_slice, dst_slice)?;
} else {
unsafe {
std::ptr::copy_nonoverlapping(
src_ptr,
result.as_mut_ptr(),
count,
);
}
}
unsafe { result.set_len(count); }
self.set_length(new_len)?;
Self::monitor_simd_perf(Operation::Copy, start.elapsed(), count);
if self.config.sync_on_write {
self.sync()?;
}
Ok(result)
}
pub fn copy_from_simd(&mut self, other: &Self) -> Result<()>
where
T: Copy,
{
if self.config.read_only {
return Err(ZiporaError::invalid_data("Vector is read-only"));
}
if other.is_empty() {
self.clear()?;
return Ok(());
}
let start = std::time::Instant::now();
if self.capacity() < other.len() {
self.reserve(other.len() - self.len())?;
}
{
let size_bytes = other.len() * std::mem::size_of::<T>();
if size_bytes >= 4096 {
let src_slice = unsafe {
std::slice::from_raw_parts(other.data_ptr()?.as_ptr() as *const u8, size_bytes)
};
fast_prefetch_range(src_slice);
}
if size_bytes >= 64 {
let src_slice = unsafe {
std::slice::from_raw_parts(other.data_ptr()?.as_ptr() as *const u8, size_bytes)
};
let dst_slice = unsafe {
std::slice::from_raw_parts_mut(
self.data_ptr()?.as_ptr() as *mut u8,
size_bytes,
)
};
fast_copy_cache_optimized(src_slice, dst_slice)?;
} else {
unsafe {
std::ptr::copy_nonoverlapping(
other.data_ptr()?.as_ptr(),
self.data_ptr()?.as_ptr(),
other.len(),
);
}
}
self.set_length(other.len())?;
}
Self::monitor_simd_perf(Operation::Copy, start.elapsed(), other.len());
if self.config.sync_on_write {
self.sync()?;
}
Ok(())
}
pub fn fill_range_simd(&mut self, range: std::ops::Range<usize>, value: T) -> Result<()>
where
T: Copy,
{
if self.config.read_only {
return Err(ZiporaError::invalid_data("Vector is read-only"));
}
if range.end > self.len() {
return Err(ZiporaError::invalid_data("Range exceeds length"));
}
if range.is_empty() {
return Ok(());
}
let count = range.end - range.start;
let size_bytes = count * std::mem::size_of::<T>();
if std::mem::size_of::<T>() == 1 && size_bytes >= 64 {
let slice = unsafe {
let ptr = self.data_ptr()?.as_ptr().add(range.start);
std::slice::from_raw_parts_mut(ptr as *mut u8, size_bytes)
};
let byte_value = unsafe { *(&value as *const T as *const u8) };
fast_fill(slice, byte_value);
} else {
let slice = &mut self.as_mut_slice()[range];
slice.fill(value);
}
if self.config.sync_on_write {
self.sync()?;
}
Ok(())
}
pub fn compare_range_simd(
&self,
range: std::ops::Range<usize>,
other: &Self,
) -> Result<bool>
where
T: PartialEq + Copy,
{
if range.end > self.len() || range.len() > other.len() {
return Err(ZiporaError::invalid_data("Invalid range"));
}
if range.is_empty() {
return Ok(true);
}
let count = range.end - range.start;
let size_bytes = count * std::mem::size_of::<T>();
if size_bytes >= 64 {
let self_slice = unsafe {
let ptr = self.data_ptr()?.as_ptr().add(range.start);
std::slice::from_raw_parts(ptr as *const u8, size_bytes)
};
let other_slice = unsafe {
std::slice::from_raw_parts(other.data_ptr()?.as_ptr() as *const u8, size_bytes)
};
Ok(fast_compare(self_slice, other_slice) == 0)
} else {
Ok(&self.as_slice()[range] == &other.as_slice()[0..count])
}
}
#[inline]
fn monitor_simd_perf(operation: Operation, elapsed: std::time::Duration, size: usize) {
let selector = AdaptiveSimdSelector::global();
selector.monitor_performance(operation, elapsed, size as u64);
}
}
impl<T> Drop for MmapVec<T> {
fn drop(&mut self) {
drop(self.mmap.take());
if self.is_temp_file && self.file_path.exists() {
let _ = std::fs::remove_file(&self.file_path);
}
}
}
pub struct MmapVecIter<'a, T> {
slice: &'a [T],
index: usize,
}
impl<'a, T> Iterator for MmapVecIter<'a, T>
where
T: Pod,
{
type Item = &'a T;
fn next(&mut self) -> Option<Self::Item> {
if self.index < self.slice.len() {
let item = &self.slice[self.index];
self.index += 1;
Some(item)
} else {
None
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.slice.len() - self.index;
(remaining, Some(remaining))
}
}
impl<'a, T> ExactSizeIterator for MmapVecIter<'a, T>
where
T: Pod,
{
fn len(&self) -> usize {
self.slice.len() - self.index
}
}
impl<'a, T> IntoIterator for &'a MmapVec<T>
where
T: Pod + 'static,
{
type Item = &'a T;
type IntoIter = MmapVecIter<'a, T>;
fn into_iter(self) -> Self::IntoIter {
MmapVecIter {
slice: self.as_slice(),
index: 0,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
#[test]
fn test_create_mmap_vec() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("test_vec.mmap");
let config = MmapVecConfig::default();
let vec = MmapVec::<u64>::create(&file_path, config).unwrap();
assert_eq!(vec.len(), 0);
assert_eq!(vec.capacity(), 1024);
assert!(file_path.exists());
}
#[test]
fn test_push_and_get() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("test_vec.mmap");
let config = MmapVecConfig::default();
let mut vec = MmapVec::<u32>::create(&file_path, config).unwrap();
vec.push(10).unwrap();
vec.push(20).unwrap();
vec.push(30).unwrap();
assert_eq!(vec.len(), 3);
assert_eq!(vec.get(0), Some(&10));
assert_eq!(vec.get(1), Some(&20));
assert_eq!(vec.get(2), Some(&30));
assert_eq!(vec.get(3), None);
}
#[test]
fn test_pop() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("test_vec.mmap");
let config = MmapVecConfig::default();
let mut vec = MmapVec::<i32>::create(&file_path, config).unwrap();
vec.push(100).unwrap();
vec.push(200).unwrap();
assert_eq!(vec.pop(), Some(200));
assert_eq!(vec.pop(), Some(100));
assert_eq!(vec.pop(), None);
assert_eq!(vec.len(), 0);
}
#[test]
fn test_persistence() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("test_vec.mmap");
{
let config = MmapVecConfig::persistent_cache();
let mut vec = MmapVec::<u64>::create(&file_path, config).unwrap();
for i in 0..100 {
vec.push(i * 2).unwrap();
}
vec.sync().unwrap();
}
{
let config = MmapVecConfig::default();
let vec = MmapVec::<u64>::open(&file_path, config).unwrap();
assert_eq!(vec.len(), 100);
assert_eq!(vec.get(0), Some(&0));
assert_eq!(vec.get(50), Some(&100));
assert_eq!(vec.get(99), Some(&198));
}
}
#[test]
fn test_growth() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("test_vec.mmap");
let config = MmapVecConfig {
initial_capacity: 10,
growth_factor: 2.0,
..MmapVecConfig::default()
};
let mut vec = MmapVec::<u8>::create(&file_path, config).unwrap();
assert_eq!(vec.capacity(), 10);
for i in 0..20 {
vec.push(i as u8).unwrap();
}
assert_eq!(vec.len(), 20);
assert!(vec.capacity() >= 20);
for i in 0..20 {
assert_eq!(vec.get(i), Some(&(i as u8)));
}
}
#[test]
fn test_slice_access() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("test_vec.mmap");
let config = MmapVecConfig::default();
let mut vec = MmapVec::<u16>::create(&file_path, config).unwrap();
for i in 0..10 {
vec.push(i * 3).unwrap();
}
let slice = vec.as_slice();
assert_eq!(slice.len(), 10);
assert_eq!(slice[5], 15);
let mut_slice = vec.as_mut_slice();
mut_slice[5] = 999;
assert_eq!(vec.get(5), Some(&999));
}
#[test]
fn test_iterator() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("test_vec.mmap");
let config = MmapVecConfig::default();
let mut vec = MmapVec::<usize>::create(&file_path, config).unwrap();
let values = vec![1, 4, 9, 16, 25];
for &value in &values {
vec.push(value).unwrap();
}
let collected: Vec<_> = vec.into_iter().copied().collect();
assert_eq!(collected, values);
}
#[test]
fn test_read_only() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("test_vec.mmap");
{
let config = MmapVecConfig::default();
let mut vec = MmapVec::<i64>::create(&file_path, config).unwrap();
vec.push(123).unwrap();
vec.push(456).unwrap();
vec.sync().unwrap();
}
{
let config = MmapVecConfig::read_only();
let mut vec = MmapVec::<i64>::open(&file_path, config).unwrap();
assert_eq!(vec.len(), 2);
assert_eq!(vec.get(0), Some(&123));
assert!(vec.push(789).is_err());
assert_eq!(vec.pop(), None);
assert!(vec.clear().is_err());
}
}
#[test]
fn test_reserve() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("test_vec.mmap");
let config = MmapVecConfig {
initial_capacity: 5,
..MmapVecConfig::default()
};
let mut vec = MmapVec::<u32>::create(&file_path, config).unwrap();
assert_eq!(vec.capacity(), 5);
vec.reserve(100).unwrap();
assert!(vec.capacity() >= 100);
let capacity_before = vec.capacity();
for i in 0..100 {
vec.push(i).unwrap();
}
assert_eq!(vec.capacity(), capacity_before);
}
#[test]
fn test_different_configs() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("large.mmap");
let config = MmapVecConfig::large_dataset();
let vec = MmapVec::<u64>::create(&file_path, config).unwrap();
assert_eq!(vec.capacity(), 1024 * 1024);
let file_path2 = temp_dir.path().join("cache.mmap");
let config = MmapVecConfig::persistent_cache();
let vec2 = MmapVec::<u32>::create(&file_path2, config).unwrap();
assert!(vec2.config.sync_on_write);
}
#[test]
fn test_builder_pattern() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("builder.mmap");
let config = MmapVecConfig::builder()
.with_initial_capacity(5000)
.with_growth_factor(1.5)
.with_populate_pages(true)
.with_sync_on_write(false)
.build();
let vec = MmapVec::<i32>::create(&file_path, config).unwrap();
assert_eq!(vec.capacity(), 5000);
assert!(!vec.config.sync_on_write);
assert_eq!(vec.config.growth_factor, 1.5);
}
#[test]
fn test_stats() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("stats.mmap");
let config = MmapVecConfig {
initial_capacity: 100,
..MmapVecConfig::default()
};
let mut vec = MmapVec::<u64>::create(&file_path, config).unwrap();
for i in 0..50 {
vec.push(i).unwrap();
}
let stats = vec.stats();
assert_eq!(stats.len, 50);
assert_eq!(stats.capacity, 100);
assert_eq!(stats.element_size, 8); assert_eq!(stats.utilization, 0.5); assert_eq!(stats.memory_efficiency(), 50.0);
assert_eq!(stats.wasted_space(), 50 * 8);
assert!(stats.needs_compaction(0.7));
assert!(!stats.needs_compaction(0.4));
}
#[test]
fn test_extend() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("extend.mmap");
let config = MmapVecConfig::default();
let mut vec = MmapVec::<u16>::create(&file_path, config).unwrap();
vec.extend(1..=10).unwrap();
assert_eq!(vec.len(), 10);
assert_eq!(vec.get(0), Some(&1));
assert_eq!(vec.get(9), Some(&10));
let more_data = vec![100, 200, 300];
vec.extend(more_data.into_iter()).unwrap();
assert_eq!(vec.len(), 13);
assert_eq!(vec.get(10), Some(&100));
assert_eq!(vec.get(12), Some(&300));
}
#[test]
fn test_truncate() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("truncate.mmap");
let config = MmapVecConfig::default();
let mut vec = MmapVec::<i16>::create(&file_path, config).unwrap();
for i in 0..20 {
vec.push(i).unwrap();
}
assert_eq!(vec.len(), 20);
vec.truncate(10).unwrap();
assert_eq!(vec.len(), 10);
assert_eq!(vec.get(9), Some(&9));
assert_eq!(vec.get(10), None);
vec.truncate(15).unwrap();
assert_eq!(vec.len(), 10);
}
#[test]
fn test_resize_with_value() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("resize_val.mmap");
let config = MmapVecConfig::default();
let mut vec = MmapVec::<i8>::create(&file_path, config).unwrap();
vec.push(1).unwrap();
vec.push(2).unwrap();
assert_eq!(vec.len(), 2);
vec.resize(5, 99).unwrap();
assert_eq!(vec.len(), 5);
assert_eq!(vec.get(0), Some(&1));
assert_eq!(vec.get(1), Some(&2));
assert_eq!(vec.get(2), Some(&99));
assert_eq!(vec.get(3), Some(&99));
assert_eq!(vec.get(4), Some(&99));
vec.resize(3, 0).unwrap();
assert_eq!(vec.len(), 3);
assert_eq!(vec.get(2), Some(&99));
assert_eq!(vec.get(3), None);
}
#[test]
fn test_shrink_to_fit() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("shrink.mmap");
let config = MmapVecConfig {
initial_capacity: 1000,
..MmapVecConfig::default()
};
let mut vec = MmapVec::<u32>::create(&file_path, config).unwrap();
assert_eq!(vec.capacity(), 1000);
for i in 0..10 {
vec.push(i).unwrap();
}
assert_eq!(vec.len(), 10);
assert_eq!(vec.capacity(), 1000);
vec.shrink_to_fit().unwrap();
assert_eq!(vec.len(), 10);
assert!(vec.capacity() <= 10);
for i in 0..10 {
assert_eq!(vec.get(i), Some(&(i as u32)));
}
}
#[test]
fn test_new_config_presets() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("perf.mmap");
let config = MmapVecConfig::performance_optimized();
let vec = MmapVec::<u64>::create(&file_path, config).unwrap();
assert_eq!(vec.capacity(), 8192);
assert_eq!(vec.config.growth_factor, 1.618);
assert!(vec.config.populate_pages);
let file_path2 = temp_dir.path().join("mem.mmap");
let config = MmapVecConfig::memory_optimized();
let vec2 = MmapVec::<u64>::create(&file_path2, config).unwrap();
assert_eq!(vec2.capacity(), 256);
assert_eq!(vec2.config.growth_factor, 1.4);
assert!(!vec2.config.populate_pages);
let file_path3 = temp_dir.path().join("rt.mmap");
let config = MmapVecConfig::realtime();
let vec3 = MmapVec::<u64>::create(&file_path3, config).unwrap();
assert_eq!(vec3.capacity(), 1024);
assert_eq!(vec3.config.growth_factor, 1.5);
assert!(vec3.config.populate_pages);
assert!(!vec3.config.sync_on_write);
}
#[test]
fn test_simd_bulk_initialization() {
let vec: MmapVec<u64> = MmapVec::with_capacity_simd(1000).unwrap();
assert!(vec.capacity() >= 1000);
assert_eq!(vec.len(), 0);
}
#[test]
fn test_simd_bulk_push() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("simd_push.mmap");
let config = MmapVecConfig::default();
let mut vec = MmapVec::<u32>::create(&file_path, config).unwrap();
let items: Vec<u32> = (0..1000).collect();
vec.push_bulk_simd(&items).unwrap();
assert_eq!(vec.len(), 1000);
assert_eq!(&vec.as_slice()[..], &items[..]);
}
#[test]
fn test_simd_bulk_pop() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("simd_pop.mmap");
let config = MmapVecConfig::default();
let mut vec = MmapVec::<u32>::create(&file_path, config).unwrap();
let items: Vec<u32> = (0..1000).collect();
vec.push_bulk_simd(&items).unwrap();
let popped = vec.pop_bulk_simd(500).unwrap();
assert_eq!(popped.len(), 500);
assert_eq!(vec.len(), 500);
assert_eq!(&popped[..], &items[500..1000]);
}
#[test]
fn test_simd_copy_from() {
let temp_dir = tempdir().unwrap();
let file_path1 = temp_dir.path().join("simd_src.mmap");
let file_path2 = temp_dir.path().join("simd_dst.mmap");
let config = MmapVecConfig::default();
let mut source = MmapVec::<u64>::create(&file_path1, config.clone()).unwrap();
let items: Vec<u64> = (0..1000).collect();
source.push_bulk_simd(&items).unwrap();
let mut dest = MmapVec::<u64>::create(&file_path2, config).unwrap();
dest.copy_from_simd(&source).unwrap();
assert_eq!(dest.len(), source.len());
assert_eq!(dest.as_slice(), source.as_slice());
}
#[test]
fn test_simd_fill_range() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("simd_fill.mmap");
let config = MmapVecConfig::default();
let mut vec = MmapVec::<u8>::create(&file_path, config).unwrap();
let zeros = vec![0u8; 1000];
vec.push_bulk_simd(&zeros).unwrap();
vec.fill_range_simd(100..500, 0xFF).unwrap();
assert!(vec.as_slice()[100..500].iter().all(|&b| b == 0xFF));
assert!(vec.as_slice()[0..100].iter().all(|&b| b == 0));
assert!(vec.as_slice()[500..1000].iter().all(|&b| b == 0));
}
#[test]
fn test_simd_compare_range() {
let temp_dir = tempdir().unwrap();
let file_path1 = temp_dir.path().join("simd_cmp1.mmap");
let file_path2 = temp_dir.path().join("simd_cmp2.mmap");
let file_path3 = temp_dir.path().join("simd_cmp3.mmap");
let config = MmapVecConfig::default();
let mut vec1 = MmapVec::<u32>::create(&file_path1, config.clone()).unwrap();
let items1: Vec<u32> = (0..1000).collect();
vec1.push_bulk_simd(&items1).unwrap();
let mut vec2 = MmapVec::<u32>::create(&file_path2, config.clone()).unwrap();
let items2: Vec<u32> = (0..1000).collect();
vec2.push_bulk_simd(&items2).unwrap();
let mut vec3 = MmapVec::<u32>::create(&file_path3, config).unwrap();
let items3: Vec<u32> = (1..1001).collect();
vec3.push_bulk_simd(&items3).unwrap();
assert!(vec1.compare_range_simd(0..1000, &vec2).unwrap());
assert!(!vec1.compare_range_simd(0..1000, &vec3).unwrap());
}
#[test]
fn test_simd_performance_different_sizes() {
let temp_dir = tempdir().unwrap();
for size in &[10, 64, 256, 1024, 4096] {
let file_path = temp_dir.path().join(format!("simd_perf_{}.mmap", size));
let config = MmapVecConfig::default();
let mut vec = MmapVec::<u64>::create(&file_path, config).unwrap();
let items: Vec<u64> = (0..*size as u64).collect();
vec.push_bulk_simd(&items).unwrap();
assert_eq!(vec.len(), *size);
let popped = vec.pop_bulk_simd(*size / 2).unwrap();
assert_eq!(popped.len(), *size / 2);
}
}
#[test]
fn test_simd_edge_cases() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("simd_edge.mmap");
let config = MmapVecConfig::default();
let mut vec = MmapVec::<u32>::create(&file_path, config).unwrap();
vec.push_bulk_simd(&[]).unwrap();
assert_eq!(vec.len(), 0);
vec.push_bulk_simd(&[42]).unwrap();
assert_eq!(vec.len(), 1);
assert_eq!(vec.as_slice()[0], 42);
assert!(vec.pop_bulk_simd(10).is_err());
}
#[test]
fn test_simd_small_operations() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("simd_small.mmap");
let config = MmapVecConfig::default();
let mut vec = MmapVec::<u8>::create(&file_path, config).unwrap();
let small_data = vec![1u8, 2, 3, 4, 5];
vec.push_bulk_simd(&small_data).unwrap();
assert_eq!(vec.as_slice(), &small_data[..]);
let popped = vec.pop_bulk_simd(2).unwrap();
assert_eq!(popped, vec![4, 5]);
assert_eq!(vec.len(), 3);
}
#[test]
fn test_simd_read_only_error() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("simd_ro.mmap");
{
let config = MmapVecConfig::default();
let mut vec = MmapVec::<u32>::create(&file_path, config).unwrap();
vec.push_bulk_simd(&[1, 2, 3]).unwrap();
vec.sync().unwrap();
}
{
let config = MmapVecConfig::read_only();
let mut vec = MmapVec::<u32>::open(&file_path, config).unwrap();
assert!(vec.push_bulk_simd(&[4, 5]).is_err());
assert!(vec.pop_bulk_simd(1).is_err());
assert!(vec.fill_range_simd(0..2, 99).is_err());
}
}
#[test]
fn test_simd_large_bulk_operations() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("simd_large.mmap");
let config = MmapVecConfig::default();
let mut vec = MmapVec::<u64>::create(&file_path, config).unwrap();
let large_data: Vec<u64> = (0..10000).collect();
vec.push_bulk_simd(&large_data).unwrap();
assert_eq!(vec.len(), 10000);
for (i, &val) in vec.as_slice().iter().enumerate() {
assert_eq!(val, i as u64);
}
let popped = vec.pop_bulk_simd(5000).unwrap();
assert_eq!(popped.len(), 5000);
assert_eq!(vec.len(), 5000);
}
#[test]
fn test_simd_fill_range_edge_cases() {
let temp_dir = tempdir().unwrap();
let file_path = temp_dir.path().join("simd_fill_edge.mmap");
let config = MmapVecConfig::default();
let mut vec = MmapVec::<u8>::create(&file_path, config).unwrap();
let data = vec![0u8; 100];
vec.push_bulk_simd(&data).unwrap();
vec.fill_range_simd(50..50, 0xFF).unwrap();
assert_eq!(vec.as_slice()[49], 0);
assert_eq!(vec.as_slice()[50], 0);
assert!(vec.fill_range_simd(50..200, 0xFF).is_err());
vec.fill_range_simd(0..100, 0xAA).unwrap();
assert!(vec.as_slice().iter().all(|&b| b == 0xAA));
}
#[test]
fn test_simd_compare_range_edge_cases() {
let temp_dir = tempdir().unwrap();
let file_path1 = temp_dir.path().join("simd_cmp_e1.mmap");
let file_path2 = temp_dir.path().join("simd_cmp_e2.mmap");
let config = MmapVecConfig::default();
let mut vec1 = MmapVec::<u32>::create(&file_path1, config.clone()).unwrap();
vec1.push_bulk_simd(&[1, 2, 3, 4, 5]).unwrap();
let mut vec2 = MmapVec::<u32>::create(&file_path2, config).unwrap();
vec2.push_bulk_simd(&[1, 2, 3]).unwrap();
assert!(vec1.compare_range_simd(0..0, &vec2).unwrap());
assert!(vec1.compare_range_simd(0..10, &vec2).is_err());
assert!(vec1.compare_range_simd(0..3, &vec2).unwrap());
assert!(!vec1.compare_range_simd(2..5, &vec2).unwrap());
}
#[test]
fn test_simd_copy_from_empty() {
let temp_dir = tempdir().unwrap();
let file_path1 = temp_dir.path().join("simd_copy_empty1.mmap");
let file_path2 = temp_dir.path().join("simd_copy_empty2.mmap");
let config = MmapVecConfig::default();
let source = MmapVec::<u64>::create(&file_path1, config.clone()).unwrap();
let mut dest = MmapVec::<u64>::create(&file_path2, config).unwrap();
dest.copy_from_simd(&source).unwrap();
assert_eq!(dest.len(), 0);
assert!(dest.is_empty());
}
#[test]
fn test_simd_copy_from_capacity_growth() {
let temp_dir = tempdir().unwrap();
let file_path1 = temp_dir.path().join("simd_copy_grow1.mmap");
let file_path2 = temp_dir.path().join("simd_copy_grow2.mmap");
let config = MmapVecConfig {
initial_capacity: 10,
..MmapVecConfig::default()
};
let mut source = MmapVec::<u64>::create(&file_path1, config.clone()).unwrap();
let large_data: Vec<u64> = (0..1000).collect();
source.push_bulk_simd(&large_data).unwrap();
let mut dest = MmapVec::<u64>::create(&file_path2, config).unwrap();
assert!(dest.capacity() < source.len());
dest.copy_from_simd(&source).unwrap();
assert_eq!(dest.len(), source.len());
assert_eq!(dest.as_slice(), source.as_slice());
}
}