use crate::error::{Result, ZiporaError};
use memmap2::Mmap;
use std::fs::{File, OpenOptions};
use std::io::Write;
use std::path::{Path, PathBuf};
pub struct ZReorderMap {
_file: File,
mmap: Mmap,
_path: PathBuf,
pos: usize,
current_value: usize,
seq_length: usize,
size: usize,
index: usize,
sign: i64,
}
impl ZReorderMap {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
let path_buf = path.as_ref().to_path_buf();
let file = OpenOptions::new()
.read(true)
.open(&path_buf)?;
let mmap = unsafe {
memmap2::MmapOptions::new()
.map(&file)
.map_err(|e| ZiporaError::Io(e))?
};
if mmap.len() < 16 {
return Err(ZiporaError::invalid_data(format!(
"ZReorderMap file too small: {} bytes, expected at least 16",
mmap.len()
)));
}
let mut map = Self {
_file: file,
mmap,
_path: path_buf,
pos: 0,
current_value: 0,
seq_length: 0,
size: 0,
index: 0,
sign: 0,
};
map.rewind()?;
Ok(map)
}
#[inline]
pub fn eof(&self) -> bool {
self.index >= self.size
}
#[inline]
pub fn size(&self) -> usize {
self.size
}
#[inline]
pub fn index(&self) -> usize {
debug_assert!(self.index < self.size, "index() called at EOF");
self.index
}
#[inline]
pub fn current(&self) -> usize {
debug_assert!(self.index < self.size, "current() called at EOF");
self.current_value
}
pub fn rewind(&mut self) -> Result<()> {
if self.mmap.len() < 16 {
return Err(ZiporaError::invalid_data(
"ZReorderMap file too small for header"
));
}
let mut header = [0u8; 8];
header.copy_from_slice(&self.mmap[0..8]);
self.size = u64::from_le_bytes(header) as usize;
const MAX_REASONABLE_SIZE: usize = usize::MAX / 100;
if self.size > MAX_REASONABLE_SIZE {
return Err(ZiporaError::invalid_data(format!(
"ZReorderMap size {} exceeds reasonable limit {}",
self.size, MAX_REASONABLE_SIZE
)));
}
header.copy_from_slice(&self.mmap[8..16]);
self.sign = i64::from_le_bytes(header);
if self.sign != 1 && self.sign != -1 {
return Err(ZiporaError::invalid_data(format!(
"Invalid sign value: {}, expected 1 or -1",
self.sign
)));
}
self.pos = 16;
self.index = 0;
self.seq_length = 0;
if self.size > 0 {
self.read_entry()?;
}
Ok(())
}
fn read_entry(&mut self) -> Result<()> {
if self.pos + 5 > self.mmap.len() {
return Err(ZiporaError::invalid_data(
"ZReorderMap: read value out of range"
));
}
let mut encoded = [0u8; 8];
encoded[..5].copy_from_slice(&self.mmap[self.pos..self.pos + 5]);
let encoded_value = u64::from_le_bytes(encoded) as usize;
self.pos += 5;
if encoded_value & 1 != 0 {
self.seq_length = 1;
self.current_value = encoded_value >> 1;
return Ok(());
}
self.current_value = encoded_value >> 1;
self.seq_length = self.read_var_uint()?;
if self.pos > self.mmap.len() {
return Err(ZiporaError::invalid_data(
"ZReorderMap: read seq out of range"
));
}
Ok(())
}
fn read_var_uint(&mut self) -> Result<usize> {
let mut result: usize = 0;
let mut shift = 0;
loop {
if self.pos >= self.mmap.len() {
return Err(ZiporaError::invalid_data(
"ZReorderMap: var_uint extends beyond file"
));
}
let byte = self.mmap[self.pos];
self.pos += 1;
if shift >= 64 {
return Err(ZiporaError::invalid_data(
"ZReorderMap: var_uint overflow"
));
}
result |= ((byte & 0x7F) as usize) << shift;
if byte & 0x80 == 0 {
break;
}
shift += 7;
}
Ok(result)
}
}
impl Iterator for ZReorderMap {
type Item = usize;
fn next(&mut self) -> Option<Self::Item> {
if self.eof() {
return None;
}
debug_assert!(self.seq_length > 0, "seq_length must be > 0");
let value = self.current_value;
self.index += 1;
if self.sign > 0 {
self.current_value = self.current_value.wrapping_add(self.sign as usize);
} else {
self.current_value = (self.current_value as i64 + self.sign) as usize;
}
self.seq_length -= 1;
if self.seq_length == 0 && !self.eof() {
if let Err(_) = self.read_entry() {
self.index = self.size;
return Some(value);
}
}
Some(value)
}
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.size.saturating_sub(self.index);
(remaining, Some(remaining))
}
}
impl ExactSizeIterator for ZReorderMap {
fn len(&self) -> usize {
self.size.saturating_sub(self.index)
}
}
pub struct ZReorderMapBuilder {
file: File,
base_value: usize,
seq_length: usize,
sign: i64,
remaining_size: usize,
buffer: Vec<u8>,
}
impl ZReorderMapBuilder {
pub fn new<P: AsRef<Path>>(path: P, size: usize, sign: i64) -> Result<Self> {
if sign != 1 && sign != -1 {
return Err(ZiporaError::invalid_data(format!(
"Invalid sign value: {}, expected 1 or -1",
sign
)));
}
let mut file = OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(path)?;
file.write_all(&size.to_le_bytes())?;
file.write_all(&sign.to_le_bytes())?;
Ok(Self {
file,
base_value: usize::MAX, seq_length: 0,
sign,
remaining_size: size,
buffer: Vec::with_capacity(4096),
})
}
pub fn push(&mut self, value: usize) -> Result<()> {
if self.remaining_size == 0 {
return Err(ZiporaError::invalid_data(
"Cannot push more values: size limit reached"
));
}
if value > 0x7FFFFFFFFF {
return Err(ZiporaError::invalid_data(format!(
"Value {} exceeds 40-bit limit (0x7FFFFFFFFF)",
value
)));
}
let next_expected = if self.seq_length == 0 {
usize::MAX } else {
(self.base_value as i64 + self.seq_length as i64 * self.sign) as usize
};
if value != next_expected {
self.write_sequence()?;
self.base_value = value;
self.seq_length = 1;
} else {
self.seq_length += 1;
}
self.remaining_size -= 1;
Ok(())
}
pub fn finish(mut self) -> Result<()> {
if self.remaining_size != 0 {
return Err(ZiporaError::invalid_data(format!(
"finish() called with {} elements remaining",
self.remaining_size
)));
}
self.write_sequence()?;
self.file.write_all(&self.buffer)?;
self.buffer.clear();
self.file.flush()?;
self.file.sync_all()?;
Ok(())
}
fn write_sequence(&mut self) -> Result<()> {
if self.seq_length == 0 {
return Ok(());
}
if self.seq_length == 1 {
let encoded = (self.base_value << 1) | 1;
let bytes = encoded.to_le_bytes();
self.buffer.extend_from_slice(&bytes[..5]);
} else {
let encoded = self.base_value << 1;
let bytes = encoded.to_le_bytes();
self.buffer.extend_from_slice(&bytes[..5]);
self.write_var_uint(self.seq_length)?;
}
if self.buffer.len() >= 4096 {
self.file.write_all(&self.buffer)?;
self.buffer.clear();
}
Ok(())
}
fn write_var_uint(&mut self, mut value: usize) -> Result<()> {
loop {
let mut byte = (value & 0x7F) as u8;
value >>= 7;
if value != 0 {
byte |= 0x80; }
self.buffer.push(byte);
if value == 0 {
break;
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::NamedTempFile;
#[test]
fn test_ascending_sequence() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
{
let mut builder = ZReorderMapBuilder::new(path, 100, 1)?;
for i in 0..100 {
builder.push(i)?;
}
builder.finish()?;
}
let map = ZReorderMap::open(path)?;
assert_eq!(map.size(), 100);
let values: Vec<usize> = map.collect();
let expected: Vec<usize> = (0..100).collect();
assert_eq!(values, expected);
Ok(())
}
#[test]
fn test_descending_sequence() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
{
let mut builder = ZReorderMapBuilder::new(path, 100, -1)?;
for i in (0..100).rev() {
builder.push(i)?;
}
builder.finish()?;
}
let map = ZReorderMap::open(path)?;
assert_eq!(map.size(), 100);
let values: Vec<usize> = map.collect();
let expected: Vec<usize> = (0..100).rev().collect();
assert_eq!(values, expected);
Ok(())
}
#[test]
fn test_mixed_sequences() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
let input = vec![100, 101, 102, 200, 300, 301];
{
let mut builder = ZReorderMapBuilder::new(path, input.len(), 1)?;
for &val in &input {
builder.push(val)?;
}
builder.finish()?;
}
let map = ZReorderMap::open(path)?;
assert_eq!(map.size(), input.len());
let values: Vec<usize> = map.collect();
assert_eq!(values, input);
Ok(())
}
#[test]
fn test_single_values() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
let input = vec![5, 10, 15, 20, 25];
{
let mut builder = ZReorderMapBuilder::new(path, input.len(), 1)?;
for &val in &input {
builder.push(val)?;
}
builder.finish()?;
}
let map = ZReorderMap::open(path)?;
let values: Vec<usize> = map.collect();
assert_eq!(values, input);
Ok(())
}
#[test]
fn test_large_dataset() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
let size = 1_000_000;
{
let mut builder = ZReorderMapBuilder::new(path, size, 1)?;
for i in 0..size {
builder.push(i)?;
}
builder.finish()?;
}
let map = ZReorderMap::open(path)?;
assert_eq!(map.size(), size);
let values: Vec<usize> = map.take(5).collect();
assert_eq!(values, vec![0, 1, 2, 3, 4]);
Ok(())
}
#[test]
fn test_rewind() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
let input = vec![10, 11, 12, 20, 21];
{
let mut builder = ZReorderMapBuilder::new(path, input.len(), 1)?;
for &val in &input {
builder.push(val)?;
}
builder.finish()?;
}
let mut map = ZReorderMap::open(path)?;
for _ in 0..3 {
let values: Vec<usize> = map.by_ref().collect();
assert_eq!(values, input);
assert!(map.eof());
map.rewind()?;
assert!(!map.eof());
}
Ok(())
}
#[test]
fn test_bounds_checking() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
{
let mut builder = ZReorderMapBuilder::new(path, 1, 1)?;
let result = builder.push(0x8000000000); assert!(result.is_err());
}
{
let mut builder = ZReorderMapBuilder::new(path, 2, 1)?;
builder.push(1)?;
builder.push(2)?;
let result = builder.push(3);
assert!(result.is_err());
}
{
let builder = ZReorderMapBuilder::new(path, 5, 1)?;
let result = builder.finish();
assert!(result.is_err());
}
Ok(())
}
#[test]
fn test_invalid_sign() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
let result = ZReorderMapBuilder::new(path, 10, 0);
assert!(result.is_err());
let result = ZReorderMapBuilder::new(path, 10, 2);
assert!(result.is_err());
Ok(())
}
#[test]
fn test_empty_map() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
{
let builder = ZReorderMapBuilder::new(path, 0, 1)?;
builder.finish()?;
}
let map = ZReorderMap::open(path)?;
assert_eq!(map.size(), 0);
assert!(map.eof());
let values: Vec<usize> = map.collect();
assert!(values.is_empty());
Ok(())
}
#[test]
fn test_iterator_traits() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
let input = vec![0, 1, 2, 3, 4];
{
let mut builder = ZReorderMapBuilder::new(path, input.len(), 1)?;
for &val in &input {
builder.push(val)?;
}
builder.finish()?;
}
let map = ZReorderMap::open(path)?;
let (lower, upper) = map.size_hint();
assert_eq!(lower, input.len());
assert_eq!(upper, Some(input.len()));
assert_eq!(map.len(), input.len());
Ok(())
}
#[test]
fn test_current_and_index() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
let input = vec![100, 101, 102];
{
let mut builder = ZReorderMapBuilder::new(path, input.len(), 1)?;
for &val in &input {
builder.push(val)?;
}
builder.finish()?;
}
let mut map = ZReorderMap::open(path)?;
assert_eq!(map.current(), 100);
assert_eq!(map.index(), 0);
map.next();
assert_eq!(map.current(), 101);
assert_eq!(map.index(), 1);
map.next();
assert_eq!(map.current(), 102);
assert_eq!(map.index(), 2);
Ok(())
}
#[test]
fn test_var_uint_encoding() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
let mut input = vec![0];
for i in 1..1000 {
input.push(i);
}
{
let mut builder = ZReorderMapBuilder::new(path, input.len(), 1)?;
for &val in &input {
builder.push(val)?;
}
builder.finish()?;
}
let map = ZReorderMap::open(path)?;
let values: Vec<usize> = map.collect();
assert_eq!(values, input);
Ok(())
}
#[test]
fn test_compression_efficiency() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
let size = 10000;
{
let mut builder = ZReorderMapBuilder::new(path, size, 1)?;
for i in 0..size {
builder.push(i)?;
}
builder.finish()?;
}
let metadata = std::fs::metadata(path)?;
let file_size = metadata.len();
assert!(file_size < 100, "Expected < 100 bytes, got {}", file_size);
let map = ZReorderMap::open(path)?;
assert_eq!(map.size(), size);
Ok(())
}
#[test]
fn test_alternating_sequences() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
let input = vec![
0, 1, 2, 10, 11, 20, 21, 22, 23, 100, 200, 201, ];
{
let mut builder = ZReorderMapBuilder::new(path, input.len(), 1)?;
for &val in &input {
builder.push(val)?;
}
builder.finish()?;
}
let map = ZReorderMap::open(path)?;
let values: Vec<usize> = map.collect();
assert_eq!(values, input);
Ok(())
}
#[test]
fn test_maximum_value() -> Result<()> {
let temp_file = NamedTempFile::new()?;
let path = temp_file.path();
let max_val = 0x7FFFFFFFFF;
{
let mut builder = ZReorderMapBuilder::new(path, 1, 1)?;
builder.push(max_val)?;
builder.finish()?;
}
let map = ZReorderMap::open(path)?;
let values: Vec<usize> = map.collect();
assert_eq!(values, vec![max_val]);
Ok(())
}
}