use crate::elias_fano::EliasFano;
use crate::error::{Error, Result};
#[derive(Debug, Clone)]
pub struct PartitionedEliasFano {
universe_size: u32,
block_size: usize,
n: usize,
bases: Vec<u32>,
blocks: Vec<EliasFano>,
}
impl PartitionedEliasFano {
#[must_use]
pub fn new(values: &[u32], universe_size: u32, block_size: usize) -> Self {
let n = values.len();
let block_size = block_size.max(1);
if n == 0 {
return Self {
universe_size,
block_size,
n: 0,
bases: Vec::new(),
blocks: Vec::new(),
};
}
let mut bases = Vec::new();
let mut blocks = Vec::new();
let mut i = 0usize;
while i < n {
let j = (i + block_size).min(n);
let base = values[i];
let last = values[j - 1];
let local_u = (last - base).saturating_add(1);
let rel: Vec<u32> = values[i..j].iter().map(|&v| v - base).collect();
bases.push(base);
blocks.push(EliasFano::new(&rel, local_u));
i = j;
}
Self {
universe_size,
block_size,
n,
bases,
blocks,
}
}
#[must_use]
pub fn universe_size(&self) -> u32 {
self.universe_size
}
#[must_use]
pub fn len(&self) -> usize {
self.n
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.n == 0
}
#[must_use]
pub fn block_size(&self) -> usize {
self.block_size
}
#[must_use]
pub fn num_blocks(&self) -> usize {
self.blocks.len()
}
pub fn get(&self, i: usize) -> Result<u32> {
if i >= self.n {
return Err(Error::IndexOutOfBounds(i));
}
let b = i / self.block_size;
let off = i % self.block_size;
let base = *self
.bases
.get(b)
.ok_or(Error::InvalidEncoding("missing block base".to_string()))?;
let block = self
.blocks
.get(b)
.ok_or(Error::InvalidEncoding("missing block".to_string()))?;
let rel = block.get(off)?;
Ok(base + rel)
}
pub fn to_bytes(&self) -> Vec<u8> {
let mut out = Vec::new();
out.extend_from_slice(b"SBITPEF1");
out.extend_from_slice(&self.universe_size.to_le_bytes());
out.extend_from_slice(&(self.block_size as u32).to_le_bytes());
out.extend_from_slice(&(self.n as u64).to_le_bytes());
out.extend_from_slice(&(self.blocks.len() as u64).to_le_bytes());
for &b in &self.bases {
out.extend_from_slice(&b.to_le_bytes());
}
for blk in &self.blocks {
let bytes = blk.to_bytes();
out.extend_from_slice(&(bytes.len() as u64).to_le_bytes());
out.extend_from_slice(&bytes);
}
out
}
pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
const MAGIC: &[u8; 8] = b"SBITPEF1";
let mut off = 0usize;
let mut take = |n: usize| -> Result<&[u8]> {
if off + n > bytes.len() {
return Err(Error::InvalidEncoding(
"unexpected end of input".to_string(),
));
}
let slice = &bytes[off..off + n];
off += n;
Ok(slice)
};
let magic = take(8)?;
if magic != MAGIC {
return Err(Error::InvalidEncoding(
"bad magic for PartitionedEliasFano".to_string(),
));
}
let universe_size = u32::from_le_bytes(take(4)?.try_into().unwrap());
let block_size = u32::from_le_bytes(take(4)?.try_into().unwrap()) as usize;
let n = u64::from_le_bytes(take(8)?.try_into().unwrap()) as usize;
let num_blocks = u64::from_le_bytes(take(8)?.try_into().unwrap()) as usize;
if num_blocks.saturating_mul(4) > bytes.len() {
return Err(Error::InvalidEncoding(format!(
"PEF num_blocks ({num_blocks}) too large for input ({} bytes)",
bytes.len()
)));
}
let mut bases = Vec::with_capacity(num_blocks);
for _ in 0..num_blocks {
let b = u32::from_le_bytes(take(4)?.try_into().unwrap());
bases.push(b);
}
let mut blocks = Vec::with_capacity(num_blocks);
for _ in 0..num_blocks {
let len_bytes = u64::from_le_bytes(take(8)?.try_into().unwrap()) as usize;
let blk_bytes = take(len_bytes)?;
let ef = EliasFano::from_bytes(blk_bytes)?;
blocks.push(ef);
}
if off != bytes.len() {
return Err(Error::InvalidEncoding(
"trailing bytes after PartitionedEliasFano".to_string(),
));
}
if block_size == 0 {
return Err(Error::InvalidEncoding(
"block_size must be >= 1".to_string(),
));
}
let actual_n: usize = blocks.iter().map(|b| b.len()).sum();
if actual_n != n {
return Err(Error::InvalidEncoding(format!(
"PEF n ({n}) does not match sum of block lengths ({actual_n})"
)));
}
Ok(Self {
universe_size,
block_size,
n,
bases,
blocks,
})
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn partitioned_roundtrip_basic() {
let values = vec![10, 20, 30, 31, 32, 100, 1000];
let pef = PartitionedEliasFano::new(&values, 2000, 3);
assert_eq!(pef.len(), values.len());
for (i, &v) in values.iter().enumerate() {
assert_eq!(pef.get(i).unwrap(), v);
}
let bytes = pef.to_bytes();
let pef2 = PartitionedEliasFano::from_bytes(&bytes).unwrap();
assert_eq!(pef2.len(), values.len());
for (i, &v) in values.iter().enumerate() {
assert_eq!(pef2.get(i).unwrap(), v);
}
}
#[test]
fn partitioned_single_element() {
let pef = PartitionedEliasFano::new(&[42], 100, 64);
assert_eq!(pef.len(), 1);
assert_eq!(pef.get(0).unwrap(), 42);
let bytes = pef.to_bytes();
let pef2 = PartitionedEliasFano::from_bytes(&bytes).unwrap();
assert_eq!(pef2.get(0).unwrap(), 42);
}
#[test]
fn partitioned_empty() {
let pef = PartitionedEliasFano::new(&[], 100, 64);
assert!(pef.is_empty());
assert!(pef.get(0).is_err());
}
#[test]
fn partitioned_block_boundary() {
let values = vec![0, 1, 2, 10, 11, 12];
let pef = PartitionedEliasFano::new(&values, 20, 3);
assert_eq!(pef.num_blocks(), 2);
for (i, &v) in values.iter().enumerate() {
assert_eq!(pef.get(i).unwrap(), v);
}
}
#[test]
fn partitioned_block_size_larger_than_n() {
let values = vec![5, 10, 15];
let pef = PartitionedEliasFano::new(&values, 20, 100);
assert_eq!(pef.num_blocks(), 1);
for (i, &v) in values.iter().enumerate() {
assert_eq!(pef.get(i).unwrap(), v);
}
}
#[test]
fn partitioned_rejects_corrupted_n() {
let values = vec![10, 20, 30];
let pef = PartitionedEliasFano::new(&values, 100, 2);
let mut bytes = pef.to_bytes();
let bad_n: u64 = 999;
bytes[16..24].copy_from_slice(&bad_n.to_le_bytes());
assert!(PartitionedEliasFano::from_bytes(&bytes).is_err());
}
}