use crate::elias_fano::EliasFano;
use crate::error::{Error, Result};
use alloc::vec::Vec;
use alloc::{format, string::ToString};
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct PartitionedEliasFano {
universe_size: u64,
block_size: usize,
n: usize,
bases: Vec<u64>,
blocks: Vec<EliasFano>,
}
impl PartitionedEliasFano {
#[must_use]
pub fn new(values: &[u64], universe_size: u64, block_size: usize) -> Self {
let n = values.len();
let block_size = block_size.max(1);
if n == 0 {
return Self {
universe_size,
block_size,
n: 0,
bases: Vec::new(),
blocks: Vec::new(),
};
}
let mut bases = Vec::new();
let mut blocks = Vec::new();
let mut i = 0usize;
while i < n {
let j = (i + block_size).min(n);
let base = values[i];
let last = values[j - 1];
let local_u = (last - base).saturating_add(1);
let rel: Vec<u64> = values[i..j].iter().map(|&v| v - base).collect();
bases.push(base);
blocks.push(EliasFano::new(&rel, local_u));
i = j;
}
Self {
universe_size,
block_size,
n,
bases,
blocks,
}
}
#[must_use]
pub fn universe_size(&self) -> u64 {
self.universe_size
}
#[must_use]
pub fn len(&self) -> usize {
self.n
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.n == 0
}
#[must_use]
pub fn block_size(&self) -> usize {
self.block_size
}
#[must_use]
pub fn num_blocks(&self) -> usize {
self.blocks.len()
}
pub fn get(&self, i: usize) -> Result<u64> {
if i >= self.n {
return Err(Error::IndexOutOfBounds(i));
}
let b = i / self.block_size;
let off = i % self.block_size;
let base = *self
.bases
.get(b)
.ok_or(Error::InvalidEncoding("missing block base".to_string()))?;
let block = self
.blocks
.get(b)
.ok_or(Error::InvalidEncoding("missing block".to_string()))?;
let rel = block.get(off)?;
Ok(base + rel)
}
pub fn iter(&self) -> PefIter<'_> {
PefIter {
pef: self,
block_idx: 0,
elem_in_block: 0,
remaining: self.n,
}
}
pub fn heap_bytes(&self) -> usize {
self.bases.len() * 8 + self.blocks.iter().map(|b| b.heap_bytes()).sum::<usize>()
}
pub fn to_bytes(&self) -> Vec<u8> {
let mut out = Vec::new();
out.extend_from_slice(b"SBITPEF2");
out.extend_from_slice(&self.universe_size.to_le_bytes());
out.extend_from_slice(&(self.block_size as u32).to_le_bytes());
out.extend_from_slice(&(self.n as u64).to_le_bytes());
out.extend_from_slice(&(self.blocks.len() as u64).to_le_bytes());
for &b in &self.bases {
out.extend_from_slice(&b.to_le_bytes());
}
for blk in &self.blocks {
let bytes = blk.to_bytes();
out.extend_from_slice(&(bytes.len() as u64).to_le_bytes());
out.extend_from_slice(&bytes);
}
out
}
pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
use crate::error::ByteReader;
let mut r = ByteReader::new(bytes);
r.read_magic(b"SBITPEF2", "PartitionedEliasFano")?;
let universe_size = r.read_u64()?;
let block_size = r.read_u32()? as usize;
let n = r.read_u64()? as usize;
let num_blocks = r.read_u64()? as usize;
let bases = r.read_u64_vec(num_blocks)?;
let mut blocks = Vec::with_capacity(num_blocks);
for _ in 0..num_blocks {
let len_bytes = r.read_u64()? as usize;
let blk_bytes = r.take(len_bytes)?;
let ef = EliasFano::from_bytes(blk_bytes)?;
blocks.push(ef);
}
r.expect_eof("PartitionedEliasFano")?;
if block_size == 0 {
return Err(Error::InvalidEncoding(
"block_size must be >= 1".to_string(),
));
}
let actual_n: usize = blocks.iter().map(|b| b.len()).sum();
if actual_n != n {
return Err(Error::InvalidEncoding(format!(
"PEF n ({n}) does not match sum of block lengths ({actual_n})"
)));
}
Ok(Self {
universe_size,
block_size,
n,
bases,
blocks,
})
}
}
pub struct PefIter<'a> {
pef: &'a PartitionedEliasFano,
block_idx: usize,
elem_in_block: usize,
remaining: usize,
}
impl Iterator for PefIter<'_> {
type Item = u64;
fn next(&mut self) -> Option<u64> {
if self.remaining == 0 {
return None;
}
while self.block_idx < self.pef.blocks.len()
&& self.elem_in_block >= self.pef.blocks[self.block_idx].len()
{
self.block_idx += 1;
self.elem_in_block = 0;
}
if self.block_idx >= self.pef.blocks.len() {
return None;
}
let base = self.pef.bases[self.block_idx];
let rel = self.pef.blocks[self.block_idx]
.get(self.elem_in_block)
.unwrap();
self.elem_in_block += 1;
self.remaining -= 1;
Some(base + rel)
}
fn size_hint(&self) -> (usize, Option<usize>) {
(self.remaining, Some(self.remaining))
}
}
impl ExactSizeIterator for PefIter<'_> {}
impl<'a> IntoIterator for &'a PartitionedEliasFano {
type Item = u64;
type IntoIter = PefIter<'a>;
fn into_iter(self) -> PefIter<'a> {
self.iter()
}
}
#[cfg(test)]
mod tests {
use super::*;
use alloc::vec;
#[test]
fn partitioned_roundtrip_basic() {
let values = vec![10u64, 20, 30, 31, 32, 100, 1000];
let pef = PartitionedEliasFano::new(&values, 2000, 3);
assert_eq!(pef.len(), values.len());
for (i, &v) in values.iter().enumerate() {
assert_eq!(pef.get(i).unwrap(), v);
}
let bytes = pef.to_bytes();
let pef2 = PartitionedEliasFano::from_bytes(&bytes).unwrap();
assert_eq!(pef2.len(), values.len());
for (i, &v) in values.iter().enumerate() {
assert_eq!(pef2.get(i).unwrap(), v);
}
}
#[test]
fn partitioned_single_element() {
let pef = PartitionedEliasFano::new(&[42u64], 100, 64);
assert_eq!(pef.len(), 1);
assert_eq!(pef.get(0).unwrap(), 42);
let bytes = pef.to_bytes();
let pef2 = PartitionedEliasFano::from_bytes(&bytes).unwrap();
assert_eq!(pef2.get(0).unwrap(), 42);
}
#[test]
fn partitioned_empty() {
let pef = PartitionedEliasFano::new(&[], 100u64, 64);
assert!(pef.is_empty());
assert!(pef.get(0).is_err());
}
#[test]
fn partitioned_block_boundary() {
let values = vec![0u64, 1, 2, 10, 11, 12];
let pef = PartitionedEliasFano::new(&values, 20, 3);
assert_eq!(pef.num_blocks(), 2);
for (i, &v) in values.iter().enumerate() {
assert_eq!(pef.get(i).unwrap(), v);
}
}
#[test]
fn partitioned_block_size_larger_than_n() {
let values = vec![5u64, 10, 15];
let pef = PartitionedEliasFano::new(&values, 20, 100);
assert_eq!(pef.num_blocks(), 1);
for (i, &v) in values.iter().enumerate() {
assert_eq!(pef.get(i).unwrap(), v);
}
}
#[test]
fn partitioned_rejects_corrupted_n() {
let values = vec![10u64, 20, 30];
let pef = PartitionedEliasFano::new(&values, 100, 2);
let mut bytes = pef.to_bytes();
let bad_n: u64 = 999;
bytes[20..28].copy_from_slice(&bad_n.to_le_bytes());
assert!(PartitionedEliasFano::from_bytes(&bytes).is_err());
}
}