use crate::{Error, Result};
use alloc::boxed::Box;
use alloc::vec::Vec;
pub const CHUNK_ROWS: u32 = 2048;
const WORDS_PER_CHUNK: usize = (CHUNK_ROWS as usize) / 64;
const SPARSE_MAX: usize = WORDS_PER_CHUNK * 4;
const KIND_SPARSE: u8 = 0;
const KIND_DENSE: u8 = 1;
fn split(row: u32) -> (u32, u16) {
(row / CHUNK_ROWS, (row % CHUNK_ROWS) as u16)
}
#[derive(Clone, Debug, Eq, PartialEq)]
enum Container {
Sparse(Vec<u16>),
Dense(Box<[u64; WORDS_PER_CHUNK]>),
}
impl Container {
fn contains(&self, off: u16) -> bool {
match self {
Self::Sparse(offs) => offs.binary_search(&off).is_ok(),
Self::Dense(words) => {
let (w, b) = (off as usize / 64, off as usize % 64);
words.get(w).is_some_and(|word| word & (1u64 << b) != 0)
}
}
}
fn insert(&mut self, off: u16) -> bool {
match self {
Self::Sparse(offs) => match offs.binary_search(&off) {
Ok(_) => false,
Err(pos) => {
offs.insert(pos, off);
if offs.len() > SPARSE_MAX {
*self = Self::densify(offs);
}
true
}
},
Self::Dense(words) => {
let (w, b) = (off as usize / 64, off as usize % 64);
let mask = 1u64 << b;
let Some(word) = words.get_mut(w) else {
return false;
};
let was = *word & mask != 0;
*word |= mask;
!was
}
}
}
fn densify(offs: &[u16]) -> Self {
let mut words = Box::new([0u64; WORDS_PER_CHUNK]);
for &off in offs {
if let Some(word) = words.get_mut(off as usize / 64) {
*word |= 1u64 << (off as usize % 64);
}
}
Self::Dense(words)
}
#[expect(
clippy::cast_possible_truncation,
reason = "a chunk holds at most CHUNK_ROWS (2048) distinct offsets, well within u32"
)]
fn cardinality(&self) -> u32 {
match self {
Self::Sparse(offs) => offs.len() as u32,
Self::Dense(words) => words.iter().map(|w| w.count_ones()).sum(),
}
}
#[expect(
clippy::cast_possible_truncation,
reason = "w < 32 and b < 64, so w*64 + b < 2048 fits u16"
)]
fn for_each<F: FnMut(u16)>(&self, mut f: F) {
match self {
Self::Sparse(offs) => offs.iter().for_each(|&o| f(o)),
Self::Dense(words) => {
for (w, &word) in words.iter().enumerate() {
let mut bits = word;
while bits != 0 {
let b = bits.trailing_zeros() as usize;
f((w * 64 + b) as u16);
bits &= bits - 1;
}
}
}
}
}
}
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct DeleteBitmap {
chunks: Vec<(u32, Container)>,
}
impl DeleteBitmap {
#[must_use]
pub fn new() -> Self {
Self { chunks: Vec::new() }
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.chunks.is_empty()
}
#[must_use]
pub fn len(&self) -> u64 {
self.chunks
.iter()
.map(|(_, c)| u64::from(c.cardinality()))
.sum()
}
pub fn insert(&mut self, row: u32) -> bool {
let (chunk, off) = split(row);
match self.chunks.binary_search_by_key(&chunk, |(c, _)| *c) {
Ok(pos) => self.chunks.get_mut(pos).is_some_and(|(_, c)| c.insert(off)),
Err(pos) => {
self.chunks
.insert(pos, (chunk, Container::Sparse(alloc::vec![off])));
true
}
}
}
#[must_use]
pub fn contains(&self, row: u32) -> bool {
let (chunk, off) = split(row);
match self.chunks.binary_search_by_key(&chunk, |(c, _)| *c) {
Ok(pos) => self.chunks.get(pos).is_some_and(|(_, c)| c.contains(off)),
Err(_) => false,
}
}
#[must_use]
pub fn chunk_has_deletes(&self, chunk_index: u32) -> bool {
self.chunks
.binary_search_by_key(&chunk_index, |(c, _)| *c)
.is_ok()
}
pub fn union(&mut self, other: &Self) {
for (chunk, container) in &other.chunks {
match self.chunks.binary_search_by_key(chunk, |(c, _)| *c) {
Ok(pos) => {
if let Some((_, dst)) = self.chunks.get_mut(pos) {
container.for_each(|off| {
dst.insert(off);
});
}
}
Err(pos) => self.chunks.insert(pos, (*chunk, container.clone())),
}
}
}
pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
self.chunks.iter().flat_map(|(chunk, container)| {
let base = chunk * CHUNK_ROWS;
let mut rows = Vec::with_capacity(container.cardinality() as usize);
container.for_each(|off| rows.push(base + u32::from(off)));
rows.into_iter()
})
}
#[must_use]
#[expect(
clippy::cast_possible_truncation,
reason = "chunk count <= row_count/2048 fits u32; a chunk holds <= 2048 offsets, fitting u16"
)]
pub fn encode(&self) -> Vec<u8> {
let mut out = Vec::new();
out.extend_from_slice(&(self.chunks.len() as u32).to_le_bytes());
for (chunk, container) in &self.chunks {
out.extend_from_slice(&chunk.to_le_bytes());
match container {
Container::Sparse(offs) => {
out.push(KIND_SPARSE);
out.extend_from_slice(&(offs.len() as u16).to_le_bytes());
for &off in offs {
out.extend_from_slice(&off.to_le_bytes());
}
}
Container::Dense(words) => {
out.push(KIND_DENSE);
for &word in words.iter() {
out.extend_from_slice(&word.to_le_bytes());
}
}
}
}
out
}
pub fn decode(bytes: &[u8]) -> Result<Self> {
let mut cur = Cursor::new(bytes);
let chunk_count = usize::try_from(cur.u32()?)
.map_err(|_| Error::InvalidHeader("delete_bitmap: chunk count does not fit usize"))?;
if chunk_count > cur.remaining_len() / 7 {
return Err(Error::InvalidHeader(
"delete_bitmap: chunk count exceeds encoded payload",
));
}
let mut chunks = Vec::with_capacity(chunk_count);
let mut last_chunk: Option<u32> = None;
for _ in 0..chunk_count {
let chunk = cur.u32()?;
if chunk > u32::MAX / CHUNK_ROWS {
return Err(Error::InvalidHeader(
"delete_bitmap: chunk index out of row-position range",
));
}
if last_chunk.is_some_and(|p| chunk <= p) {
return Err(Error::InvalidHeader(
"delete_bitmap: chunk indices not strictly ascending",
));
}
last_chunk = Some(chunk);
let container = match cur.u8()? {
KIND_SPARSE => {
let count = cur.u16()? as usize;
if count == 0 || count > CHUNK_ROWS as usize {
return Err(Error::InvalidHeader(
"delete_bitmap: sparse count out of range",
));
}
let mut offs = Vec::with_capacity(count);
let mut last: Option<u16> = None;
for _ in 0..count {
let off = cur.u16()?;
if u32::from(off) >= CHUNK_ROWS {
return Err(Error::InvalidHeader(
"delete_bitmap: sparse offset out of range",
));
}
if last.is_some_and(|p| off <= p) {
return Err(Error::InvalidHeader(
"delete_bitmap: sparse offsets not strictly ascending",
));
}
last = Some(off);
offs.push(off);
}
Container::Sparse(offs)
}
KIND_DENSE => {
let mut words = Box::new([0u64; WORDS_PER_CHUNK]);
for word in words.iter_mut() {
*word = cur.u64()?;
}
Container::Dense(words)
}
_ => {
return Err(Error::InvalidHeader(
"delete_bitmap: unknown container kind",
));
}
};
if container.cardinality() == 0 {
return Err(Error::InvalidHeader("delete_bitmap: empty container"));
}
chunks.push((chunk, container));
}
if cur.remaining_len() != 0 {
return Err(Error::InvalidHeader(
"delete_bitmap: trailing bytes after chunks",
));
}
Ok(Self { chunks })
}
}
struct Cursor<'a> {
buf: &'a [u8],
pos: usize,
}
impl<'a> Cursor<'a> {
fn new(buf: &'a [u8]) -> Self {
Self { buf, pos: 0 }
}
fn remaining_len(&self) -> usize {
self.buf.len().saturating_sub(self.pos)
}
fn take<const N: usize>(&mut self) -> Result<[u8; N]> {
let end = self
.pos
.checked_add(N)
.ok_or(Error::InvalidHeader("delete_bitmap: length overflow"))?;
let arr = self
.buf
.get(self.pos..end)
.and_then(|s| <[u8; N]>::try_from(s).ok())
.ok_or(Error::InvalidHeader("delete_bitmap: truncated buffer"))?;
self.pos = end;
Ok(arr)
}
fn u8(&mut self) -> Result<u8> {
let [b] = self.take()?;
Ok(b)
}
fn u16(&mut self) -> Result<u16> {
Ok(u16::from_le_bytes(self.take()?))
}
fn u32(&mut self) -> Result<u32> {
Ok(u32::from_le_bytes(self.take()?))
}
fn u64(&mut self) -> Result<u64> {
Ok(u64::from_le_bytes(self.take()?))
}
}
#[cfg(test)]
#[expect(
clippy::unwrap_used,
clippy::indexing_slicing,
clippy::cast_possible_truncation,
reason = "test assertions over fixed in-test buffers"
)]
mod tests;