use roaring::RoaringBitmap;
use std::sync::Arc;
#[derive(Debug, Clone)]
pub struct DeletionVector {
bitmap: Arc<RoaringBitmap>,
}
const MAGIC_NUMBER: u32 = 1581511376;
const MAGIC_NUMBER_SIZE_BYTES: usize = 4;
impl DeletionVector {
pub fn empty() -> Self {
Self {
bitmap: Arc::new(RoaringBitmap::new()),
}
}
pub fn from_bitmap(bitmap: RoaringBitmap) -> Self {
Self {
bitmap: Arc::new(bitmap),
}
}
pub fn iter(&self) -> DeletionVectorIterator {
DeletionVectorIterator::new(self.bitmap.iter().map(u64::from).collect())
}
pub fn is_empty(&self) -> bool {
self.bitmap.is_empty()
}
#[cfg(test)]
fn bitmap(&self) -> &RoaringBitmap {
&self.bitmap
}
pub fn read_from_bytes(bytes: &[u8], expected_length: Option<u64>) -> crate::Result<Self> {
use bytes::Buf;
if bytes.len() < 8 {
return Err(crate::Error::DataInvalid {
message: "Deletion vector data too short".to_string(),
source: None,
});
}
let mut buf = bytes;
let bitmap_length = buf.get_i32() as usize;
let magic_number = buf.get_i32() as u32;
if magic_number != MAGIC_NUMBER {
return Err(crate::Error::DataInvalid {
message: format!(
"Invalid magic number: expected {MAGIC_NUMBER}, got {magic_number}"
),
source: None,
});
}
if let Some(expected) = expected_length {
if bitmap_length as u64 != expected {
return Err(crate::Error::DataInvalid {
message: format!(
"Size not match, actual size: {bitmap_length}, expected size: {expected}"
),
source: None,
});
}
}
let bitmap_data_size = bitmap_length - MAGIC_NUMBER_SIZE_BYTES;
if bytes.len() < 8 + bitmap_data_size + 4 {
return Err(crate::Error::DataInvalid {
message: format!(
"Deletion vector data incomplete: need {} bytes, got {}",
8 + bitmap_data_size + 4,
bytes.len()
),
source: None,
});
}
let bitmap_data = &bytes[8..8 + bitmap_data_size];
let bitmap = RoaringBitmap::deserialize_from(bitmap_data).map_err(|e| {
crate::Error::DataInvalid {
message: format!("Failed to deserialize RoaringBitmap: {e}"),
source: Some(Box::new(e)),
}
})?;
Ok(Self::from_bitmap(bitmap))
}
}
impl Default for DeletionVector {
fn default() -> Self {
Self::empty()
}
}
#[derive(Debug)]
pub struct DeletionVectorIterator {
positions: Vec<u64>,
cursor: usize,
}
impl DeletionVectorIterator {
pub(crate) fn new(positions: Vec<u64>) -> Self {
Self {
positions,
cursor: 0,
}
}
}
impl Iterator for DeletionVectorIterator {
type Item = u64;
fn next(&mut self) -> Option<Self::Item> {
if self.cursor < self.positions.len() {
let v = self.positions[self.cursor];
self.cursor += 1;
Some(v)
} else {
None
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use roaring::RoaringBitmap;
use std::env::current_dir;
#[test]
fn test_read_deletion_vector() {
let workdir = current_dir().unwrap();
let path =
workdir.join("tests/fixtures/index/index-7e53780d-2faa-4e4c-9f2e-93af5082bbdb-0");
let bytes = &std::fs::read(&path).expect("fixture index file must exist")[1..];
assert!(!bytes.is_empty(), "fixture file must not be empty");
let dv = DeletionVector::read_from_bytes(bytes, Some(24))
.expect("failed to read DeletionVector");
let expected_bitmap = RoaringBitmap::from_iter([1u32, 2u32]);
assert_eq!(dv.bitmap(), &expected_bitmap, "bitmap should be [1, 2]");
}
}