use crate::bloom::NgramBloom;
use crate::error::{Error, Result};
use crate::histogram::ByteHistogram;
use crate::BlockIndex;
pub struct IncrementalBuilder;
impl IncrementalBuilder {
pub fn append_blocks(serialized: &[u8], blocks: &[&[u8]]) -> Result<Vec<u8>> {
Self::append_blocks_with_boundary(serialized, None, blocks)
}
pub fn append_blocks_with_boundary(
serialized: &[u8],
prev_last_byte: Option<u8>,
blocks: &[&[u8]],
) -> Result<Vec<u8>> {
let mut index = BlockIndex::from_bytes_checked(serialized)?;
let mut prev_last_byte = prev_last_byte;
for &block in blocks {
index.append_block_with_boundary(block, prev_last_byte)?;
prev_last_byte = block.last().copied();
}
Ok(index.to_bytes())
}
}
impl BlockIndex {
pub fn append_block(&mut self, block_data: &[u8]) -> Result<()> {
self.append_block_with_boundary(block_data, None)
}
pub fn append_block_with_boundary(
&mut self,
block_data: &[u8],
prev_last_byte: Option<u8>,
) -> Result<()> {
if block_data.is_empty() || block_data.len() > self.block_size {
return Err(Error::UnalignedData {
data_len: block_data.len(),
block_size: self.block_size,
});
}
if self.total_len % self.block_size != 0 {
return Err(Error::TrailingPartialBlock {
total_len: self.total_len,
block_size: self.block_size,
});
}
let bloom_bits = self.bloom_bits()?;
self.histograms.push(ByteHistogram::from_block(block_data));
let mut bloom = NgramBloom::from_block(block_data, bloom_bits)?;
if let Some(prev_byte) = prev_last_byte {
if let Some(&first) = block_data.first() {
bloom.insert_ngram(prev_byte, first);
}
}
self.blooms.push(bloom);
self.total_len =
self.total_len
.checked_add(block_data.len())
.ok_or(Error::UnalignedData {
data_len: self.total_len,
block_size: self.block_size,
})?;
Ok(())
}
pub fn merge(&mut self, other: &BlockIndex) -> Result<()> {
self.merge_with_boundary(other, None, None)
}
pub fn merge_with_boundary(
&mut self,
other: &BlockIndex,
self_last_byte: Option<u8>,
other_first_byte: Option<u8>,
) -> Result<()> {
if self.block_size != other.block_size {
return Err(Error::IncompatibleIndexConfiguration {
reason: "block_size differs",
});
}
if self.total_len % self.block_size != 0 {
return Err(Error::TrailingPartialBlock {
total_len: self.total_len,
block_size: self.block_size,
});
}
if other.total_len % other.block_size != 0 {
return Err(Error::TrailingPartialBlock {
total_len: other.total_len,
block_size: other.block_size,
});
}
let self_bloom_bits = self.bloom_bits()?;
let other_bloom_bits = other.bloom_bits()?;
if self_bloom_bits != other_bloom_bits {
return Err(Error::IncompatibleIndexConfiguration {
reason: "bloom_bits differs",
});
}
let mut other_blooms: Vec<crate::bloom::NgramBloom> = other.blooms.clone();
if let (Some(prev_byte), Some(first_byte), Some(first_bloom)) =
(self_last_byte, other_first_byte, other_blooms.first_mut())
{
first_bloom.insert_ngram(prev_byte, first_byte);
}
self.total_len = self.total_len.checked_add(other.total_len).ok_or(
Error::IncompatibleIndexConfiguration {
reason: "total length overflow",
},
)?;
self.histograms.extend(other.histograms.iter().cloned());
self.blooms.extend(other_blooms);
self.bloom_bits = self_bloom_bits;
Ok(())
}
pub fn remove_blocks(&mut self, block_ids: &[usize]) -> Result<()> {
if block_ids.is_empty() {
return Ok(());
}
let block_count = self.block_count();
let mut remove = vec![false; block_count];
for &block_id in block_ids {
if block_id >= block_count {
return Err(Error::InvalidBlockId {
block_id,
block_count,
});
}
remove[block_id] = true;
}
let mut found_removed = false;
for &should_remove in &remove {
if should_remove {
found_removed = true;
} else if found_removed {
return Err(Error::NonSuffixBlockRemoval);
}
}
let mut removed_len = 0;
for (i, &should_remove) in remove.iter().enumerate() {
if should_remove {
let is_last = i == block_count - 1;
let partial = self.total_len % self.block_size;
let block_len = if is_last && partial != 0 {
partial
} else {
self.block_size
};
removed_len += block_len;
}
}
let removed_blocks = remove.iter().filter(|&&flag| flag).count();
if removed_blocks == 0 {
return Ok(());
}
let mut next_histograms = Vec::with_capacity(block_count - removed_blocks);
let mut next_blooms = Vec::with_capacity(block_count - removed_blocks);
for ((histogram, bloom), should_remove) in self
.histograms
.drain(..)
.zip(self.blooms.drain(..))
.zip(remove)
{
if !should_remove {
next_histograms.push(histogram);
next_blooms.push(bloom);
}
}
self.histograms = next_histograms;
self.blooms = next_blooms;
self.total_len = self
.total_len
.checked_sub(removed_len)
.ok_or(Error::InvalidBlockId {
block_id: block_count,
block_count,
})?;
Ok(())
}
fn bloom_bits(&self) -> Result<usize> {
if self.bloom_bits == 0 {
match self.blooms.first() {
Some(bloom) => Ok(bloom.raw_parts().0),
None => Err(Error::ZeroBloomBits),
}
} else {
Ok(self.bloom_bits)
}
}
}