use super::{codec, codec::Codec, traits::Storable, VarVec, VarVecBitWriter, VarVecError};
use crate::common::codec_writer::CodecWriter;
use crate::fixed::{BitWidth, FixedVec};
use dsi_bitstream::{
dispatch::StaticCodeWrite,
impls::MemWordWriterVec,
prelude::{BitWrite, CodesWrite, Endianness, LE},
};
use std::marker::PhantomData;
/// Builder that constructs a [`VarVec`] from a slice of values.
///
/// `T` is the element type being stored and `E` the endianness of the
/// underlying bit stream; neither is stored in the builder, so both are
/// carried through `PhantomData`.
#[derive(Debug)]
pub struct VarVecBuilder<T: Storable, E: Endianness> {
/// Sampling rate: the bit offset of every `k`-th element is recorded while
/// building. Initialized to 32 by `new`; zero is rejected by `build`.
k: usize,
/// Requested codec specification, resolved to a concrete codec in `build`.
/// Initialized to `Codec::Auto` by `new`.
codec_spec: Codec,
/// Ties the otherwise unused type parameters `T` and `E` to the builder.
_markers: PhantomData<(T, E)>,
}
impl<T: Storable, E: Endianness> VarVecBuilder<T, E> {
    /// Creates a builder preset with a sampling rate of 32 and [`Codec::Auto`].
    pub(super) fn new() -> Self {
        let k = 32;
        let codec_spec = Codec::Auto;
        Self {
            k,
            codec_spec,
            _markers: PhantomData,
        }
    }

    /// Sets the sampling rate: the bit offset of every `k`-th element is
    /// recorded when the vector is built.
    ///
    /// A value of zero is accepted here but rejected later by [`Self::build`].
    pub fn k(self, k: usize) -> Self {
        Self { k, ..self }
    }

    /// Sets the codec specification used to encode the elements.
    pub fn codec(self, codec_spec: Codec) -> Self {
        Self { codec_spec, ..self }
    }

    /// Encodes `input` and returns the resulting [`VarVec`].
    ///
    /// Every element is converted with `to_word` and written to an in-memory
    /// bit stream using the resolved codec. The bit offset at which each
    /// `k`-th element starts (indices `0`, `k`, `2k`, ...) is collected and
    /// stored in a `FixedVec` built with `BitWidth::Minimal`.
    ///
    /// An empty `input` produces a vector with no data words and no samples;
    /// the codec is still resolved first, so codec errors are reported even
    /// for empty input.
    ///
    /// # Errors
    ///
    /// * [`VarVecError::InvalidParameters`] if the sampling rate is zero.
    /// * Any error produced while resolving the codec specification.
    /// * Any error produced by the codec writer while encoding a value.
    pub fn build(self, input: &[T]) -> Result<VarVec<T, E, Vec<u64>>, VarVecError>
    where
        VarVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
    {
        let Self { k, codec_spec, .. } = self;

        if k == 0 {
            return Err(VarVecError::InvalidParameters(
                "Sampling rate k cannot be zero".to_string(),
            ));
        }

        // Specifications that need to look at the data get the whole input;
        // all others are resolved against an empty sample.
        let resolved_code = if codec_spec.requires_analysis() {
            codec::resolve_codec_from_iter(input.iter().map(|&x| x.to_word()), codec_spec)?
        } else {
            codec::resolve_codec(&[] as &[u64], codec_spec)?
        };

        if input.is_empty() {
            let empty_samples = FixedVec::<u64, u64, LE>::builder()
                .build(&[0u64; 0])
                .unwrap();
            // SAFETY: NOTE(review): the contract of `new_unchecked` is not
            // visible from this file. The arguments are consistent with each
            // other: no data, no samples and a length of zero.
            return Ok(unsafe {
                VarVec::new_unchecked(Vec::new(), empty_samples, k, 0, resolved_code)
            });
        }

        let mut writer = VarVecBitWriter::<E>::new(MemWordWriterVec::new(Vec::new()));
        let code_writer = CodecWriter::new(resolved_code);

        // One sample per group of `k` elements (the last group may be shorter).
        let mut sample_offsets = Vec::with_capacity(input.len().div_ceil(k));
        let mut bit_offset = 0;
        for group in input.chunks(k) {
            // The first element of each group sits at an index that is a
            // multiple of `k`, so its starting bit offset is a sample.
            sample_offsets.push(bit_offset as u64);
            for &value in group {
                bit_offset += code_writer.write(&mut writer, value.to_word())?;
            }
        }

        // Append 64 one-bits after the last codeword. Presumably this is
        // padding so that decoding the final element never reads past the end
        // of the buffer -- confirm against the reader implementation.
        writer.write_bits(u64::MAX, 64).unwrap();
        writer.flush().unwrap();
        let mut data = writer.into_inner().unwrap().into_inner();
        data.shrink_to_fit();

        let samples = FixedVec::<u64, u64, LE>::builder()
            .bit_width(BitWidth::Minimal)
            .build(&sample_offsets)
            .unwrap();

        // SAFETY: NOTE(review): the contract of `new_unchecked` is not visible
        // from this file. By construction `data` holds exactly `input.len()`
        // codewords written with `resolved_code`, and `samples` holds the
        // starting bit offset of every `k`-th codeword.
        Ok(unsafe { VarVec::new_unchecked(data, samples, k, input.len(), resolved_code) })
    }
}
/// Builder that constructs a [`VarVec`] by consuming an iterator of values.
///
/// `T` is the element type being stored and `E` the endianness of the
/// underlying bit stream; neither is stored in the builder, so both are
/// carried through `PhantomData`.
#[derive(Debug)]
pub struct VarVecFromIterBuilder<T: Storable, E: Endianness, I: IntoIterator<Item = T>> {
/// Source of the values to encode; consumed by `build`.
iter: I,
/// Sampling rate: the bit offset of every `k`-th element is recorded while
/// building. Initialized to 32 by `new`; zero is rejected by `build`.
k: usize,
/// Requested codec specification. Initialized to `Codec::Gamma` by `new`;
/// specifications without fixed parameters are rejected by `build`.
codec_spec: Codec,
/// Ties the otherwise unused type parameters `T` and `E` to the builder.
_markers: PhantomData<(T, E)>,
}
impl<T: Storable, E: Endianness, I: IntoIterator<Item = T>> VarVecFromIterBuilder<T, E, I> {
    /// Creates a builder over `iter` preset with a sampling rate of 32 and
    /// [`Codec::Gamma`].
    pub(super) fn new(iter: I) -> Self {
        Self {
            iter,
            k: 32,
            codec_spec: Codec::Gamma,
            _markers: PhantomData,
        }
    }

    /// Sets the sampling rate: the bit offset of every `k`-th element is
    /// recorded when the vector is built.
    ///
    /// A value of zero is accepted here but rejected later by [`Self::build`].
    pub fn k(mut self, k: usize) -> Self {
        self.k = k;
        self
    }

    /// Sets the codec specification used to encode the elements.
    ///
    /// The specification must carry fixed parameters; see [`Self::build`].
    pub fn codec(mut self, codec_spec: Codec) -> Self {
        self.codec_spec = codec_spec;
        self
    }

    /// Consumes the iterator, encodes every item and returns the resulting
    /// [`VarVec`].
    ///
    /// The iterator is traversed exactly once, so codec specifications that
    /// have no fixed parameters (`Codec::Auto`, and `Rice`, `Zeta` or `Golomb`
    /// with a `None` parameter) are rejected. Each item is converted with
    /// `to_word` and written to an in-memory bit stream; the bit offset at
    /// which every `k`-th item starts is stored in a `FixedVec` built with
    /// `BitWidth::Minimal`.
    ///
    /// # Errors
    ///
    /// * [`VarVecError::InvalidParameters`] if the sampling rate is zero. This
    ///   is checked first, matching `VarVecBuilder::build`.
    /// * [`VarVecError::InvalidParameters`] if the codec specification has no
    ///   fixed parameters.
    /// * Any error produced while resolving the codec specification.
    /// * Any error produced by the codec writer while encoding a value.
    pub fn build(self) -> Result<VarVec<T, E, Vec<u64>>, VarVecError>
    where
        VarVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
    {
        let Self {
            iter,
            k,
            codec_spec,
            ..
        } = self;

        // Validate the sampling rate before anything else so both builders
        // report the same error for the same invalid configuration.
        if k == 0 {
            return Err(VarVecError::InvalidParameters(
                "Sampling rate k cannot be zero".to_string(),
            ));
        }

        let resolved_code = match codec_spec {
            Codec::Auto
            | Codec::Rice { log2_b: None }
            | Codec::Zeta { k: None }
            | Codec::Golomb { b: None } => {
                return Err(VarVecError::InvalidParameters("Automatic parameter selection is not supported for iterator-based construction. Please provide fixed parameters.".to_string()));
            }
            spec => codec::resolve_codec(&[0u64; 0], spec)?,
        };

        let word_writer = MemWordWriterVec::new(Vec::new());
        let mut writer = VarVecBitWriter::<E>::new(word_writer);
        let code_writer = CodecWriter::new(resolved_code);

        let values = iter.into_iter();
        // Reserve room for the samples implied by the iterator's lower bound;
        // an unknown bound (0) simply reserves nothing.
        let (lower_bound, _) = values.size_hint();
        let mut temp_samples: Vec<u64> = Vec::with_capacity(lower_bound.div_ceil(k));

        let mut current_bit_offset = 0;
        // Number of items encoded so far; doubles as the index of the next item.
        let mut len: usize = 0;
        for value in values {
            if len % k == 0 {
                temp_samples.push(current_bit_offset as u64);
            }
            current_bit_offset += code_writer.write(&mut writer, value.to_word())?;
            len += 1;
        }

        // Append 64 one-bits after the last codeword. Presumably this is
        // padding so that decoding the final element never reads past the end
        // of the buffer -- confirm against the reader implementation.
        writer.write_bits(u64::MAX, 64).unwrap();

        let samples = FixedVec::<u64, u64, LE>::builder()
            .bit_width(BitWidth::Minimal)
            .build(&temp_samples)
            .unwrap();

        writer.flush().unwrap();
        let mut data = writer.into_inner().unwrap().into_inner();
        data.shrink_to_fit();

        // SAFETY: NOTE(review): the contract of `new_unchecked` is not visible
        // from this file. By construction `data` holds exactly `len` codewords
        // written with `resolved_code`, and `samples` holds the starting bit
        // offset of every `k`-th codeword.
        Ok(unsafe { VarVec::new_unchecked(data, samples, k, len, resolved_code) })
    }
}