use super::{SeqVec, SeqVecBitWriter, SeqVecError};
use crate::common::codec_writer::CodecWriter;
use crate::fixed::{BitWidth, FixedVec};
use crate::variable::codec::{self, Codec};
use crate::variable::traits::Storable;
use dsi_bitstream::{
dispatch::StaticCodeWrite,
impls::MemWordWriterVec,
prelude::{BitWrite, Codes, CodesWrite, Endianness},
};
use std::marker::PhantomData;
/// Builder that configures and constructs a [`SeqVec`] from a slice of sequences.
///
/// `T` is the element type and `E` the bitstream endianness; both are carried
/// only at the type level.
#[derive(Debug, Clone)]
pub struct SeqVecBuilder<T: Storable, E: Endianness> {
/// Codec specification to resolve at build time. Some specifications need a
/// pass over the data before they can be resolved to a concrete code.
codec_spec: Codec,
/// Whether the length of every sequence is stored next to the bit offsets.
store_lengths: bool,
/// Ties the builder to `T` and `E` without storing a value of either type.
_markers: PhantomData<(T, E)>,
}
impl<T: Storable, E: Endianness> Default for SeqVecBuilder<T, E> {
fn default() -> Self {
Self::new()
}
}
impl<T: Storable, E: Endianness> SeqVecBuilder<T, E> {
    /// Creates a builder that selects the codec automatically ([`Codec::Auto`])
    /// and does not store per-sequence lengths.
    #[inline]
    pub fn new() -> Self {
        Self {
            _markers: PhantomData,
            store_lengths: false,
            codec_spec: Codec::Auto,
        }
    }

    /// Replaces the codec specification used to encode the elements.
    #[inline]
    pub fn codec(self, codec_spec: Codec) -> Self {
        Self { codec_spec, ..self }
    }

    /// Chooses whether the length of every sequence is stored explicitly.
    #[inline]
    pub fn store_lengths(self, store: bool) -> Self {
        Self {
            store_lengths: store,
            ..self
        }
    }

    /// Encodes `sequences` into a [`SeqVec`].
    ///
    /// When the codec specification needs to inspect the data, the input is
    /// scanned once to resolve the codec and a second time to encode it;
    /// otherwise a single encoding pass is performed.
    ///
    /// # Errors
    ///
    /// Returns [`SeqVecError::CodecDispatch`] if the codec specification cannot
    /// be resolved, and propagates any error raised while encoding or while
    /// building the offset/length vectors.
    pub fn build<S: AsRef<[T]>>(
        self,
        sequences: &[S],
    ) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
    where
        T: 'static,
        SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
    {
        if !self.codec_spec.requires_analysis() {
            return self.build_single_pass(sequences);
        }
        self.build_two_pass(sequences)
    }

    /// Resolves the codec from the flattened element stream, then encodes.
    fn build_two_pass<S: AsRef<[T]>>(
        self,
        sequences: &[S],
    ) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
    where
        T: 'static,
        SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
    {
        // First pass: every element of every sequence, as a word, feeds the analysis.
        let words = sequences
            .iter()
            .flat_map(|seq| seq.as_ref().iter().map(|x| x.to_word()));

        match codec::resolve_codec_from_iter(words, self.codec_spec) {
            // Second pass: the actual encoding with the chosen code.
            Ok(resolved) => self.encode_sequences(sequences, resolved),
            Err(err) => Err(SeqVecError::CodecDispatch(err.to_string())),
        }
    }

    /// Resolves a codec specification that needs no data, then encodes.
    fn build_single_pass<S: AsRef<[T]>>(
        self,
        sequences: &[S],
    ) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
    where
        T: 'static,
        SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
    {
        // An empty sample is passed because the specification does not require analysis.
        match codec::resolve_codec::<u64>(&[], self.codec_spec) {
            Ok(resolved) => self.encode_sequences(sequences, resolved),
            Err(err) => Err(SeqVecError::CodecDispatch(err.to_string())),
        }
    }

    /// Encodes `sequences` with an already resolved code and assembles the
    /// resulting [`SeqVec`].
    fn encode_sequences<S: AsRef<[T]>>(
        self,
        sequences: &[S],
        resolved_codec: Codes,
    ) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
    where
        SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
    {
        let keep_lengths = self.store_lengths;

        // No sequences: no payload, a single offset of zero and, when
        // requested, an empty lengths vector.
        if sequences.is_empty() {
            let bit_offsets = FixedVec::<u64, u64, E>::builder()
                .bit_width(BitWidth::Minimal)
                .build(&[0u64])?;
            let seq_lengths = keep_lengths
                .then(|| {
                    FixedVec::<u64, u64, E>::builder()
                        .bit_width(BitWidth::Minimal)
                        .build(&[])
                })
                .transpose()?;
            return Ok(SeqVec {
                data: Vec::new(),
                bit_offsets,
                seq_lengths,
                encoding: resolved_codec,
                _markers: PhantomData,
            });
        }

        let count = sequences.len();
        // One offset per sequence plus the trailing end-of-data offset.
        let (data, raw_offsets, raw_lengths) = encode_sequences_impl(
            sequences.iter(),
            resolved_codec,
            Vec::with_capacity(count + 1),
            keep_lengths,
            count,
        )?;

        let bit_offsets = FixedVec::<u64, u64, E>::builder()
            .bit_width(BitWidth::Minimal)
            .build(&raw_offsets)?;
        let seq_lengths = raw_lengths
            .map(|values| {
                FixedVec::<u64, u64, E>::builder()
                    .bit_width(BitWidth::Minimal)
                    .build(&values)
            })
            .transpose()?;

        Ok(SeqVec {
            data,
            bit_offsets,
            seq_lengths,
            encoding: resolved_codec,
            _markers: PhantomData,
        })
    }
}
/// Builder that constructs a [`SeqVec`] from an iterator of sequences.
///
/// `T` is the element type, `E` the bitstream endianness and `I` the source
/// that is turned into an iterator when the vector is built.
#[derive(Debug)]
pub struct SeqVecFromIterBuilder<T: Storable, E: Endianness, I> {
/// Source of the sequences; consumed by `build`.
iter: I,
/// Codec specification to resolve at build time.
codec_spec: Codec,
/// Whether the length of every sequence is stored next to the bit offsets.
store_lengths: bool,
/// Ties the builder to `T` and `E` without storing a value of either type.
_markers: PhantomData<(T, E)>,
}
impl<T, E, I, S> SeqVecFromIterBuilder<T, E, I>
where
    T: Storable,
    E: Endianness,
    I: IntoIterator<Item = S>,
    S: AsRef<[T]>,
{
    /// Creates a builder over `iter` that encodes with [`Codec::Gamma`] and
    /// does not store per-sequence lengths.
    #[inline]
    pub fn new(iter: I) -> Self {
        Self {
            iter,
            codec_spec: Codec::Gamma,
            store_lengths: false,
            _markers: PhantomData,
        }
    }

    /// Replaces the codec specification used to encode the elements.
    ///
    /// Specifications that need to analyse the data are rejected by
    /// [`build`](Self::build), because the iterator is consumed in one pass.
    #[inline]
    pub fn codec(mut self, codec_spec: Codec) -> Self {
        self.codec_spec = codec_spec;
        self
    }

    /// Chooses whether the length of every sequence is stored explicitly.
    #[inline]
    pub fn store_lengths(mut self, store: bool) -> Self {
        self.store_lengths = store;
        self
    }

    /// Consumes the iterator and encodes its sequences into a [`SeqVec`].
    ///
    /// # Errors
    ///
    /// * [`SeqVecError::InvalidParameters`] if the codec specification would
    ///   require analysing the data.
    /// * [`SeqVecError::CodecDispatch`] if the codec specification cannot be
    ///   resolved.
    /// * Any error raised while encoding or while building the offset/length
    ///   vectors is propagated.
    pub fn build(self) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
    where
        T: 'static,
        SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
    {
        if self.codec_spec.requires_analysis() {
            return Err(SeqVecError::InvalidParameters(
                "Automatic codec selection is not supported for iterator-based construction. \
                 Please provide a fully-specified codec"
                    .to_string(),
            ));
        }

        let resolved_codec = codec::resolve_codec::<u64>(&[], self.codec_spec)
            .map_err(|e| SeqVecError::CodecDispatch(e.to_string()))?;

        let iter = self.iter.into_iter();
        // The lower bound of the size hint is only a capacity hint; the
        // iterator may yield more sequences than announced.
        let (lower, _) = iter.size_hint();
        let offsets = Vec::with_capacity(lower.saturating_add(1));
        let (data, offsets, lengths) =
            encode_sequences_impl(iter, resolved_codec, offsets, self.store_lengths, lower)?;

        // `encode_sequences_impl` always appends a trailing end-of-data offset,
        // so an iterator that yielded nothing produces exactly one offset and
        // never an empty vector. Testing for emptiness made this branch
        // unreachable; checking for the sentinel-only case restores the
        // intended handling of empty input, matching the slice-based builder.
        if offsets.len() <= 1 {
            let empty_offsets = FixedVec::<u64, u64, E>::builder()
                .bit_width(BitWidth::Minimal)
                .build(&[0u64])?;
            let seq_lengths = if self.store_lengths {
                Some(
                    FixedVec::<u64, u64, E>::builder()
                        .bit_width(BitWidth::Minimal)
                        .build(&[])?,
                )
            } else {
                None
            };
            return Ok(SeqVec {
                data: Vec::new(),
                bit_offsets: empty_offsets,
                seq_lengths,
                encoding: resolved_codec,
                _markers: PhantomData,
            });
        }

        let bit_offsets = FixedVec::<u64, u64, E>::builder()
            .bit_width(BitWidth::Minimal)
            .build(&offsets)?;
        let seq_lengths = if let Some(lengths) = lengths {
            Some(
                FixedVec::<u64, u64, E>::builder()
                    .bit_width(BitWidth::Minimal)
                    .build(&lengths)?,
            )
        } else {
            None
        };

        Ok(SeqVec {
            data,
            bit_offsets,
            seq_lengths,
            encoding: resolved_codec,
            _markers: PhantomData,
        })
    }
}
// NOTE(review): the reason for `allow(dead_code)` is not visible in this file; both
// builders call `requires_analysis`, so confirm whether the attribute is still needed.
/// Extension trait telling whether a codec specification must inspect the data
/// before it can be resolved.
#[allow(dead_code)]
trait CodecSpecExt {
/// Returns `true` when the specification has to be resolved from the data.
fn requires_analysis(&self) -> bool;
}
impl CodecSpecExt for Codec {
    /// A specification requires analysis when it is [`Codec::Auto`] or when it
    /// names a parametric code whose parameter was left as `None`.
    #[inline]
    fn requires_analysis(&self) -> bool {
        matches!(
            *self,
            Codec::ExpGolomb { k: None }
                | Codec::Pi { k: None }
                | Codec::Golomb { b: None }
                | Codec::Zeta { k: None }
                | Codec::Rice { log2_b: None }
                | Codec::Auto
        )
    }
}
/// Output of `encode_sequences_impl`: the encoded data words, the bit offsets
/// (including the trailing end-of-data offset) and, when requested, the length
/// of every sequence.
type EncodeSequencesResult = (Vec<u64>, Vec<u64>, Option<Vec<u64>>);
/// Encodes all sequences back-to-back into a single bitstream.
///
/// Before each sequence is written, the current bit position is appended to
/// `offsets`; once the input is exhausted the final bit position is appended
/// as well, so `offsets` grows by one entry per sequence plus one.
///
/// # Parameters
///
/// * `sequences` - the sequences to encode, in order.
/// * `resolved_codec` - the concrete code used for every element.
/// * `offsets` - vector the offsets are appended to (callers pass it pre-sized).
/// * `store_lengths` - whether to also collect the length of each sequence.
/// * `lengths_capacity_hint` - initial capacity of the lengths vector.
///
/// # Errors
///
/// Propagates errors reported by the codec writer or the underlying bit writer.
fn encode_sequences_impl<T: Storable, E: Endianness, I, S>(
    sequences: I,
    resolved_codec: Codes,
    mut offsets: Vec<u64>,
    store_lengths: bool,
    lengths_capacity_hint: usize,
) -> Result<EncodeSequencesResult, SeqVecError>
where
    I: IntoIterator<Item = S>,
    S: AsRef<[T]>,
    SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
{
    let mut writer = SeqVecBitWriter::<E>::new(MemWordWriterVec::new(Vec::new()));
    let code_writer = CodecWriter::new(resolved_codec);
    let mut lengths: Option<Vec<u64>> =
        store_lengths.then(|| Vec::with_capacity(lengths_capacity_hint));

    // Total number of bits written so far, i.e. where the next sequence starts.
    let mut bit_cursor: u64 = 0;
    for seq in sequences {
        let items: &[T] = seq.as_ref();
        offsets.push(bit_cursor);
        if let Some(recorded) = lengths.as_mut() {
            recorded.push(items.len() as u64);
        }
        for item in items {
            bit_cursor += code_writer.write(&mut writer, item.to_word())? as u64;
        }
    }
    // Trailing offset marking the end of the last sequence.
    offsets.push(bit_cursor);

    writer.flush()?;
    let mut data = writer.into_inner()?.into_inner();
    data.shrink_to_fit();
    Ok((data, offsets, lengths))
}
impl<T: Storable + 'static, E: Endianness> SeqVec<T, E, Vec<u64>> {
    /// Returns a [`SeqVecBuilder`] for constructing a vector from a slice of
    /// sequences.
    #[inline]
    pub fn builder() -> SeqVecBuilder<T, E> {
        SeqVecBuilder::<T, E>::new()
    }

    /// Returns a [`SeqVecFromIterBuilder`] for constructing a vector from an
    /// iterator of sequences.
    #[inline]
    pub fn from_iter_builder<I, S>(iter: I) -> SeqVecFromIterBuilder<T, E, I>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<[T]>,
    {
        SeqVecFromIterBuilder::<T, E, I>::new(iter)
    }

    /// Assembles a vector from already encoded parts, without stored lengths.
    ///
    /// # Safety
    ///
    /// No validation is performed. The caller must guarantee that the parts
    /// are mutually consistent: `data` was encoded with `encoding` and
    /// `bit_offsets` describes it. The builders in this module produce one
    /// offset per sequence plus a trailing end-of-data offset; presumably the
    /// same layout is expected here (confirm against the readers of `SeqVec`).
    #[inline]
    pub unsafe fn from_raw_parts(
        data: Vec<u64>,
        bit_offsets: crate::fixed::FixedVec<u64, u64, E, Vec<u64>>,
        encoding: dsi_bitstream::prelude::Codes,
    ) -> Self {
        // SAFETY: the caller's obligations are forwarded unchanged; passing no
        // lengths adds no further requirement.
        unsafe { Self::from_raw_parts_with_lengths(data, bit_offsets, None, encoding) }
    }

    /// Assembles a vector from already encoded parts, optionally with the
    /// per-sequence lengths.
    ///
    /// # Safety
    ///
    /// No validation is performed. The caller must guarantee that the parts
    /// are mutually consistent: `data` was encoded with `encoding`,
    /// `bit_offsets` describes it and, when present, `seq_lengths` matches the
    /// sequences. The builders in this module produce one offset per sequence
    /// plus a trailing end-of-data offset and one length per sequence;
    /// presumably the same layout is expected here (confirm against the
    /// readers of `SeqVec`).
    #[inline]
    pub unsafe fn from_raw_parts_with_lengths(
        data: Vec<u64>,
        bit_offsets: crate::fixed::FixedVec<u64, u64, E, Vec<u64>>,
        seq_lengths: Option<crate::fixed::FixedVec<u64, u64, E, Vec<u64>>>,
        encoding: dsi_bitstream::prelude::Codes,
    ) -> Self {
        Self {
            _markers: PhantomData,
            encoding,
            seq_lengths,
            bit_offsets,
            data,
        }
    }
}