#[macro_use]
mod macros;
pub mod builder;
pub mod codec;
pub mod iter;
#[cfg(feature = "parallel")]
mod parallel;
pub mod reader;
pub mod seq_reader;
#[cfg(feature = "serde")]
pub mod serde;
pub mod slice;
pub mod traits;
pub use self::{codec::Codec, traits::Storable};
use crate::fixed::{Error as FixedVecError, FixedVec};
use dsi_bitstream::{
codes::params::DefaultReadParams,
dispatch::StaticCodeRead,
prelude::{
BitRead, BitSeek, BufBitReader, BufBitWriter, Codes, CodesRead, CodesWrite, Endianness,
MemWordReader, MemWordWriterVec,
},
traits::{BE, BitWrite, LE},
};
use mem_dbg::{DbgFlags, FlatType, MemDbgImpl, MemSize, SizeFlags};
use std::{
error::Error,
fmt::{self, Write},
marker::PhantomData,
};
pub use builder::{VarVecBuilder, VarVecFromIterBuilder};
use iter::{VarVecIntoIter, VarVecIter};
pub use reader::VarVecReader;
pub use seq_reader::VarVecSeqReader;
pub use slice::{VarVecSlice, VarVecSliceIter};
/// Error type returned by the fallible operations of this module.
#[derive(Debug)]
pub enum VarVecError {
/// Wraps a standard-library I/O error.
Io(std::io::Error),
/// Wraps an arbitrary boxed error; rendered with a "Bitstream error" prefix.
Bitstream(Box<dyn Error + Send + Sync>),
/// Construction or validation was given inconsistent arguments; the
/// payload is a human-readable description.
InvalidParameters(String),
/// Rendered with a "Codec dispatch error" prefix; the payload is a
/// human-readable description.
CodecDispatch(String),
/// An index was not smaller than the vector length; the payload is the
/// offending index.
IndexOutOfBounds(usize),
}
impl fmt::Display for VarVecError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
VarVecError::Io(e) => write!(f, "I/O error: {}", e),
VarVecError::Bitstream(e) => write!(f, "Bitstream error: {}", e),
VarVecError::InvalidParameters(s) => write!(f, "Invalid parameters: {}", s),
VarVecError::CodecDispatch(s) => write!(f, "Codec dispatch error: {}", s),
VarVecError::IndexOutOfBounds(index) => write!(f, "Index out of bounds: {}", index),
}
}
}
impl Error for VarVecError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
VarVecError::Io(e) => Some(e),
VarVecError::Bitstream(e) => Some(e.as_ref()),
_ => None,
}
}
}
impl From<std::io::Error> for VarVecError {
fn from(e: std::io::Error) -> Self {
VarVecError::Io(e)
}
}
/// Allows `?` on results whose error type is [`core::convert::Infallible`].
///
/// `Infallible` has no values, so this conversion can never actually run.
impl From<core::convert::Infallible> for VarVecError {
    fn from(never: core::convert::Infallible) -> Self {
        // An empty match on an uninhabited type is checked by the compiler,
        // so no runtime panic path is needed to express "cannot happen".
        match never {}
    }
}
impl From<FixedVecError> for VarVecError {
fn from(e: FixedVecError) -> Self {
VarVecError::InvalidParameters(e.to_string())
}
}
/// A vector of `T` values stored as a stream of variable-length codes in a
/// `u64` word buffer, with a table of sampled bit offsets.
///
/// `T` is the element type, `E` the endianness of the bitstream and `B` the
/// backing storage (anything viewable as `&[u64]`, `Vec<u64>` by default).
#[derive(Debug, Clone)]
pub struct VarVec<T: Storable, E: Endianness, B: AsRef<[u64]> = Vec<u64>> {
// Words holding the encoded elements (exposed through `as_limbs`).
pub(super) data: B,
// One entry per block of `k` elements; each entry is used as the bit
// position to seek to before decoding that block.
pub(super) samples: FixedVec<u64, u64, LE, B>,
// Sampling rate: number of elements per sample block (validated as non-zero in `from_parts`).
pub(super) k: usize,
// Number of elements stored.
pub(super) len: usize,
// Code used for the elements.
pub(super) encoding: Codes,
// Ties the otherwise unused `T` and `E` parameters to the type.
pub(super) _markers: PhantomData<(T, E)>,
}
/// Bit writer used inside the crate: a buffered writer over an in-memory
/// `Vec<u64>` word sink, with endianness `E`.
pub(crate) type VarVecBitWriter<E> = BufBitWriter<E, MemWordWriterVec<u64, Vec<u64>>>;
/// Bit reader used inside the crate: a buffered reader over a borrowed
/// `&[u64]` word slice, with endianness `E` and the default read parameters.
pub(crate) type VarVecBitReader<'a, E> =
BufBitReader<E, MemWordReader<u64, &'a [u64], true>, DefaultReadParams>;
/// Constructors and conversions available when the vector owns its storage.
impl<T: Storable + 'static, E: Endianness> VarVec<T, E, Vec<u64>> {
    /// Starts a [`VarVecBuilder`] for building a vector from a slice.
    pub fn builder() -> VarVecBuilder<T, E> {
        VarVecBuilder::new()
    }

    /// Starts a [`VarVecFromIterBuilder`] that will consume `iter`.
    pub fn from_iter_builder<I>(iter: I) -> VarVecFromIterBuilder<T, E, I>
    where
        I: IntoIterator<Item = T> + Clone,
    {
        VarVecFromIterBuilder::new(iter)
    }

    /// Consumes the vector and decodes every element into a plain `Vec<T>`.
    pub fn into_vec(self) -> Vec<T>
    where
        for<'a> VarVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
            + CodesRead<E>
            + BitSeek<Error = core::convert::Infallible>,
    {
        let decoded: Vec<T> = self.into_iter().collect();
        decoded
    }

    /// Builds a vector from `slice` using a sampling rate of 16 and
    /// [`Codec::Auto`].
    ///
    /// # Errors
    ///
    /// Propagates whatever error the builder's `build` step reports.
    pub fn from_slice(slice: &[T]) -> Result<Self, VarVecError>
    where
        for<'a> crate::variable::VarVecBitWriter<E>:
            BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
    {
        let configured = Self::builder().k(16).codec(Codec::Auto);
        configured.build(slice)
    }
}
/// Construction from raw parts, slicing, and accessors that need no decoding
/// bounds (except `iter`, which carries its own).
impl<T: Storable, E: Endianness, B: AsRef<[u64]>> VarVec<T, E, B> {
    /// Reassembles a vector from its raw components.
    ///
    /// `samples_data`, `samples_len` and `samples_num_bits` describe the
    /// sample table; `k` is the sampling rate and `len` the element count.
    ///
    /// # Errors
    ///
    /// Returns [`VarVecError::InvalidParameters`] when the sample table cannot
    /// be built, when `k` is zero, or when the number of samples differs from
    /// `ceil(len / k)`.
    pub fn from_parts(
        data: B,
        samples_data: B,
        samples_len: usize,
        samples_num_bits: usize,
        k: usize,
        len: usize,
        encoding: Codes,
    ) -> Result<Self, VarVecError> {
        let samples =
            FixedVec::<u64, u64, LE, B>::from_parts(samples_data, samples_len, samples_num_bits)?;

        if k == 0 {
            let reason = "Sampling rate k cannot be zero".to_string();
            return Err(VarVecError::InvalidParameters(reason));
        }

        let expected_samples = match len {
            0 => 0,
            nonzero => nonzero.div_ceil(k),
        };
        if samples.len() != expected_samples {
            let reason = format!(
                "Inconsistent number of samples. Expected {}, found {}",
                expected_samples,
                samples.len()
            );
            return Err(VarVecError::InvalidParameters(reason));
        }

        // The checks above establish the consistency `new_unchecked` relies on.
        let assembled = unsafe { Self::new_unchecked(data, samples, k, len, encoding) };
        Ok(assembled)
    }

    /// Assembles the struct without validating any of its arguments.
    ///
    /// # Safety
    ///
    /// Nothing is checked here; callers are responsible for passing mutually
    /// consistent components.
    pub(crate) unsafe fn new_unchecked(
        data: B,
        samples: FixedVec<u64, u64, LE, B>,
        k: usize,
        len: usize,
        encoding: Codes,
    ) -> Self {
        Self {
            _markers: PhantomData,
            encoding,
            len,
            k,
            samples,
            data,
        }
    }

    /// Returns a view over `len` elements starting at `start`, or `None` if
    /// that range does not fit inside the vector.
    pub fn slice(&'_ self, start: usize, len: usize) -> Option<VarVecSlice<'_, T, E, B>> {
        if start.saturating_add(len) <= self.len {
            Some(VarVecSlice::new(self, start..start + len))
        } else {
            None
        }
    }

    /// Splits the vector into the views `[0, mid)` and `[mid, len)`, or
    /// returns `None` when `mid` exceeds the length.
    #[allow(clippy::type_complexity)]
    pub fn split_at(
        &'_ self,
        mid: usize,
    ) -> Option<(VarVecSlice<'_, T, E, B>, VarVecSlice<'_, T, E, B>)> {
        if mid <= self.len {
            let head = VarVecSlice::new(self, 0..mid);
            let tail = VarVecSlice::new(self, mid..self.len);
            Some((head, tail))
        } else {
            None
        }
    }

    /// Number of elements stored.
    #[inline]
    pub fn len(&self) -> usize {
        self.len
    }

    /// `true` when the vector holds no elements.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// The sampling rate `k`.
    #[inline]
    pub fn sampling_rate(&self) -> usize {
        self.k
    }

    /// Number of entries in the sample table.
    #[inline]
    pub fn num_samples(&self) -> usize {
        self.samples.len()
    }

    /// Old name of [`sampling_rate`](Self::sampling_rate).
    #[deprecated(since = "0.6.0", note = "renamed to `sampling_rate`; use `sampling_rate` instead")]
    #[inline]
    pub fn get_sampling_rate(&self) -> usize {
        self.sampling_rate()
    }

    /// Old name of [`num_samples`](Self::num_samples).
    #[deprecated(
        since = "0.6.0",
        note = "renamed to `num_samples`; use `num_samples` instead"
    )]
    #[inline]
    pub fn get_num_samples(&self) -> usize {
        self.num_samples()
    }

    /// Borrows the sample table.
    #[inline]
    pub fn samples_ref(&self) -> &FixedVec<u64, u64, LE, B> {
        &self.samples
    }

    /// Borrows the encoded data as a slice of `u64` words.
    #[inline]
    pub fn as_limbs(&self) -> &[u64] {
        self.data.as_ref()
    }

    /// The code used for the stored elements.
    #[inline]
    pub fn encoding(&self) -> Codes {
        self.encoding
    }

    /// Copies the encoded data into a freshly allocated `Vec<u64>`.
    pub fn limbs(&self) -> Vec<u64> {
        let words: &[u64] = self.data.as_ref();
        words.to_vec()
    }

    /// Returns an iterator that yields the elements by value.
    pub fn iter(&'_ self) -> impl Iterator<Item = T> + '_
    where
        for<'a> VarVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
            + CodesRead<E>
            + BitSeek<Error = core::convert::Infallible>,
    {
        VarVecIter::new(self)
    }
}
/// Element access. Every method here needs a bit reader over the backing
/// words whose read and seek operations are infallible.
impl<T: Storable, E: Endianness, B: AsRef<[u64]>> VarVec<T, E, B>
where
for<'a> VarVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
+ CodesRead<E>
+ BitSeek<Error = core::convert::Infallible>,
{
/// Creates a [`VarVecReader`] borrowing this vector.
pub fn reader(&'_ self) -> VarVecReader<'_, T, E, B> {
VarVecReader::new(self)
}
/// Creates a [`VarVecSeqReader`] borrowing this vector.
pub fn seq_reader(&'_ self) -> VarVecSeqReader<'_, T, E, B> {
VarVecSeqReader::new(self)
}
/// Returns the element at `index`, or `None` if `index >= len`.
#[inline]
pub fn get(&self, index: usize) -> Option<T> {
if index >= self.len {
return None;
}
// SAFETY: `index < self.len` was checked just above.
Some(unsafe { self.get_unchecked(index) })
}
/// Returns the element at `index` without a bounds check, using a reader
/// created for this single call.
///
/// # Safety
///
/// NOTE(review): presumably `index` must be smaller than `len()`, as the
/// checked `get` guarantees before calling this — confirm against
/// `VarVecReader::get_unchecked`.
#[inline]
pub unsafe fn get_unchecked(&self, index: usize) -> T {
let mut reader = self.reader();
unsafe { reader.get_unchecked(index) }
}
/// Returns the elements at `indices`, in the same order as `indices`.
///
/// # Errors
///
/// Returns [`VarVecError::IndexOutOfBounds`] carrying the first index that
/// is not smaller than the length; nothing is decoded in that case.
pub fn get_many(&self, indices: &[usize]) -> Result<Vec<T>, VarVecError> {
if indices.is_empty() {
return Ok(Vec::new());
}
for &index in indices {
if index >= self.len {
return Err(VarVecError::IndexOutOfBounds(index));
}
}
// SAFETY: every index was validated by the loop above.
Ok(unsafe { self.get_many_unchecked(indices) })
}
/// Same as [`get_many`](Self::get_many) but without validating the indices.
///
/// # Safety
///
/// NOTE(review): presumably every index must be smaller than `len()`, as
/// `get_many` guarantees before calling this — confirm.
#[allow(clippy::uninit_vec)]
pub unsafe fn get_many_unchecked(&self, indices: &[usize]) -> Vec<T> {
if indices.is_empty() {
return Vec::new();
}
// The output is sized up front and left uninitialised; the inner routine
// writes slot `i` once for each `(index, i)` pair built below, so every
// slot is assigned when it completes.
// NOTE(review): this exposes uninitialised `T` through a `&mut [T]`;
// `MaybeUninit` would be the sound way to express it.
let mut results = Vec::with_capacity(indices.len());
unsafe { results.set_len(indices.len()) };
// Pair each requested index with its position in the request, then sort
// by index so the inner routine visits the indices in ascending order.
let mut indexed_indices: Vec<(usize, usize)> = indices
.iter()
.enumerate()
.map(|(i, &idx)| (idx, i))
.collect();
indexed_indices.sort_unstable_by_key(|&(idx, _)| idx);
// Index <-> block conversions use shifts when `k` is a power of two and
// division/multiplication otherwise.
if self.k.is_power_of_two() {
let k_exp = self.k.trailing_zeros();
self.get_many_dsi_inner(
&indexed_indices,
&mut results,
|idx| idx >> k_exp,
|block| block << k_exp,
)
.unwrap();
} else {
self.get_many_dsi_inner(
&indexed_indices,
&mut results,
|idx| idx / self.k,
|block| block * self.k,
)
.unwrap();
}
results
}
/// Decodes the requested elements with a single reader.
///
/// `indexed_indices` holds `(element index, output slot)` pairs, which the
/// caller passes sorted by element index. `block_of` maps an element index
/// to its sample block and `start_of_block` maps a block to its first
/// element index. Each decoded value is stored in `results` at its output
/// slot.
fn get_many_dsi_inner<F1, F2>(
&self,
indexed_indices: &[(usize, usize)],
results: &mut [T],
block_of: F1,
start_of_block: F2,
) -> Result<(), VarVecError>
where
F1: Fn(usize) -> usize,
F2: Fn(usize) -> usize,
{
let mut reader = self.reader();
// Index of the next element the reader would decode.
let mut current_decoded_index: usize = 0;
for &(target_index, original_position) in indexed_indices {
// Seek to the target's sample when the target lies before the cursor
// (e.g. a repeated index) or in a different block than the element
// decoded last.
if target_index < current_decoded_index
|| block_of(target_index) != block_of(current_decoded_index.saturating_sub(1))
{
let target_sample_block = block_of(target_index);
let start_bit = unsafe { self.samples.get_unchecked(target_sample_block) };
reader.reader.set_bit_pos(start_bit)?;
current_decoded_index = start_of_block(target_sample_block);
}
// Decode and discard the elements between the cursor and the target.
for _ in current_decoded_index..target_index {
reader.code_reader.read(&mut reader.reader)?;
}
let value = reader.code_reader.read(&mut reader.reader)?;
results[original_position] = Storable::from_word(value);
current_decoded_index = target_index + 1;
}
Ok(())
}
/// Returns the elements at the indices produced by `indices`, in iteration
/// order, using a single sequential reader.
///
/// # Errors
///
/// Returns [`VarVecError::IndexOutOfBounds`] for the first index for which
/// the sequential reader yields `None`, and propagates any error the
/// reader itself reports.
pub fn get_many_from_iter<I>(&self, indices: I) -> Result<Vec<T>, VarVecError>
where
I: IntoIterator<Item = usize>,
{
let indices_iter = indices.into_iter();
// Pre-size the output from the iterator's lower size bound.
let (lower_bound, _) = indices_iter.size_hint();
let mut results = Vec::with_capacity(lower_bound);
let mut seq_reader = self.seq_reader();
for index in indices_iter {
let value = seq_reader
.get(index)?
.ok_or(VarVecError::IndexOutOfBounds(index))?;
results.push(value);
}
Ok(results)
}
}
/// Binary search over the stored elements, mirroring the slice API.
///
/// As with slices, the results are only meaningful when the elements are
/// ordered consistently with the comparison used.
impl<T: Storable + Ord, E: Endianness, B: AsRef<[u64]>> VarVec<T, E, B>
where
    for<'a> VarVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
        + CodesRead<E>
        + BitSeek<Error = core::convert::Infallible>,
{
    /// Searches for `value`; `Ok(i)` is a matching position, `Err(i)` the
    /// position where the search interval closed.
    pub fn binary_search(&self, value: &T) -> Result<usize, usize> {
        self.binary_search_by(|candidate| candidate.cmp(value))
    }

    /// Searches using `f`, which reports how a decoded element compares to
    /// the target.
    #[inline]
    pub fn binary_search_by<F>(&self, mut f: F) -> Result<usize, usize>
    where
        F: FnMut(T) -> std::cmp::Ordering,
    {
        use std::cmp::Ordering;

        let (mut lo, mut hi) = (0, self.len());
        // One reader is reused for every probe.
        let mut reader = self.reader();
        while lo < hi {
            let mid = lo + (hi - lo) / 2;
            // SAFETY: `lo <= mid < hi <= self.len()`.
            let probe = unsafe { reader.get_unchecked(mid) };
            match f(probe) {
                Ordering::Equal => return Ok(mid),
                Ordering::Less => lo = mid + 1,
                Ordering::Greater => hi = mid,
            }
        }
        Err(lo)
    }

    /// Searches by comparing the key extracted by `f` from each element
    /// against `b`.
    #[inline]
    pub fn binary_search_by_key<K: Ord, F>(&self, b: &K, mut f: F) -> Result<usize, usize>
    where
        F: FnMut(T) -> K,
    {
        self.binary_search_by(|element| {
            let key = f(element);
            key.cmp(b)
        })
    }
}
impl<T: Storable, E: Endianness, B: AsRef<[u64]> + MemSize + FlatType> MemSize for VarVec<T, E, B> {
    /// Size of the struct itself plus what `data` and `samples` report beyond
    /// their own inline footprint (already counted in `size_of::<Self>()`).
    fn mem_size_rec(&self, flags: SizeFlags, _refs: &mut mem_dbg::HashMap<usize, usize>) -> usize {
        let inline_size = core::mem::size_of::<Self>();
        let data_extra = self.data.mem_size(flags) - core::mem::size_of::<B>();
        let samples_extra =
            self.samples.mem_size(flags) - core::mem::size_of::<FixedVec<u64, u64, LE, B>>();
        inline_size + data_extra + samples_extra
    }
}
// Private newtype around a borrowed `Codes`, used so this module can provide
// its own `mem_dbg` trait implementations for the `encoding` field.
struct CodeWrapper<'a>(&'a Codes);
impl mem_dbg::MemSize for CodeWrapper<'_> {
fn mem_size_rec(&self, _flags: mem_dbg::SizeFlags, _refs: &mut mem_dbg::HashMap<usize, usize>) -> usize {
core::mem::size_of_val(self.0)
}
}
impl mem_dbg::MemDbgImpl for CodeWrapper<'_> {
fn _mem_dbg_depth_on(
&self,
writer: &mut impl core::fmt::Write,
total_size: usize,
max_depth: usize,
prefix: &mut String,
field_name: Option<&str>,
is_last: bool,
padded_size: usize,
flags: DbgFlags,
_dbg_refs: &mut mem_dbg::HashSet<usize>,
) -> core::fmt::Result {
if prefix.len() > max_depth {
return Ok(());
}
let real_size = self.mem_size(flags.to_size_flags());
let mut buffer = String::new();
if flags.contains(DbgFlags::HUMANIZE) {
let (value, uom) = mem_dbg::humanize_float(real_size);
if uom == " B" {
let _ = write!(&mut buffer, "{:>5} B ", real_size);
} else {
let precision = if value.abs() >= 100.0 {
1
} else if value.abs() >= 10.0 {
2
} else {
3
};
let _ = write!(&mut buffer, "{0:>4.1$} {2} ", value, precision, uom);
}
} else {
let align = mem_dbg::n_of_digits(total_size);
let _ = write!(&mut buffer, "{:>align$} B ", real_size, align = align);
}
if flags.contains(DbgFlags::PERCENTAGE) {
let percentage = if total_size == 0 {
100.0
} else {
100.0 * real_size as f64 / total_size as f64
};
let _ = write!(&mut buffer, "{:>6.2}% ", percentage);
}
if flags.contains(DbgFlags::COLOR) {
writer.write_fmt(format_args!("{}", mem_dbg::color(real_size)))?;
}
writer.write_str(&buffer)?;
if flags.contains(DbgFlags::COLOR) {
writer.write_fmt(format_args!("{}", mem_dbg::reset_color()))?;
}
if !prefix.is_empty() {
writer.write_str(&prefix[2..])?;
writer.write_char(if is_last { '╰' } else { '├' })?;
writer.write_char('â•´')?;
}
if let Some(field_name) = field_name {
writer.write_fmt(format_args!("{}", field_name))?;
}
if flags.contains(DbgFlags::TYPE_NAME) {
if flags.contains(DbgFlags::COLOR) {
writer.write_fmt(format_args!("{}", mem_dbg::type_color()))?;
}
writer.write_fmt(format_args!(": {:?}", self.0))?;
if flags.contains(DbgFlags::COLOR) {
writer.write_fmt(format_args!("{}", mem_dbg::reset_color()))?;
}
}
let padding = padded_size - core::mem::size_of_val(self.0);
if padding != 0 {
writer.write_fmt(format_args!(" [{}B]", padding))?;
}
writer.write_char('\n')?;
Ok(())
}
fn _mem_dbg_rec_on(
&self,
_writer: &mut impl core::fmt::Write,
_total_size: usize,
_max_depth: usize,
_prefix: &mut String,
_is_last: bool,
_flags: DbgFlags,
_dbg_refs: &mut mem_dbg::HashSet<usize>,
) -> core::fmt::Result {
Ok(())
}
}
impl<T: Storable, E: Endianness, B: AsRef<[u64]> + MemDbgImpl + FlatType> MemDbgImpl for VarVec<T, E, B> {
    /// Prints one line per field, in declaration order. Only the final field
    /// (`_markers`) is flagged as the last child. `encoding` goes through
    /// [`CodeWrapper`].
    fn _mem_dbg_rec_on(
        &self,
        writer: &mut impl core::fmt::Write,
        total_size: usize,
        max_depth: usize,
        prefix: &mut String,
        _is_last: bool,
        flags: DbgFlags,
        dbg_refs: &mut mem_dbg::HashSet<usize>,
    ) -> core::fmt::Result {
        let data_size = core::mem::size_of_val(&self.data);
        self.data._mem_dbg_depth_on(
            writer, total_size, max_depth, prefix, Some("data"), false, data_size, flags, dbg_refs,
        )?;

        let samples_size = core::mem::size_of_val(&self.samples);
        self.samples._mem_dbg_depth_on(
            writer, total_size, max_depth, prefix, Some("samples"), false, samples_size, flags,
            dbg_refs,
        )?;

        let k_size = core::mem::size_of_val(&self.k);
        self.k._mem_dbg_depth_on(
            writer, total_size, max_depth, prefix, Some("k"), false, k_size, flags, dbg_refs,
        )?;

        let len_size = core::mem::size_of_val(&self.len);
        self.len._mem_dbg_depth_on(
            writer, total_size, max_depth, prefix, Some("len"), false, len_size, flags, dbg_refs,
        )?;

        let encoding_size = core::mem::size_of_val(&self.encoding);
        CodeWrapper(&self.encoding)._mem_dbg_depth_on(
            writer, total_size, max_depth, prefix, Some("encoding"), false, encoding_size, flags,
            dbg_refs,
        )?;

        let markers_size = core::mem::size_of_val(&self._markers);
        self._markers._mem_dbg_depth_on(
            writer, total_size, max_depth, prefix, Some("_markers"), true, markers_size, flags,
            dbg_refs,
        )
    }
}
/// Consuming iteration over an owned, `Vec<u64>`-backed vector.
impl<T: Storable + 'static, E: Endianness + 'static> IntoIterator for VarVec<T, E, Vec<u64>>
where
    for<'a> VarVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
        + CodesRead<E>
        + BitSeek<Error = core::convert::Infallible>,
{
    type Item = T;
    type IntoIter = VarVecIntoIter<T, E>;

    /// Hands the whole vector over to a [`VarVecIntoIter`].
    fn into_iter(self) -> Self::IntoIter {
        let owned = self;
        VarVecIntoIter::new(owned)
    }
}
/// Little-endian [`VarVec`] over `T` with the default `Vec<u64>` storage.
pub type UVarVec<T> = VarVec<T, LE>;
/// Little-endian [`VarVec`] over `T`; the same type as [`UVarVec`].
// NOTE(review): the `S` prefix suggests use with signed `T` — confirm.
pub type SVarVec<T> = VarVec<T, LE>;
/// Big-endian [`VarVec`] of `u64`.
pub type BEVarVec = VarVec<u64, BE>;
/// Little-endian [`VarVec`] of `u64`.
pub type LEVarVec = VarVec<u64, LE>;
/// Big-endian [`VarVec`] of `i64`.
pub type BESVarVec = VarVec<i64, BE>;
/// Little-endian [`VarVec`] of `i64`.
pub type LESVarVec = VarVec<i64, LE>;
// ---------------------------------------------------------------------------
// Deprecated aliases.
//
// Each item below keeps a pre-0.6.0 `IntVec*` name alive as an alias of its
// `VarVec*` replacement; the deprecation note names the replacement.
// ---------------------------------------------------------------------------
#[deprecated(since = "0.6.0", note = "renamed to `VarVec`; use `VarVec` instead")]
pub type IntVec<T, E, B = Vec<u64>> = VarVec<T, E, B>;
#[deprecated(
since = "0.6.0",
note = "renamed to `VarVecBuilder`; use `VarVecBuilder` instead"
)]
pub type IntVecBuilder<T, E> = VarVecBuilder<T, E>;
#[deprecated(
since = "0.6.0",
note = "renamed to `VarVecFromIterBuilder`; use `VarVecFromIterBuilder` instead"
)]
pub type IntVecFromIterBuilder<T, E, I> = VarVecFromIterBuilder<T, E, I>;
#[deprecated(
since = "0.6.0",
note = "renamed to `VarVecReader`; use `VarVecReader` instead"
)]
pub type IntVecReader<'a, T, E, B> = VarVecReader<'a, T, E, B>;
#[deprecated(
since = "0.6.0",
note = "renamed to `VarVecSeqReader`; use `VarVecSeqReader` instead"
)]
pub type IntVecSeqReader<'a, T, E, B> = VarVecSeqReader<'a, T, E, B>;
#[deprecated(
since = "0.6.0",
note = "renamed to `VarVecSlice`; use `VarVecSlice` instead"
)]
pub type IntVecSlice<'a, T, E, B> = VarVecSlice<'a, T, E, B>;
#[deprecated(
since = "0.6.0",
note = "renamed to `VarVecIter`; use `VarVecIter` instead"
)]
pub type IntVecIter<'a, T, E, B> = VarVecIter<'a, T, E, B>;
#[deprecated(
since = "0.6.0",
note = "renamed to `VarVecIntoIter`; use `VarVecIntoIter` instead"
)]
pub type IntVecIntoIter<T, E> = VarVecIntoIter<T, E>;
#[deprecated(
since = "0.6.0",
note = "renamed to `VarVecError`; use `VarVecError` instead"
)]
pub type IntVecError = VarVecError;
#[deprecated(
since = "0.6.0",
note = "renamed to `VarVecSliceIter`; use `VarVecSliceIter` instead"
)]
pub type IntVecSliceIter<'a, T, E, B> = VarVecSliceIter<'a, T, E, B>;
#[deprecated(since = "0.6.0", note = "renamed to `UVarVec`; use `UVarVec` instead")]
pub type UIntVec<T> = UVarVec<T>;
#[deprecated(since = "0.6.0", note = "renamed to `SVarVec`; use `SVarVec` instead")]
pub type SIntVec<T> = SVarVec<T>;
#[deprecated(
since = "0.6.0",
note = "renamed to `BEVarVec`; use `BEVarVec` instead"
)]
pub type BEIntVec = BEVarVec;
#[deprecated(
since = "0.6.0",
note = "renamed to `LEVarVec`; use `LEVarVec` instead"
)]
pub type LEIntVec = LEVarVec;
#[deprecated(
since = "0.6.0",
note = "renamed to `BESVarVec`; use `BESVarVec` instead"
)]
pub type BESIntVec = BESVarVec;
#[deprecated(
since = "0.6.0",
note = "renamed to `LESVarVec`; use `LESVarVec` instead"
)]
pub type LESIntVec = LESVarVec;
// Re-export of the old codec spec name; `allow(deprecated)` silences the
// warning the re-export itself would otherwise raise.
#[deprecated(since = "0.6.0", note = "renamed to `Codec`; use `Codec` instead")]
#[allow(deprecated)]
pub use self::codec::VariableCodecSpec;
// Crate-internal aliases; `allow(dead_code)` because they may have no
// remaining users inside the crate.
#[deprecated(
since = "0.6.0",
note = "renamed to `VarVecBitReader`; use `VarVecBitReader` instead"
)]
#[allow(dead_code)]
pub(crate) type IntVecBitReader<'a, E> = VarVecBitReader<'a, E>;
#[deprecated(
since = "0.6.0",
note = "renamed to `VarVecBitWriter`; use `VarVecBitWriter` instead"
)]
#[allow(dead_code)]
pub(crate) type IntVecBitWriter<E> = VarVecBitWriter<E>;
/// Compares a [`VarVec`] with anything viewable as `&[T]` (slices, vectors,
/// arrays): equal when the lengths match and every decoded element equals the
/// element at the same position.
impl<T, E, B, O> PartialEq<O> for VarVec<T, E, B>
where
    T: Storable + PartialEq,
    E: Endianness,
    B: AsRef<[u64]>,
    O: AsRef<[T]>,
    for<'a> VarVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
        + CodesRead<E>
        + BitSeek<Error = core::convert::Infallible>,
{
    fn eq(&self, other: &O) -> bool {
        let expected = other.as_ref();
        // The length check comes first so a mismatch costs no decoding.
        self.len() == expected.len()
            && self
                .iter()
                .zip(expected.iter())
                .all(|(decoded, reference)| decoded == *reference)
    }
}