#![cfg(target_pointer_width = "64")]
use std::io::{Read, Write};
use anyhow::{anyhow, Result};
use num_traits::ToPrimitive;
use crate::bit_vectors::{self, BitVector, Rank, Rank9Sel};
use crate::int_vectors::{Access, Build, CompactVector, NumVals};
use crate::utils;
use crate::Serializable;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DacsOpt {
data: Vec<CompactVector>,
flags: Vec<Rank9Sel>,
}
impl DacsOpt {
pub fn from_slice<T>(vals: &[T], max_levels: Option<usize>) -> Result<Self>
where
T: ToPrimitive,
{
let max_levels = max_levels.unwrap_or(64);
if !(1..=64).contains(&max_levels) {
return Err(anyhow!(
"max_levels must be in 1..=64, but got {max_levels}"
));
}
if vals.is_empty() {
return Ok(Self::default());
}
for x in vals {
x.to_usize()
.ok_or_else(|| anyhow!("vals must consist only of values castable into usize."))?;
}
let widths = Self::compute_opt_widths(vals, max_levels);
Self::build(vals, &widths)
}
fn compute_opt_widths<T>(vals: &[T], max_levels: usize) -> Vec<usize>
where
T: ToPrimitive,
{
assert!(!vals.is_empty());
assert_ne!(max_levels, 0);
let mut maxv = 0;
for x in vals {
maxv = maxv.max(x.to_usize().unwrap());
}
let num_bits = utils::needed_bits(maxv);
let max_levels = max_levels.min(num_bits);
let nums_ints = {
let mut nums_ints = vec![0; num_bits + 1];
for x in vals {
nums_ints[utils::needed_bits(x.to_usize().unwrap()) - 1] += 1;
}
for j in (0..num_bits).rev() {
nums_ints[j] += nums_ints[j + 1];
}
nums_ints
};
debug_assert_eq!(nums_ints[0], vals.len());
debug_assert_eq!(*nums_ints.last().unwrap(), 0);
let mut dp_s = vec![vec![0; max_levels]; num_bits + 1];
let mut dp_b = vec![vec![0; max_levels]; num_bits + 1];
for j in 0..num_bits {
dp_s[j][0] = (num_bits - j) * nums_ints[j];
dp_b[j][0] = num_bits - j;
}
for r in 1..max_levels {
for j in 0..num_bits {
dp_s[j][r] = usize::MAX;
for b in 1..=num_bits - j {
let c = (b + 1) * nums_ints[j] + dp_s[j + b][r - 1];
if c <= dp_s[j][r] {
dp_s[j][r] = c;
dp_b[j][r] = b;
}
}
}
}
let mut min_level_idx = 0;
for r in 1..max_levels {
if dp_s[0][r] < dp_s[0][min_level_idx] {
min_level_idx = r;
}
}
let num_levels = min_level_idx + 1;
let mut widths = vec![0; num_levels];
let (mut j, mut r) = (0, 0);
while j < num_bits {
widths[r] = dp_b[j][num_levels - r - 1];
j += widths[r];
r += 1;
}
assert_eq!(j, num_bits);
assert_eq!(r, num_levels);
assert_eq!(widths.iter().sum::<usize>(), num_bits);
widths
}
fn build<T>(vals: &[T], widths: &[usize]) -> Result<Self>
where
T: ToPrimitive,
{
assert!(!vals.is_empty());
assert!(!widths.is_empty());
if widths.len() == 1 {
let mut data = CompactVector::with_capacity(vals.len(), widths[0]).unwrap();
vals.iter()
.for_each(|x| data.push_int(x.to_usize().unwrap()).unwrap());
return Ok(Self {
data: vec![data],
flags: vec![],
});
}
let mut data: Vec<_> = widths
.iter()
.map(|&w| CompactVector::new(w).unwrap())
.collect();
let mut flags = vec![BitVector::default(); widths.len() - 1];
for x in vals {
let mut x = x.to_usize().unwrap();
for (j, &width) in widths.iter().enumerate() {
let mask = (1 << width) - 1;
data[j].push_int(x & mask).unwrap();
x >>= width;
if j == widths.len() - 1 {
assert_eq!(x, 0);
break;
} else if x == 0 {
flags[j].push_bit(false);
break;
}
flags[j].push_bit(true);
}
}
let flags = flags.into_iter().map(Rank9Sel::new).collect();
Ok(Self { data, flags })
}
pub const fn iter(&self) -> Iter {
Iter::new(self)
}
#[inline(always)]
pub fn len(&self) -> usize {
self.data[0].len()
}
#[inline(always)]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline(always)]
pub fn num_levels(&self) -> usize {
self.data.len()
}
#[inline(always)]
pub fn widths(&self) -> Vec<usize> {
self.data.iter().map(|d| d.width()).collect()
}
}
impl Default for DacsOpt {
fn default() -> Self {
Self {
data: vec![CompactVector::default()],
flags: vec![],
}
}
}
impl Build for DacsOpt {
fn build_from_slice<T>(vals: &[T]) -> Result<Self>
where
T: ToPrimitive,
Self: Sized,
{
Self::from_slice(vals, None)
}
}
impl NumVals for DacsOpt {
fn num_vals(&self) -> usize {
self.len()
}
}
impl Access for DacsOpt {
fn access(&self, mut pos: usize) -> Option<usize> {
if self.len() <= pos {
return None;
}
let mut x = 0;
let mut width = 0;
for j in 0..self.num_levels() {
x |= self.data[j].access(pos).unwrap() << width;
if j == self.num_levels() - 1
|| !bit_vectors::Access::access(&self.flags[j], pos).unwrap()
{
break;
}
pos = self.flags[j].rank1(pos).unwrap();
width += self.data[j].width();
}
Some(x)
}
}
pub struct Iter<'a> {
seq: &'a DacsOpt,
pos: usize,
}
impl<'a> Iter<'a> {
pub const fn new(seq: &'a DacsOpt) -> Self {
Self { seq, pos: 0 }
}
}
impl Iterator for Iter<'_> {
type Item = usize;
#[inline(always)]
fn next(&mut self) -> Option<Self::Item> {
if self.pos < self.seq.len() {
let x = self.seq.access(self.pos).unwrap();
self.pos += 1;
Some(x)
} else {
None
}
}
#[inline(always)]
fn size_hint(&self) -> (usize, Option<usize>) {
(self.seq.len(), Some(self.seq.len()))
}
}
impl Serializable for DacsOpt {
fn serialize_into<W: Write>(&self, mut writer: W) -> Result<usize> {
let mut mem = 0;
mem += self.data.serialize_into(&mut writer)?;
mem += self.flags.serialize_into(&mut writer)?;
Ok(mem)
}
fn deserialize_from<R: Read>(mut reader: R) -> Result<Self> {
let data = Vec::<CompactVector>::deserialize_from(&mut reader)?;
let flags = Vec::<Rank9Sel>::deserialize_from(&mut reader)?;
Ok(Self { data, flags })
}
fn size_in_bytes(&self) -> usize {
self.data.size_in_bytes() + self.flags.size_in_bytes()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_opt_witdhs_0() {
let widths = DacsOpt::compute_opt_widths(&[0b0, 0b0, 0b0, 0b0], 3);
assert_eq!(widths, vec![1]);
}
#[test]
fn test_opt_witdhs_1() {
let widths = DacsOpt::compute_opt_widths(&[0b11, 0b1, 0b1111, 0b11], 1);
assert_eq!(widths, vec![4]);
}
#[test]
fn test_opt_witdhs_2() {
let widths = DacsOpt::compute_opt_widths(&[0b11, 0b1, 0b1111, 0b11], 2);
assert_eq!(widths, vec![2, 2]);
}
#[test]
fn test_opt_witdhs_3() {
let widths = DacsOpt::compute_opt_widths(&[0b11, 0b1, 0b1111, 0b11], 3);
assert_eq!(widths, vec![2, 2]);
}
#[test]
fn test_opt_witdhs_4() {
let widths = DacsOpt::compute_opt_widths(&[0b1111, 0b1111, 0b1111, 0b1111], 3);
assert_eq!(widths, vec![4]);
}
#[test]
fn test_basic_1() {
let seq = DacsOpt::from_slice(&[0b11, 0b1, 0b1111, 0b11], None).unwrap();
assert_eq!(
seq.data,
vec![
CompactVector::from_slice(&[0b11, 0b1, 0b11, 0b11]).unwrap(),
CompactVector::from_slice(&[0b11]).unwrap(),
]
);
assert_eq!(
seq.flags,
vec![Rank9Sel::from_bits([false, false, true, false])]
);
assert!(!seq.is_empty());
assert_eq!(seq.len(), 4);
assert_eq!(seq.num_levels(), 2);
assert_eq!(seq.widths(), vec![2, 2]);
assert_eq!(seq.access(0), Some(0b11));
assert_eq!(seq.access(1), Some(0b1));
assert_eq!(seq.access(2), Some(0b1111));
assert_eq!(seq.access(3), Some(0b11));
}
#[test]
fn test_basic_2() {
let seq = DacsOpt::from_slice(&[5, 0, 100000, 334], None).unwrap();
assert_eq!(
seq.data,
vec![
CompactVector::from_slice(&[0b101, 0b000, 0b000, 0b110]).unwrap(),
CompactVector::from_slice(&[0b010100, 0b101001]).unwrap(),
CompactVector::from_slice(&[0b11000011]).unwrap(),
]
);
assert_eq!(
seq.flags,
vec![
Rank9Sel::from_bits([false, false, true, true]),
Rank9Sel::from_bits([true, false])
]
);
assert!(!seq.is_empty());
assert_eq!(seq.len(), 4);
assert_eq!(seq.num_levels(), 3);
assert_eq!(seq.widths(), vec![3, 6, 8]);
assert_eq!(seq.access(0), Some(5));
assert_eq!(seq.access(1), Some(0));
assert_eq!(seq.access(2), Some(100000));
assert_eq!(seq.access(3), Some(334));
}
#[test]
fn test_empty() {
let seq = DacsOpt::from_slice::<usize>(&[], None).unwrap();
assert!(seq.is_empty());
assert_eq!(seq.len(), 0);
assert_eq!(seq.num_levels(), 1);
assert_eq!(seq.widths(), vec![0]);
}
#[test]
fn test_all_zeros() {
let seq = DacsOpt::from_slice(&[0, 0, 0, 0], None).unwrap();
assert!(!seq.is_empty());
assert_eq!(seq.len(), 4);
assert_eq!(seq.num_levels(), 1);
assert_eq!(seq.widths(), vec![1]);
assert_eq!(seq.access(0), Some(0));
assert_eq!(seq.access(1), Some(0));
assert_eq!(seq.access(2), Some(0));
assert_eq!(seq.access(3), Some(0));
}
#[test]
fn test_from_slice_uncastable() {
let e = DacsOpt::from_slice(&[u128::MAX], None);
assert_eq!(
e.err().map(|x| x.to_string()),
Some("vals must consist only of values castable into usize.".to_string())
);
}
#[test]
fn test_serialize() {
let mut bytes = vec![];
let seq = DacsOpt::from_slice(&[0b11, 0b1, 0b1111, 0b11], None).unwrap();
let size = seq.serialize_into(&mut bytes).unwrap();
let other = DacsOpt::deserialize_from(&bytes[..]).unwrap();
assert_eq!(seq, other);
assert_eq!(size, bytes.len());
assert_eq!(size, seq.size_in_bytes());
}
}