use std::{io, isize, marker::PhantomData};
use binout::{AsIs, Serializer, VByte};
use bitm::{bits_to_store, ceiling_div, get_bits57, init_bits57, n_lowest_bits, BitAccess, BitVec};
use dyn_size_of::GetSize;
#[cfg(feature = "sux")] use sux::traits::IndexedSeq;
pub trait CompressedArray {
const MAX_FOR_UNUSED: bool = false;
fn new(values: Vec<usize>, last_in_value: usize, number_of_keys: usize) -> Self;
fn get(&self, index: usize) -> usize;
fn write(&self, output: &mut dyn io::Write) -> io::Result<()>;
fn write_bytes(&self) -> usize;
fn read(input: &mut dyn io::Read) -> io::Result<Self> where Self: Sized;
}
pub trait CompressedBuilder: Sized {
fn new(num_of_values: usize, max_value: usize) -> Self;
fn push(&mut self, value: usize);
#[inline]
fn with_all(values: Vec<usize>, last: usize) -> Self {
let mut builder = Self::new(values.len(), last);
for value in values { builder.push(value); }
builder
}
}
#[cfg(feature = "cseq")] pub type CSeqEliasFano = cseq::elias_fano::Sequence<bitm::CombinedSampling<bitm::ConstCombinedSamplingDensity<11>>, bitm::BinaryRankSearch>;
#[cfg(feature = "cseq")]
impl CompressedBuilder for cseq::elias_fano::Builder {
#[inline] fn new(num_of_values: usize, max_value: usize) -> Self {
cseq::elias_fano::Builder::new(num_of_values, max_value as u64+1)
}
#[inline] fn push(&mut self, value: usize) {
unsafe { self.push_unchecked(value as u64); }
}
}
#[cfg(feature = "cseq")]
impl CompressedArray for CSeqEliasFano {
fn new(values: Vec<usize>, last: usize, _num_of_keys: usize) -> Self {
cseq::elias_fano::Builder::with_all(values, last).finish_s()
}
#[inline]
fn get(&self, index: usize) -> usize {
unsafe { self.get_unchecked(index) as usize }
}
fn write(&self, output: &mut dyn io::Write) -> io::Result<()> {
CSeqEliasFano::write(&self, output)
}
fn write_bytes(&self) -> usize {
CSeqEliasFano::write_bytes(&self)
}
fn read(input: &mut dyn io::Read) -> io::Result<Self> where Self: Sized {
CSeqEliasFano::read_s(input)
}
}
pub struct LinearRegression {
multiplier: isize, divider: isize, offset: isize, }
impl LinearRegression {
pub fn new(multiplier: usize, divider: usize, values: Vec<usize>) -> (Self, CompactFast) {
let mut max_diff = isize::MIN; let mut min_diff = isize::MAX; for (i, v) in values.iter().copied().enumerate() {
if v == usize::MAX { continue; }
let diff = (i * multiplier) as isize - (v * divider) as isize; if diff > max_diff { max_diff = diff }
if diff < min_diff { min_diff = diff }
}
let regression = LinearRegression {
multiplier: multiplier as isize,
divider: divider as isize,
offset: min_diff
};
let max_correction = (max_diff - min_diff) as usize / divider;
let mut corrections = CompactFastBuilder::new(values.len(), max_correction);
for (i, v) in values.iter().copied().enumerate() {
if v == usize::MAX {
corrections.push(0);
} else {
let correction = regression.get(i) - v as isize;
debug_assert!(correction >= 0);
let correction = correction as usize;
debug_assert!(correction <= max_correction, "{correction} <= {max_correction}");
corrections.push(correction as usize);
}
}
(regression, corrections.compact)
}
#[inline(always)] pub fn get(&self, i: usize) -> isize {
(self.multiplier * i as isize - self.offset) / self.divider
}
}
pub trait LinearRegressionConstructor {
fn new(values: &[usize], num_of_keys: usize) -> (usize, usize);
}
pub struct Simple;
impl LinearRegressionConstructor for Simple {
#[inline] fn new(values: &[usize], num_of_keys: usize) -> (usize, usize) {
(num_of_keys, values.len()+1)
}
}
pub struct LeastSquares;
impl LinearRegressionConstructor for LeastSquares {
fn new(values: &[usize], _num_of_keys: usize) -> (usize, usize) {
let mut n= 0u128;
let mut x_sum = 0;
let mut y_sum = 0;
let mut x_sqr_sum = 0;
let mut xy_sum = 0;
for (x, y) in values.iter().copied().enumerate() {
if y == usize::MAX { continue; }
n += 1;
x_sum += x as u128;
y_sum += y as u128;
x_sqr_sum += (x as u128) * (x as u128);
xy_sum += (x as u128) * (y as u128);
}
if n == 0 { return (1, 1); }
let mut multiplier = (n * xy_sum).abs_diff(x_sum * y_sum);
let mut divider = (n * x_sqr_sum).abs_diff(x_sum * x_sum);
let max_vals = (1<<(isize::BITS-2)) / n;
if multiplier > max_vals || divider > max_vals {
let div = (multiplier / max_vals).max(divider / max_vals);
let divh = div / 2;
multiplier = (multiplier + divh) / div;
divider = (divider + divh) / div;
}
(multiplier as usize, divider as usize)
}
}
pub struct LinearRegressionArray<C> {
regression: LinearRegression,
corrections: CompactFast,
constructor: PhantomData<C>
}
impl<C: LinearRegressionConstructor> CompressedArray for LinearRegressionArray<C> {
const MAX_FOR_UNUSED: bool = true;
fn new(values: Vec<usize>, _last: usize, num_of_keys: usize) -> Self {
let (multiplier, divider) = C::new(&values, num_of_keys);
let (regression, corrections) = LinearRegression::new(multiplier, divider, values);
Self { regression, corrections, constructor: PhantomData }
}
fn get(&self, index: usize) -> usize {
(self.regression.get(index) - self.corrections.get(index) as isize) as usize
}
fn write(&self, output: &mut dyn io::Write) -> io::Result<()> {
AsIs::write(output, self.regression.multiplier as usize)?;
AsIs::write(output, self.regression.divider as usize)?;
AsIs::write(output, self.regression.offset as usize)?;
self.corrections.write(output)
}
fn write_bytes(&self) -> usize {
AsIs::size(self.regression.multiplier as usize)
+ AsIs::size(self.regression.divider as usize)
+ AsIs::size(self.regression.offset as usize)
+ self.corrections.write_bytes()
}
fn read(input: &mut dyn io::Read) -> io::Result<Self> where Self: Sized {
let multiplier: usize = AsIs::read(input)?;
let divider: usize = AsIs::read(input)?;
let offset: usize = AsIs::read(input)?;
let corrections = CompactFast::read(input)?;
Ok(Self {
regression: LinearRegression {
multiplier: multiplier as isize,
divider: divider as isize,
offset: offset as isize,
},
corrections,
constructor: PhantomData
})
}
}
impl<C> GetSize for LinearRegressionArray<C> {
fn size_bytes_dyn(&self) -> usize { self.corrections.size_bytes_dyn() }
fn size_bytes_content_dyn(&self) -> usize { self.corrections.size_bytes_dyn() }
const USES_DYN_MEM: bool = true;
}
pub struct Compact {
pub items: Box<[u64]>,
pub item_size: u8,
}
pub struct CompactBuilder {
compact: Compact,
index: usize
}
impl CompressedBuilder for CompactBuilder {
fn new(num_of_values: usize, max_value: usize) -> Self {
let item_size = bits_to_store(max_value as u64);
Self {
compact: Compact { items: Box::with_zeroed_bits(item_size as usize * num_of_values), item_size },
index: 0
}
}
#[inline] fn push(&mut self, value: usize) {
self.compact.items.init_successive_bits(&mut self.index, value as u64, self.compact.item_size);
}
}
impl GetSize for Compact {
fn size_bytes_dyn(&self) -> usize { self.items.size_bytes_dyn() }
fn size_bytes_content_dyn(&self) -> usize { self.items.size_bytes_dyn() }
const USES_DYN_MEM: bool = true;
}
impl CompressedArray for Compact {
fn new(values: Vec<usize>, last: usize, _num_of_keys: usize) -> Self {
CompactBuilder::with_all(values, last).compact
}
#[inline]
fn get(&self, index: usize) -> usize {
unsafe { self.items.get_fragment_unchecked(index, self.item_size) as usize }
}
fn write(&self, output: &mut dyn io::Write) -> io::Result<()> {
VByte::write(output, self.item_size)?;
AsIs::write_array(output, &self.items)
}
fn write_bytes(&self) -> usize {
VByte::size(self.item_size) + AsIs::array_size(&self.items)
}
fn read(input: &mut dyn io::Read) -> io::Result<Self> where Self: Sized {
let item_size = VByte::read(input)?;
let items = AsIs::read_array(input)?;
Ok(Self { items, item_size })
}
}
pub struct CompactFast {
pub items: Box<[u8]>,
pub item_size: u8,
}
pub struct CompactFastBuilder {
compact: CompactFast,
first_bit: usize,
}
impl CompressedBuilder for CompactFastBuilder {
#[inline] fn new(num_of_values: usize, max_value: usize) -> Self {
let item_size = bits_to_store(max_value as u64);
Self {
compact: CompactFast { items: vec![0; ceiling_div(item_size as usize * num_of_values, 8) + 7].into_boxed_slice(), item_size },
first_bit: 0,
}
}
#[inline] fn push(&mut self, value: usize) {
unsafe{init_bits57(self.compact.items.as_mut_ptr(), self.first_bit, value as u64)};
self.first_bit += self.compact.item_size as usize;
}
}
impl GetSize for CompactFast {
fn size_bytes_dyn(&self) -> usize { self.items.size_bytes_dyn() }
fn size_bytes_content_dyn(&self) -> usize { self.items.size_bytes_dyn() }
const USES_DYN_MEM: bool = true;
}
impl CompressedArray for CompactFast {
fn new(values: Vec<usize>, last: usize, _num_of_keys: usize) -> Self {
CompactFastBuilder::with_all(values, last).compact
}
#[inline]
fn get(&self, index: usize) -> usize {
(unsafe { get_bits57(self.items.as_ptr(), index * self.item_size as usize) & n_lowest_bits(self.item_size) }) as usize
}
#[inline]
fn write(&self, output: &mut dyn io::Write) -> io::Result<()> {
VByte::write(output, self.item_size)?;
AsIs::write_array(output, &self.items)
}
fn write_bytes(&self) -> usize {
VByte::size(self.item_size) + AsIs::array_size(&self.items)
}
fn read(input: &mut dyn io::Read) -> io::Result<Self> where Self: Sized {
let item_size = VByte::read(input)?;
let items = AsIs::read_array(input)?;
Ok(Self { items, item_size })
}
}
#[cfg(feature = "sux")] pub struct SuxEliasFano(sux::dict::elias_fano::EfSeq);
#[cfg(feature = "sux")] impl CompressedBuilder for sux::dict::EliasFanoBuilder {
#[inline] fn new(num_of_values: usize, max_value: usize) -> Self {
sux::dict::EliasFanoBuilder::new(num_of_values, max_value)
}
#[inline] fn push(&mut self, value: usize) {
unsafe{ self.push_unchecked(value); }
}
}
#[cfg(feature = "sux")]
impl CompressedArray for SuxEliasFano {
fn new(values: Vec<usize>, last: usize, _num_of_keys: usize) -> Self {
SuxEliasFano(sux::dict::EliasFanoBuilder::with_all(values, last).build_with_seq())
}
#[inline]
fn get(&self, index: usize) -> usize {
unsafe { self.0.get_unchecked(index) }
}
fn write(&self, output: &mut dyn io::Write) -> io::Result<()> {
use std::io::Write;
use epserde::ser::Serialize;
let mut bw = std::io::BufWriter::new(output);
match unsafe {self.0.serialize(&mut bw)} {
Ok(_) => Ok(()),
Err(epserde::ser::Error::FileOpenError(io_err)) => Err(io_err),
Err(e) => Err(io::Error::new(io::ErrorKind::Other, e))
}?;
bw.flush()
}
fn write_bytes(&self) -> usize {
let mut buf = Vec::<u8>::new();
use epserde::ser::Serialize;
unsafe { self.0.serialize(&mut buf).unwrap(); }
buf.len()
}
fn read(input: &mut dyn io::Read) -> io::Result<Self> where Self: Sized {
use epserde::deser::Deserialize;
match unsafe{ sux::dict::elias_fano::EfSeq::deserialize_full(&mut std::io::BufReader::with_capacity(1, input))} {
Ok(v) => Ok(Self(v)),
Err(epserde::deser::Error::FileOpenError(io_err)) => Err(io_err),
Err(e) => Err(io::Error::new(io::ErrorKind::Other, e))
}
}
}
#[cfg(feature = "sux")]
impl GetSize for SuxEliasFano {
fn size_bytes_dyn(&self) -> usize {
mem_dbg::MemSize::mem_size(&self.0, mem_dbg::SizeFlags::default()) - std::mem::size_of_val(self)
}
fn size_bytes_content_dyn(&self) -> usize {
mem_dbg::MemSize::mem_size(&self.0, mem_dbg::SizeFlags::default() | mem_dbg::SizeFlags::CAPACITY) - std::mem::size_of_val(self)
}
fn size_bytes(&self) -> usize {
mem_dbg::MemSize::mem_size(&self.0, mem_dbg::SizeFlags::default())
}
const USES_DYN_MEM: bool = true;
}
#[cfg(feature = "cacheline-ef")]
pub struct CachelineEF(cacheline_ef::CachelineEfVec);
#[cfg(feature = "cacheline-ef")]
impl CompressedArray for CachelineEF {
fn new(values: Vec<usize>, _last: usize, _num_of_keys: usize) -> Self {
let v: Vec<_> = values.iter().map(|v| *v as u64).collect();
CachelineEF(cacheline_ef::CachelineEfVec::new(&v))
}
fn get(&self, index: usize) -> usize {
unsafe { self.0.index_unchecked(index) as usize }
}
fn write(&self, _output: &mut dyn io::Write) -> io::Result<()> {
todo!()
}
fn write_bytes(&self) -> usize {
todo!()
}
fn read(_input: &mut dyn io::Read) -> io::Result<Self> where Self: Sized {
todo!()
}
}
#[cfg(feature = "cacheline-ef")]
impl GetSize for CachelineEF {
fn size_bytes_dyn(&self) -> usize {
mem_dbg::MemSize::mem_size(&self.0, mem_dbg::SizeFlags::default()) - std::mem::size_of_val(self)
}
fn size_bytes_content_dyn(&self) -> usize {
mem_dbg::MemSize::mem_size(&self.0, mem_dbg::SizeFlags::default() | mem_dbg::SizeFlags::CAPACITY) - std::mem::size_of_val(self)
}
fn size_bytes(&self) -> usize {
mem_dbg::MemSize::mem_size(&self.0, mem_dbg::SizeFlags::default())
}
const USES_DYN_MEM: bool = true;
}
#[cfg(feature = "sux")] pub type DefaultCompressedArray = SuxEliasFano;
#[cfg(all(feature = "cacheline-ef", not(feature = "sux")))] pub type DefaultCompressedArray = CachelineEF;
#[cfg(all(feature = "cseq", not(feature = "sux"), not(feature="cacheline-ef")))] pub type DefaultCompressedArray = CSeqEliasFano;
#[cfg(all(not(feature="cseq"), not(feature = "sux"), not(feature="cacheline-ef")))] pub type DefaultCompressedArray = Compact;