const DENSE_THRESHOLD: usize = 64;
const PARTITION_THRESHOLD: usize = 256;
const OPTIMAL_THRESHOLD: usize = 4096;
use crate::error::{Result, ZiporaError};
use crate::algorithms::bit_ops::select_in_word;
use crate::succinct::BitVector;
use std::cmp::Ordering;
use super::basic::{EliasFano, EliasFanoCursor};
use super::partitioned::{PartitionedEliasFano, PartitionedEliasFanoCursor};
use super::optimal::{OptimalPartitionedEliasFano, OptimalPefCursor};
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum HybridPostingList {
Dense(Vec<u32>),
EliasFano(EliasFano),
Partitioned(PartitionedEliasFano),
Optimal(OptimalPartitionedEliasFano),
}
impl HybridPostingList {
pub fn from_sorted(values: &[u32]) -> Self {
let n = values.len();
if n <= DENSE_THRESHOLD {
Self::Dense(values.to_vec())
} else if n <= PARTITION_THRESHOLD {
Self::EliasFano(EliasFano::from_sorted(values))
} else if n <= OPTIMAL_THRESHOLD {
Self::Partitioned(PartitionedEliasFano::from_sorted(values))
} else {
Self::Optimal(OptimalPartitionedEliasFano::from_sorted(values))
}
}
pub fn from_sorted_u64(values: &[u64]) -> Self {
let n = values.len();
if n <= DENSE_THRESHOLD {
Self::Dense(values.iter().map(|&v| v as u32).collect())
} else if n <= PARTITION_THRESHOLD {
Self::EliasFano(EliasFano::from_sorted_u64(values))
} else if n <= OPTIMAL_THRESHOLD {
Self::Partitioned(PartitionedEliasFano::from_sorted_u64(values))
} else {
Self::Optimal(OptimalPartitionedEliasFano::from_sorted_u64(values))
}
}
pub fn with_encoding(values: &[u32], encoding: PostingEncoding) -> Self {
match encoding {
PostingEncoding::Dense => Self::Dense(values.to_vec()),
PostingEncoding::EliasFano => Self::EliasFano(EliasFano::from_sorted(values)),
PostingEncoding::Partitioned => Self::Partitioned(PartitionedEliasFano::from_sorted(values)),
PostingEncoding::Optimal => Self::Optimal(OptimalPartitionedEliasFano::from_sorted(values)),
}
}
#[inline]
pub fn len(&self) -> usize {
match self {
Self::Dense(v) => v.len(),
Self::EliasFano(ef) => ef.len(),
Self::Partitioned(pef) => pef.len(),
Self::Optimal(opef) => opef.len(),
}
}
#[inline]
pub fn is_empty(&self) -> bool { self.len() == 0 }
pub fn size_bytes(&self) -> usize {
match self {
Self::Dense(v) => v.len() * 4 + 8, Self::EliasFano(ef) => ef.size_bytes() + 8,
Self::Partitioned(pef) => pef.size_bytes() + 8,
Self::Optimal(opef) => opef.size_bytes() + 8,
}
}
#[inline]
pub fn bits_per_element(&self) -> f64 {
if self.len() == 0 { return 0.0; }
(self.size_bytes() * 8) as f64 / self.len() as f64
}
pub fn get(&self, index: usize) -> Option<u64> {
match self {
Self::Dense(v) => v.get(index).map(|&x| x as u64),
Self::EliasFano(ef) => ef.get(index),
Self::Partitioned(pef) => pef.get(index),
Self::Optimal(opef) => opef.get(index),
}
}
#[inline]
pub fn next_geq(&self, target: u64) -> Option<(usize, u64)> {
match self {
Self::Dense(v) => {
match v.binary_search(&(target as u32)) {
Ok(i) => Some((i, v[i] as u64)),
Err(i) => {
if i < v.len() {
Some((i, v[i] as u64))
} else {
None
}
}
}
}
Self::EliasFano(ef) => ef.next_geq(target),
Self::Partitioned(pef) => pef.next_geq(target),
Self::Optimal(opef) => opef.next_geq(target),
}
}
pub fn encoding(&self) -> PostingEncoding {
match self {
Self::Dense(_) => PostingEncoding::Dense,
Self::EliasFano(_) => PostingEncoding::EliasFano,
Self::Partitioned(_) => PostingEncoding::Partitioned,
Self::Optimal(_) => PostingEncoding::Optimal,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum PostingEncoding {
Dense,
EliasFano,
Partitioned,
Optimal,
}