use std::{mem::size_of, ops::Deref};
use crate::{Key, Pgm, Result};
/// Longest predicted window (in elements) that `PgmData::get` scans linearly;
/// windows longer than this are binary-searched instead.
const LINEAR_THRESHOLD: usize = 32;
/// A `Pgm` index bundled with the key vector it is queried against.
///
/// `PgmData::load` builds `pgm` from `data` and stores both together, so a
/// lookup can predict a window with the index and then search that window of
/// `data`.
#[cfg_attr(feature = "bitcode", derive(bitcode::Encode, bitcode::Decode))]
#[derive(Clone, Debug)]
pub struct PgmData<K: Key> {
/// Index used to predict where a key is located inside `data`.
pub pgm: Pgm<K>,
/// Keys searched by lookups. `PgmData::get` binary-searches this vector and
/// stops a linear scan at the first larger element, so it relies on the keys
/// being in ascending order.
pub data: Vec<K>,
}
/// Exposes the wrapped [`Pgm`] through auto-deref, so the index's own methods
/// can be called directly on a `PgmData`.
impl<K: Key> Deref for PgmData<K> {
    type Target = Pgm<K>;

    /// Borrows the inner index.
    #[inline]
    fn deref(&self) -> &Pgm<K> {
        let Self { pgm, .. } = self;
        pgm
    }
}
impl<K: Key> PgmData<K> {
    /// Builds an index over `data` and takes ownership of the keys.
    ///
    /// `epsilon` and `check_sorted` are forwarded unchanged to `Pgm::new`
    /// (presumably the error bound and a request to validate ordering —
    /// confirm against `Pgm::new`).
    ///
    /// # Errors
    ///
    /// Returns whatever error `Pgm::new` reports for the given input.
    pub fn load(data: Vec<K>, epsilon: usize, check_sorted: bool) -> Result<Self> {
        let pgm = Pgm::new(&data, epsilon, check_sorted)?;
        Ok(Self { pgm, data })
    }

    /// Returns the stored keys as a slice.
    #[inline]
    #[must_use]
    pub fn data(&self) -> &[K] {
        &self.data
    }

    /// Looks up `key` and returns its position in the stored keys, if present.
    ///
    /// The index predicts a window `(lo, hi)`, which is treated as the
    /// half-open range `lo..hi` and clamped to the length of the data. Windows
    /// of at most `LINEAR_THRESHOLD` elements are scanned front to back,
    /// stopping at the first element greater than `key`; longer windows are
    /// binary-searched.
    ///
    /// Returns `None` when the key is not found inside the window, or when the
    /// window is empty, inverted, or starts past the end of the data.
    #[inline]
    #[must_use]
    pub fn get(&self, key: K) -> Option<usize> {
        let (lo, hi) = self.pgm.predict_range(key);
        let hi = hi.min(self.data.len());
        // Checked slicing replaces the previous `get_unchecked` calls. `pgm`
        // and `data` are public fields, so nothing guarantees `lo <= hi` here;
        // an inverted or out-of-range window now yields `None` instead of an
        // arithmetic underflow followed by an out-of-bounds read.
        let window = self.data.get(lo..hi)?;

        if window.len() <= LINEAR_THRESHOLD {
            for (offset, &candidate) in window.iter().enumerate() {
                if candidate == key {
                    return Some(lo + offset);
                }
                // Keys ascend, so anything larger means `key` is absent.
                if candidate > key {
                    return None;
                }
            }
            None
        } else {
            window.binary_search(&key).ok().map(|offset| lo + offset)
        }
    }

    /// Lazily looks up every key produced by `keys`.
    ///
    /// Each item of the returned iterator is the result of [`Self::get`] for
    /// the corresponding input key, in input order.
    #[inline]
    pub fn get_many<'a, I>(&'a self, keys: I) -> impl Iterator<Item = Option<usize>> + 'a
    where
        I: IntoIterator<Item = K> + 'a,
        <I as IntoIterator>::IntoIter: 'a,
    {
        keys.into_iter().map(move |k| self.get(k))
    }

    /// Counts how many of `keys` are found by [`Self::get`].
    #[inline]
    pub fn count_hits<I>(&self, keys: I) -> usize
    where
        I: IntoIterator<Item = K>,
    {
        keys.into_iter().filter(|&k| self.get(k).is_some()).count()
    }

    /// Returns the size in bytes of the stored keys plus the index.
    ///
    /// The key portion is `len * size_of::<K>()`, so spare capacity of the
    /// vector is not counted; the index portion is `Pgm::mem_usage`.
    #[inline]
    #[must_use]
    pub fn memory_usage(&self) -> usize {
        self.data.len() * size_of::<K>() + self.pgm.mem_usage()
    }

    /// Returns the position the index predicts for `key` (`Pgm::predict`),
    /// without consulting the stored keys.
    #[inline]
    #[must_use]
    pub fn predict_pos(&self, key: K) -> usize {
        self.pgm.predict(key)
    }

    /// Collects summary statistics: segment count and average segment size as
    /// reported by the index, and the total from [`Self::memory_usage`].
    #[inline]
    #[must_use]
    pub fn stats(&self) -> crate::PgmStats {
        crate::PgmStats {
            segments: self.pgm.segment_count(),
            avg_segment_size: self.pgm.avg_segment_size(),
            memory_bytes: self.memory_usage(),
        }
    }
}