use common_traits::SelectInWord;
use std::cmp::min;
/// Number of values packed into each 64-byte `CachelineEf` unit.
const L: usize = 44;
/// A compressed list of sorted `u64` values, stored as a sequence of
/// independent per-cacheline Elias-Fano units of up to `L` values each.
///
/// `E` is the backing storage: an owned `Vec<CachelineEf>` by default, but any
/// `AsRef<[CachelineEf]>` (e.g. a borrowed or memory-mapped slice) can be read.
#[derive(Default, Clone, mem_dbg::MemSize, mem_dbg::MemDbg)]
#[cfg_attr(feature = "epserde", derive(epserde::prelude::Epserde))]
pub struct CachelineEfVec<E = Vec<CachelineEf>> {
// One encoded unit per `L` input values.
ef: E,
// Total number of values stored across all units.
len: usize,
}
impl CachelineEfVec<Vec<CachelineEf>> {
    /// Builds the vector from a sorted slice, packing each run of up to `L`
    /// values into its own [`CachelineEf`] unit.
    ///
    /// Returns `None` when some run of values spans too wide a range to be
    /// encoded in a single cacheline.
    pub fn try_new(vals: &[u64]) -> Option<Self> {
        let mut parts = Vec::with_capacity(vals.len().div_ceil(L));
        let mut start = 0;
        while start < vals.len() {
            let end = min(start + L, vals.len());
            parts.push(CachelineEf::try_new(&vals[start..end])?);
            start = end;
        }
        Some(Self {
            ef: parts,
            len: vals.len(),
        })
    }

    /// Like [`Self::try_new`], but panics when the values cannot be encoded.
    pub fn new(vals: &[u64]) -> Self {
        Self::try_new(vals).expect("Values are too sparse!")
    }
}
impl<E: AsRef<[CachelineEf]>> CachelineEfVec<E> {
    /// Returns the value at position `index`.
    ///
    /// # Panics
    /// Panics when `index >= self.len()`.
    pub fn index(&self, index: usize) -> u64 {
        assert!(
            index < self.len,
            "Index {index} out of bounds. Length is {}.",
            self.len
        );
        // SAFETY: `index < self.len` and one `CachelineEf` is stored per `L`
        // values, so `index / L` is in bounds of the inner slice.
        unsafe { self.ef.as_ref().get_unchecked(index / L).index(index % L) }
    }

    /// Number of values stored in the vector.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` when the vector holds no values.
    /// (Added to satisfy the standard `len`/`is_empty` pairing.)
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Returns the value at position `index` without bounds checking.
    ///
    /// # Safety
    /// The caller must guarantee `index < self.len()`.
    pub unsafe fn index_unchecked(&self, index: usize) -> u64 {
        (*self.ef.as_ref().get_unchecked(index / L)).index(index % L)
    }

    /// Hints the CPU to pull the cacheline holding `index` into cache.
    pub fn prefetch(&self, index: usize) {
        prefetch_index(self.ef.as_ref(), index / L);
    }

    /// Size of the cacheline storage in bytes.
    /// NOTE(review): this counts only the `CachelineEf` slice, not the `len`
    /// field or any container overhead.
    pub fn size_in_bytes(&self) -> usize {
        std::mem::size_of_val(self.ef.as_ref())
    }
}
/// One 64-byte cacheline storing up to `L` = 44 sorted values from a limited
/// range: a cacheline-local Elias-Fano encoding.
#[derive(Clone, Copy, mem_dbg::MemSize, mem_dbg::MemDbg)]
#[repr(C)]
#[repr(align(64))]
#[cfg_attr(feature = "epserde", derive(epserde::prelude::Epserde))]
#[cfg_attr(feature = "epserde", zero_copy)]
#[copy_type]
pub struct CachelineEf {
// 128-bit Elias-Fano upper-part bitmap: bit `i + (v_i >> 8) - offset` is set
// for the i-th value (16 bytes).
high_boundaries: [u64; 2],
// `vals[0] >> 8`, i.e. bits 8..40 of the first value; all stored positions
// are relative to it (4 bytes).
reduced_offset: u32,
// Low 8 bits of each value, stored verbatim (44 bytes). Total: 64 bytes.
low_bits: [u8; L],
}
impl CachelineEf {
    /// Encodes up to `L` sorted values into one 64-byte unit.
    ///
    /// Returns `None` when the values span a range wider than
    /// `256 * (128 - L)` and therefore cannot be encoded.
    ///
    /// # Panics
    /// Panics when `vals` is empty, longer than `L`, not sorted, or when the
    /// last value does not fit in 40 bits.
    fn try_new(vals: &[u64]) -> Option<Self> {
        assert!(!vals.is_empty(), "List of values must not be empty.");
        assert!(
            vals.len() <= L,
            "Number of values must be at most {L}, but is {}",
            vals.len()
        );
        let l = vals.len();
        // Verify sortedness up front: the range check and bucket offsets
        // below rely on non-decreasing input (unsorted input would make the
        // subtractions underflow).
        // BUG FIX: the previous check compared the loop index with itself
        // (`i >= last` with `last = i`), so it was vacuously true and
        // unsorted input was never detected.
        let mut last = vals[0];
        for &v in &vals[1..] {
            assert!(v >= last, "Values are not sorted! {last} > {v}");
            last = v;
        }
        // Range too wide for the 128-bit upper-part bitmap: not encodable.
        if vals[l - 1] - vals[0] > 256 * (128 - L as u64) {
            return None;
        }
        assert!(
            vals[l - 1] < (1u64 << 40),
            "Last value {} is too large! Must be less than 2^40={}",
            vals[l - 1],
            1u64 << 40
        );
        // High 32 (of 40) bits of the first value; everything stored is
        // relative to this.
        let offset = vals[0] >> 8;
        assert!(
            offset <= u32::MAX as u64,
            "vals[0] does not fit in 40 bits."
        );
        // Low 8 bits of each value are stored verbatim.
        let mut low_bits = [0u8; L];
        for (i, &v) in vals.iter().enumerate() {
            low_bits[i] = (v & 0xff) as u8;
        }
        // Elias-Fano upper part: set bit `i + high(v_i) - offset` in the
        // 128-bit bitmap. The range check above guarantees `idx < 128`;
        // the assert is defensive.
        let mut high_boundaries = [0u64; 2];
        for (i, &v) in vals.iter().enumerate() {
            let idx = i + ((v >> 8) - offset) as usize;
            assert!(idx < 128, "Value {} is too large!", v - offset);
            high_boundaries[idx / 64] |= 1 << (idx % 64);
        }
        Some(Self {
            reduced_offset: offset as u32,
            high_boundaries,
            low_bits,
        })
    }

    /// Decodes the value at position `idx` (0-based within this unit).
    pub fn index(&self, idx: usize) -> u64 {
        // Number of upper-part bits stored in the first 64-bit word.
        let p = self.high_boundaries[0].count_ones() as usize;
        // Position of the idx-th set bit across the 128-bit bitmap.
        let one_pos = if idx < p {
            self.high_boundaries[0].select_in_word(idx)
        } else {
            64 + self.high_boundaries[1].select_in_word(idx - p)
        };
        // Invert the encoding: high part is `offset + (one_pos - idx)`,
        // low 8 bits come from `low_bits`.
        256 * self.reduced_offset as u64 + 256 * (one_pos - idx) as u64 + self.low_bits[idx] as u64
    }
}
/// Best-effort hint to pull the cacheline containing `s[index]` into L1
/// cache. A no-op on architectures without an implemented prefetch hint.
fn prefetch_index<T>(s: &[T], index: usize) {
    // `wrapping_add` keeps the address computation defined even if `index`
    // is out of bounds (`add` would be UB there); prefetching a bogus
    // address is harmless since prefetch hints cannot fault.
    let ptr = s.as_ptr().wrapping_add(index) as *const u64;
    #[cfg(target_arch = "x86_64")]
    // SAFETY: `_mm_prefetch` is only a hint and cannot fault.
    unsafe {
        std::arch::x86_64::_mm_prefetch(ptr as *const i8, std::arch::x86_64::_MM_HINT_T0);
    }
    #[cfg(target_arch = "x86")]
    // SAFETY: `_mm_prefetch` is only a hint and cannot fault.
    unsafe {
        std::arch::x86::_mm_prefetch(ptr as *const i8, std::arch::x86::_MM_HINT_T0);
    }
    #[cfg(target_arch = "aarch64")]
    {
        // No stable prefetch intrinsic on aarch64 yet; deliberately a no-op.
        let _ = ptr;
    }
    #[cfg(not(any(target_arch = "x86_64", target_arch = "x86", target_arch = "aarch64")))]
    {
        let _ = ptr;
    }
}
#[test]
fn test() {
    // Maximum spread of values that still fits in one cacheline unit.
    let max = (128 - L) * 256;
    // Keep `offset + max` strictly below 2^40 so the "last value is too
    // large" assertion in `CachelineEf::try_new` can never fire.
    // BUG FIX: previously `offset` could be as large as 2^40 - 1, so
    // `offset + delta` could reach 2^40 and panic — a rare flaky failure.
    let offset = rand::random::<u64>() % ((1u64 << 40) - max as u64);
    let mut vals = [0u64; L];
    for _ in 0..1000000 {
        for v in &mut vals {
            *v = offset + rand::random::<u64>() % max as u64;
        }
        vals.sort_unstable();
        let lef = CachelineEf::try_new(&vals).unwrap();
        // Every stored value must round-trip exactly.
        for i in 0..L {
            assert_eq!(lef.index(i), vals[i], "error; full list: {:?}", vals);
        }
    }
}
#[test]
fn size() {
    // The whole point of `CachelineEf` is to occupy exactly one 64-byte
    // cacheline; verify both the size and the declared 64-byte alignment
    // (the original test only checked the size, not `repr(align(64))`).
    assert_eq!(std::mem::size_of::<CachelineEf>(), 64);
    assert_eq!(std::mem::align_of::<CachelineEf>(), 64);
}