use csf::utils::ceiling_div;
#[cfg(target_arch = "x86")]
use std::arch::x86::{__m128i, _mm_setzero_si128};
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::{__m128i, _mm_setzero_si128};
use std::os::raw::c_int;
use arrayvec::ArrayVec;
use csf::bits_to_store;
use superslice::*;
use std::mem::MaybeUninit;
use co_sort::*;
use std::collections::HashMap;
#[allow(improper_ctypes)]
#[link(name = "simdcomp")]
extern {
fn simdmaxbitsd1(initvalue: u32, input: *const u32) -> u32;
fn simdpackwithoutmaskd1(initvalue: u32, input: *const u32, output: *mut __m128i, bit: u32);
fn simdpack_shortlength(input: *const u32, length: c_int, out: *mut __m128i, bit: u32) -> *mut __m128i;
fn simdpackwithoutmask(input: *const u32, output: *mut __m128i, bit: u32);
fn simdsearchd1(initOffset: *mut __m128i, input: *const __m128i, bit: u32, key: u32, presult: *mut u32) -> c_int;
fn simdselectFOR(initvalue: u32, input: *const __m128i, bit: u32, slot: c_int) -> u32;
}
pub struct ClusterBP128 {
block_second_positions: Box<[u32]>,
blocks_metadata: Box<[u32]>,
blocks_nimbers: Box<[u8]>,
data: Box<[__m128i]>
}
impl ClusterBP128 {
const NIMBERS_IN_BLOCK: usize = 128 * 2 + 2;
fn metadata(data_index: u32, bits_per_entry: u8, max_nimber: u8) -> u32 {
assert!(data_index < (1u32 << 23));
assert!(bits_per_entry < (1u8 << 5));
(data_index << 9) | ((bits_per_entry as u32) << 4) | (max_nimber as u32)
}
fn get_metadata(&self, block_index: usize) -> (u32, u8, u8) {
let rest = self.blocks_metadata[block_index];
let max_nimber = (rest & 15) as u8;
let bits_per_entry = ((rest >> 4) & ((1<<5)-1)) as u8;
return (rest >> 9, bits_per_entry, max_nimber);
}
fn set_nimber_of_first(block_nimber: &mut u8, nimber_to_set: u8) {
debug_assert!(nimber_to_set < 16);
*block_nimber |= nimber_to_set;
}
fn get_nimber_of_first(block_nimber: u8) -> u8 { block_nimber & 15 }
fn set_nimber_of_second(block_nimber: &mut u8, nimber_to_set: u8) {
debug_assert!(nimber_to_set < 16);
*block_nimber |= nimber_to_set << 4;
}
fn get_nimber_of_second(block_nimber: u8) -> u8 { block_nimber >> 4 }
fn bits_to_store_2_nimbers(max_nimber: u8) -> u8 {
return bits_to_store!(max_nimber*(max_nimber+2));
}
fn prefilter(positions: &[u32], nimbers: &[u8]) -> (Vec<u32>, Vec<u8>) {
assert_eq!(positions.len(), nimbers.len());
let mut pre_filtered_positions = Vec::with_capacity(positions.len());
let mut pre_filtered_nimbers = Vec::with_capacity(nimbers.len());
{
let mut i = 0usize;
while i+1 < positions.len() {
if nimbers[i] != nimbers[i+1] {
pre_filtered_positions.push(positions[i]);
pre_filtered_nimbers.push(nimbers[i]);
i += 1;
pre_filtered_positions.push(positions[i]); pre_filtered_nimbers.push(nimbers[i]);
}
i += 1;
}
if i < positions.len() {
pre_filtered_positions.push(*positions.last().unwrap());
pre_filtered_nimbers.push(*nimbers.last().unwrap());
}
}
(pre_filtered_positions, pre_filtered_nimbers)
}
fn from_prefiltered(positions: &[u32], nimbers: &[u8]) -> Self {
let blocks_count = ceiling_div(nimbers.len(), Self::NIMBERS_IN_BLOCK);
let mut block_second_positions = vec![0u32; blocks_count].into_boxed_slice();
let mut blocks_metadata = vec![0u32; blocks_count].into_boxed_slice();
let mut blocks_nimbers = vec![0u8; blocks_count].into_boxed_slice();
let mut data: Option<Box<[__m128i]>> = None;
for &store_phase in [false, true].iter() { let mut data_size = 0usize;
for block_i in 0..blocks_count {
let last_block = block_i + 1 == blocks_count;
let idx_beg = block_i * Self::NIMBERS_IN_BLOCK;
let (idx_end, size) = if last_block {
(nimbers.len(), nimbers.len() - idx_beg)
} else {
(idx_beg + Self::NIMBERS_IN_BLOCK, Self::NIMBERS_IN_BLOCK)
};
if store_phase { Self::set_nimber_of_first(&mut blocks_nimbers[block_i], nimbers[idx_beg]); }
if size == 1 { assert!(last_block); if store_phase {
blocks_metadata[block_i] = Self::metadata(data_size as _, 0, 0); block_second_positions[block_i] = positions[idx_beg].checked_add(1).unwrap();
}
break; }
if store_phase {
block_second_positions[block_i] = positions[idx_beg + 1];
Self::set_nimber_of_second(&mut blocks_nimbers[block_i], nimbers[idx_beg + 1]);
}
if size == 2 { debug_assert!(last_block); if store_phase { blocks_metadata[block_i] = Self::metadata(data_size as _, 0, 0); } break; }
let max_nimber = *nimbers[idx_beg + 2..idx_end].iter().max().unwrap();
let mut to_compress = ArrayVec::<[u32; 128]>::new();
let mut nimbers_to_compress = ArrayVec::<[u32; 128]>::new();
let mut uncompress_i = 2usize; let mut recent_value_compressed = 0u32;
while uncompress_i + 1 < size { let p = idx_beg + uncompress_i + 1;
recent_value_compressed = positions[p] - positions[idx_beg + 1];
to_compress.push(recent_value_compressed);
nimbers_to_compress.push(nimbers[p] as u32 * (max_nimber + 1) as u32 + nimbers[p-1] as u32);
uncompress_i += 2;
}
if uncompress_i < size { recent_value_compressed = positions.last().unwrap() - positions[idx_beg + 1];
to_compress.push(recent_value_compressed);
let n = *nimbers.last().unwrap() as u32;
nimbers_to_compress.push(n * (max_nimber + 1) as u32 + n);
}
let to_compress_real_size = to_compress.len();
while to_compress.len() < 128 { recent_value_compressed = recent_value_compressed.checked_add(1).unwrap();
to_compress.push(recent_value_compressed);
nimbers_to_compress.push(0);
}
let positions_bits = unsafe { simdmaxbitsd1(0, to_compress.as_ptr()) };
let nimbers_bits = Self::bits_to_store_2_nimbers(max_nimber);
if store_phase {
blocks_metadata[block_i] = Self::metadata(data_size as _, positions_bits as _, max_nimber);
unsafe { simdpackwithoutmaskd1(0, to_compress.as_mut_ptr(), data.as_mut().unwrap().as_mut_ptr().offset(data_size as _), positions_bits) };
let nimbers_begin = data.as_mut().unwrap().as_mut_ptr().wrapping_offset(data_size as isize + positions_bits as isize);
if to_compress.len() != to_compress_real_size {
assert!(last_block); let nimbers_end = unsafe {
simdpack_shortlength(nimbers_to_compress.as_ptr(), to_compress_real_size as _, nimbers_begin, nimbers_bits as _)
};
let compressed = unsafe { nimbers_end.offset_from(nimbers_begin) };
if compressed != ceiling_div(nimbers_bits as usize * to_compress_real_size, 128) as _ {
panic!("Error while saving {} nimbers pair, each on {} bits: {} != {}",
to_compress_real_size, nimbers_bits, compressed, ceiling_div(nimbers_bits as usize * to_compress_real_size, 128));
}
} else {
unsafe { simdpackwithoutmask(nimbers_to_compress.as_ptr(), nimbers_begin, nimbers_bits as _); }
}
}
data_size += positions_bits as usize + if last_block {
ceiling_div(nimbers_bits as usize * to_compress_real_size, 128)
} else {
nimbers_bits as usize
};
}
if !store_phase {
data = Some(vec![unsafe { _mm_setzero_si128() }; data_size].into_boxed_slice());
}
}
Self {
block_second_positions,
blocks_metadata,
blocks_nimbers,
data: data.unwrap()
}
}
#[inline]
pub fn from_sorted(positions: &[u32], nimbers: &[u8]) -> Self {
let (positions, nimbers) = Self::prefilter(positions, nimbers);
Self::from_prefiltered(&positions, &nimbers)
}
#[inline]
pub fn from_unsorted(positions: &mut [u32], nimbers: &mut [u8]) -> Self {
co_sort!(positions, nimbers);
Self::from_sorted(positions, nimbers)
}
pub fn get(&self, p: u32) -> u8 {
let mut block_index = self.block_second_positions.upper_bound(&p);
if block_index == 0 { return Self::get_nimber_of_first(self.blocks_nimbers[0]);
}
block_index -= 1; let key = p - self.block_second_positions[block_index];
if key == 0 { return Self::get_nimber_of_second(self.blocks_nimbers[block_index]);
}
let (data_index, bits_per_entry, mut max_nimber) = self.get_metadata(block_index);
let mut found_key = MaybeUninit::<u32>::uninit();
let mut offset = unsafe{_mm_setzero_si128()};
let data_block_begin = self.data.as_ptr().wrapping_offset(data_index as isize);
let in_block_index = unsafe{simdsearchd1(&mut offset, data_block_begin, bits_per_entry as u32, key, found_key.as_mut_ptr())};
if in_block_index == 128 { return Self::get_nimber_of_first(self.blocks_nimbers[block_index + 1]);
}
let nimbers_pair = unsafe {simdselectFOR(0,
data_block_begin.wrapping_offset(bits_per_entry as isize),
Self::bits_to_store_2_nimbers(max_nimber) as u32,
in_block_index)} as u8;
max_nimber += 1;
if unsafe {found_key.assume_init()} == key {
nimbers_pair / max_nimber } else {
nimbers_pair % max_nimber
}
}
pub fn size_bytes(&self) -> usize {
self.block_second_positions.len() * std::mem::size_of::<u32>() +
self.blocks_metadata.len() * std::mem::size_of::<u32>() +
self.blocks_nimbers.len() * std::mem::size_of::<u8>() +
self.data.len() * std::mem::size_of::<__m128i>() +
std::mem::size_of_val(self)
}
}
fn iter_to_vecs<'a, 'b, M: Iterator<Item=(&'a u32, &'b u8)>>(map: M, len: usize) -> (Vec<u32>, Vec<u8>) {
let mut positions = Vec::with_capacity(len);
let mut nimbers = Vec::with_capacity(len);
for (p, n) in map {
positions.push(*p);
nimbers.push(*n);
};
(positions, nimbers)
}
#[inline]
fn map_to_vecs<S>(map: &HashMap<u32, u8, S>) -> (Vec<u32>, Vec<u8>) {
iter_to_vecs(map.iter(), map.len())
}
impl<S> From<&HashMap<u32, u8, S>> for ClusterBP128 {
fn from(map: &HashMap<u32, u8, S>) -> Self {
let (mut positions, mut nimbers) = map_to_vecs(map);
Self::from_unsorted(&mut positions, &mut nimbers)
}
}
impl<S> From<HashMap<u32, u8, S>> for ClusterBP128 {
fn from(map: HashMap<u32, u8, S>) -> Self {
let (mut positions, mut nimbers) = map_to_vecs(&map);
drop(map);
Self::from_unsorted(&mut positions, &mut nimbers)
}
}
#[cfg(test)]
mod tests {
use super::*;
use maplit::hashmap;
#[test]
fn with_hashmap1() {
let map = ClusterBP128::from(hashmap!(1=>2u8));
assert_eq!(map.get(1), 2);
}
#[test]
fn with_hashmap2() {
let map = ClusterBP128::from(hashmap!(1=>2u8, 3=>6u8));
assert_eq!(map.get(1), 2);
assert_eq!(map.get(3), 6);
}
#[test]
fn with_hashmap3() {
let map = ClusterBP128::from(hashmap!(123=>1u8, 34=>2u8, 5=>1u8));
assert_eq!(map.get(123), 1);
assert_eq!(map.get(34), 2);
assert_eq!(map.get(5), 1);
}
#[test]
fn with_hashmap130() {
let mut m = HashMap::new();
for i in 0u8..130u8 { m.insert(2*(i as u32), i/10); }
let map = ClusterBP128::from(m);
for i in 0u8..130u8 { assert_eq!(map.get(2*(i as u32)), i/10); }
}
}