#![cfg(target_pointer_width = "64")]
use anybytes::Bytes;
use anybytes::View;
use crate::bit_vector::BitVectorData;
use crate::broadword;
use crate::error::{Error, Result};
const BLOCK_LEN: usize = 8;
const SELECT_ONES_PER_HINT: usize = 64 * BLOCK_LEN * 2;
const SELECT_ZEROS_PER_HINT: usize = SELECT_ONES_PER_HINT;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Rank9SelIndex<const SELECT1: bool = true, const SELECT0: bool = true> {
len: usize,
block_rank_pairs: View<[usize]>,
select1_hints: Option<View<[usize]>>,
select0_hints: Option<View<[usize]>>,
}
#[derive(Default, Debug, Clone)]
struct Rank9SelIndexBuilder<const SELECT1: bool, const SELECT0: bool> {
len: usize,
block_rank_pairs: Vec<usize>,
select1_hints: Option<Vec<usize>>,
select0_hints: Option<Vec<usize>>,
}
impl<const SELECT1: bool, const SELECT0: bool> Rank9SelIndexBuilder<SELECT1, SELECT0> {
pub fn new(data: &BitVectorData) -> Self {
Self::build_rank(data)
}
pub fn select1_hints(self) -> Self {
self.build_select1()
}
pub fn select0_hints(self) -> Self {
self.build_select0()
}
pub fn build(self) -> Rank9SelIndex<SELECT1, SELECT0> {
let block_rank_pairs = Bytes::from_source(self.block_rank_pairs)
.view::<[usize]>()
.unwrap();
let select1_hints = self
.select1_hints
.map(|v| Bytes::from_source(v).view::<[usize]>().unwrap());
let select0_hints = self
.select0_hints
.map(|v| Bytes::from_source(v).view::<[usize]>().unwrap());
Rank9SelIndex::<SELECT1, SELECT0> {
len: self.len,
block_rank_pairs,
select1_hints,
select0_hints,
}
}
fn build_rank(data: &BitVectorData) -> Self {
let mut next_rank = 0;
let mut cur_subrank = 0;
let mut subranks = 0;
let mut block_rank_pairs = vec![next_rank];
let words = data.words();
for (i, &word) in words.iter().enumerate() {
let word_pop = broadword::popcount(word);
let shift = i % BLOCK_LEN;
if shift != 0 {
subranks <<= 9;
subranks |= cur_subrank;
}
next_rank += word_pop;
cur_subrank += word_pop;
if shift == BLOCK_LEN - 1 {
block_rank_pairs.push(subranks);
block_rank_pairs.push(next_rank);
subranks = 0;
cur_subrank = 0;
}
}
let left = BLOCK_LEN - (data.num_words() % BLOCK_LEN);
for _ in 0..left {
subranks <<= 9;
subranks |= cur_subrank;
}
block_rank_pairs.push(subranks);
if data.num_words() % BLOCK_LEN != 0 {
block_rank_pairs.push(next_rank);
block_rank_pairs.push(0);
}
block_rank_pairs.shrink_to_fit();
Self {
len: data.len(),
block_rank_pairs,
select1_hints: None,
select0_hints: None,
}
}
fn build_select1(mut self) -> Self {
let mut select1_hints = vec![];
let mut cur_ones_threshold = SELECT_ONES_PER_HINT;
for i in 0..self.num_blocks() {
if self.block_rank(i + 1) > cur_ones_threshold {
select1_hints.push(i);
cur_ones_threshold += SELECT_ONES_PER_HINT;
}
}
select1_hints.push(self.num_blocks());
select1_hints.shrink_to_fit();
self.select1_hints = Some(select1_hints);
self
}
fn build_select0(mut self) -> Self {
let mut select0_hints = vec![];
let mut cur_zeros_threshold = SELECT_ZEROS_PER_HINT;
for i in 0..self.num_blocks() {
if self.block_rank0(i + 1) > cur_zeros_threshold {
select0_hints.push(i);
cur_zeros_threshold += SELECT_ZEROS_PER_HINT;
}
}
select0_hints.push(self.num_blocks());
select0_hints.shrink_to_fit();
self.select0_hints = Some(select0_hints);
self
}
#[inline(always)]
fn num_blocks(&self) -> usize {
self.block_rank_pairs.len() / 2 - 1
}
#[inline(always)]
fn block_rank(&self, block: usize) -> usize {
self.block_rank_pairs[block * 2]
}
#[inline(always)]
fn block_rank0(&self, block: usize) -> usize {
block * BLOCK_LEN * 64 - self.block_rank(block)
}
}
impl<const SELECT1: bool, const SELECT0: bool> Rank9SelIndex<SELECT1, SELECT0> {
pub fn new(data: &BitVectorData) -> Self {
let mut builder = Rank9SelIndexBuilder::<SELECT1, SELECT0>::new(data);
if SELECT1 {
builder = builder.select1_hints();
}
if SELECT0 {
builder = builder.select0_hints();
}
builder.build()
}
#[inline(always)]
pub fn num_ones(&self) -> usize {
self.block_rank_pairs[self.block_rank_pairs.len() - 2]
}
#[inline(always)]
pub fn num_zeros(&self) -> usize {
self.len - self.num_ones()
}
#[inline(always)]
fn num_blocks(&self) -> usize {
self.block_rank_pairs.len() / 2 - 1
}
#[inline(always)]
fn block_rank(&self, block: usize) -> usize {
self.block_rank_pairs[block * 2]
}
#[inline(always)]
fn sub_block_rank(&self, sub_bpos: usize) -> usize {
let (block, left) = (sub_bpos / BLOCK_LEN, sub_bpos % BLOCK_LEN);
self.block_rank(block) + ((self.sub_block_ranks(block) >> ((7 - left) * 9)) & 0x1FF)
}
#[inline(always)]
fn sub_block_ranks(&self, block: usize) -> usize {
self.block_rank_pairs[block * 2 + 1]
}
#[inline(always)]
fn block_rank0(&self, block: usize) -> usize {
block * BLOCK_LEN * 64 - self.block_rank(block)
}
pub fn rank1(&self, data: &BitVectorData, pos: usize) -> Option<usize> {
if data.len() < pos {
return None;
}
if pos == data.len() {
return Some(self.num_ones());
}
let (sub_bpos, sub_left) = (pos / 64, pos % 64);
let mut r = self.sub_block_rank(sub_bpos);
if sub_left != 0 {
r += broadword::popcount(data.words()[sub_bpos] << (64 - sub_left));
}
Some(r)
}
pub fn rank0(&self, data: &BitVectorData, pos: usize) -> Option<usize> {
Some(pos - self.rank1(data, pos)?)
}
pub fn select1(&self, data: &BitVectorData, k: usize) -> Option<usize> {
if self.num_ones() <= k {
return None;
}
let block = {
let (mut a, mut b) = (0, self.num_blocks());
if let Some(select1_hints) = self.select1_hints.as_ref() {
let chunk = k / SELECT_ONES_PER_HINT;
if chunk != 0 {
a = select1_hints[chunk - 1];
}
b = select1_hints[chunk] + 1;
}
while b - a > 1 {
let mid = a + (b - a) / 2;
let x = self.block_rank(mid);
if x <= k {
a = mid;
} else {
b = mid;
}
}
a
};
debug_assert!(block < self.num_blocks());
let block_offset = block * BLOCK_LEN;
let mut cur_rank = self.block_rank(block);
debug_assert!(cur_rank <= k);
let rank_in_block_parallel = (k - cur_rank) as u64 * broadword::ONES_STEP_9;
let sub_ranks = self.sub_block_ranks(block) as u64;
let sub_block_offset = ((broadword::uleq_step_9(sub_ranks, rank_in_block_parallel)
.wrapping_mul(broadword::ONES_STEP_9)
>> 54)
& 0x7) as usize;
cur_rank += ((sub_ranks >> (7 - sub_block_offset).wrapping_mul(9)) & 0x1FF) as usize;
debug_assert!(cur_rank <= k);
let word_offset = block_offset + sub_block_offset;
let sel = word_offset * 64
+ broadword::select_in_word(data.words()[word_offset], k - cur_rank).unwrap();
Some(sel)
}
pub fn select0(&self, data: &BitVectorData, k: usize) -> Option<usize> {
if self.num_zeros() <= k {
return None;
}
let block = {
let (mut a, mut b) = (0, self.num_blocks());
if let Some(select0_hints) = self.select0_hints.as_ref() {
let chunk = k / SELECT_ZEROS_PER_HINT;
if chunk != 0 {
a = select0_hints[chunk - 1];
}
b = select0_hints[chunk] + 1;
}
while b - a > 1 {
let mid = a + (b - a) / 2;
let x = self.block_rank0(mid);
if x <= k {
a = mid;
} else {
b = mid;
}
}
a
};
debug_assert!(block < self.num_blocks());
let block_offset = block * BLOCK_LEN;
let mut cur_rank = self.block_rank0(block);
debug_assert!(cur_rank <= k);
let rank_in_block_parallel = (k - cur_rank) as u64 * broadword::ONES_STEP_9;
let sub_ranks = 64u64 * broadword::INV_COUNT_STEP_9 - self.sub_block_ranks(block) as u64;
let sub_block_offset = ((broadword::uleq_step_9(sub_ranks, rank_in_block_parallel)
.wrapping_mul(broadword::ONES_STEP_9)
>> 54)
& 0x7) as usize;
cur_rank += ((sub_ranks >> (7 - sub_block_offset).wrapping_mul(9)) & 0x1FF) as usize;
debug_assert!(cur_rank <= k);
let word_offset = block_offset + sub_block_offset;
let sel = word_offset * 64
+ broadword::select_in_word(!data.words()[word_offset], k - cur_rank).unwrap();
Some(sel)
}
}
impl<const SELECT1: bool, const SELECT0: bool> Rank9SelIndex<SELECT1, SELECT0> {
pub fn from_bytes(mut bytes: Bytes) -> Result<Self> {
let len = *bytes.view_prefix::<usize>()?;
let brp_len = *bytes.view_prefix::<usize>()?;
let block_rank_pairs = bytes.view_prefix_with_elems::<[usize]>(brp_len)?;
let has_select1 = *bytes.view_prefix::<usize>()? != 0;
let select1_hints = if has_select1 {
let l = *bytes.view_prefix::<usize>()?;
Some(bytes.view_prefix_with_elems::<[usize]>(l)?)
} else {
None
};
let has_select0 = *bytes.view_prefix::<usize>()? != 0;
let select0_hints = if has_select0 {
let l = *bytes.view_prefix::<usize>()?;
Some(bytes.view_prefix_with_elems::<[usize]>(l)?)
} else {
None
};
if has_select1 != SELECT1 || has_select0 != SELECT0 {
return Err(Error::MismatchedHintFlags);
}
Ok(Self {
len,
block_rank_pairs,
select1_hints,
select0_hints,
})
}
}
impl<const SELECT1: bool, const SELECT0: bool> crate::bit_vector::BitVectorIndex
for Rank9SelIndex<SELECT1, SELECT0>
{
fn build(data: &BitVectorData) -> Self {
let mut builder = Rank9SelIndexBuilder::<SELECT1, SELECT0>::new(data);
if SELECT1 {
builder = builder.select1_hints();
}
if SELECT0 {
builder = builder.select0_hints();
}
builder.build()
}
fn num_ones(&self, _data: &BitVectorData) -> usize {
self.num_ones()
}
fn rank1(&self, data: &BitVectorData, pos: usize) -> Option<usize> {
Rank9SelIndex::rank1(self, data, pos)
}
fn select1(&self, data: &BitVectorData, k: usize) -> Option<usize> {
Rank9SelIndex::select1(self, data, k)
}
fn select0(&self, data: &BitVectorData, k: usize) -> Option<usize> {
Rank9SelIndex::select0(self, data, k)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_builder_new_equivalence() {
let data = BitVectorData::from_bits([true, false, true, false]);
let idx1 = Rank9SelIndex::<true, true>::new(&data);
let idx2 = Rank9SelIndex::<true, true>::new(&data);
assert_eq!(idx1, idx2);
}
}