use super::{
BuilderOptions, RankSelectBuilder, RankSelectOps, RankSelectPerformanceOps,
};
use crate::system::{CpuFeatures, get_cpu_features};
use crate::FastVec;
use crate::error::{Result, ZiporaError};
use crate::succinct::BitVector;
use std::fmt;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::{_MM_HINT_T0, _mm_prefetch, _popcnt64};
#[repr(C, align(32))] #[derive(Clone, Debug)]
struct InterleavedLine {
rlev1: u32,
rlev2: [u8; 4],
bit64: [u64; 4],
}
#[derive(Clone)]
pub struct RankSelectInterleaved256 {
lines: FastVec<InterleavedLine>,
select_cache: Option<FastVec<u32>>,
total_ones: usize,
total_bits: usize,
select_sample_rate: usize,
}
const LINE_BITS: usize = 256;
const WORDS_PER_LINE: usize = 4;
const BITS_PER_WORD: usize = 64;
const DEFAULT_IL_SELECT_SAMPLE_RATE: usize = 512;
impl InterleavedLine {
fn new() -> Self {
Self {
rlev1: 0,
rlev2: [0; 4],
bit64: [0; 4],
}
}
#[inline(always)]
fn rank1_within_line(&self, bit_offset: usize) -> usize {
let word_idx = bit_offset / BITS_PER_WORD;
let bit_in_word = bit_offset % BITS_PER_WORD;
let rank = self.rlev2[word_idx] as usize;
if bit_in_word > 0 {
let trailing_count = unsafe {
debug_assert!(word_idx < WORDS_PER_LINE, "word_idx {} >= WORDS_PER_LINE {}", word_idx, WORDS_PER_LINE);
let word = *self.bit64.get_unchecked(word_idx);
#[cfg(target_feature = "popcnt")]
{
use std::arch::x86_64::_popcnt64;
let mask = (1u64 << bit_in_word) - 1;
_popcnt64((word & mask) as i64) as usize
}
#[cfg(not(target_feature = "popcnt"))]
{
let mask = (1u64 << bit_in_word) - 1;
(word & mask).count_ones() as usize
}
};
rank + trailing_count
} else {
rank
}
}
#[inline]
fn count_ones(&self) -> usize {
self.bit64
.iter()
.map(|&word| word.count_ones() as usize)
.sum()
}
#[inline]
fn get_bit(&self, bit_offset: usize) -> bool {
let word_idx = bit_offset / BITS_PER_WORD;
let bit_in_word = bit_offset % BITS_PER_WORD;
if word_idx < WORDS_PER_LINE {
(self.bit64[word_idx] >> bit_in_word) & 1 == 1
} else {
false
}
}
#[inline(always)]
fn popcount_hardware_accelerated(&self, x: u64) -> u32 {
#[cfg(target_arch = "x86_64")]
{
#[cfg(test)]
{
x.count_ones()
}
#[cfg(not(test))]
{
if get_cpu_features().has_popcnt {
unsafe { _popcnt64(x as i64) as u32 }
} else {
x.count_ones()
}
}
}
#[cfg(not(target_arch = "x86_64"))]
{
x.count_ones()
}
}
#[inline(always)]
fn prefetch_hint(&self) {
#[cfg(target_arch = "x86_64")]
{
unsafe {
_mm_prefetch(self as *const InterleavedLine as *const i8, _MM_HINT_T0);
}
}
}
}
impl RankSelectInterleaved256 {
pub fn new(bit_vector: BitVector) -> Result<Self> {
Self::with_options(bit_vector, true, DEFAULT_IL_SELECT_SAMPLE_RATE)
}
pub fn with_options(
bit_vector: BitVector,
enable_select_cache: bool,
select_sample_rate: usize,
) -> Result<Self> {
let mut rs = Self {
lines: FastVec::new(),
select_cache: if enable_select_cache {
Some(FastVec::new())
} else {
None
},
total_ones: 0,
total_bits: bit_vector.len(),
select_sample_rate,
};
rs.build_interleaved_cache(bit_vector)?;
if rs.select_cache.is_some() && rs.total_ones > 0 {
rs.build_select_cache()?;
}
Ok(rs)
}
fn build_interleaved_cache(&mut self, bit_vector: BitVector) -> Result<()> {
if self.total_bits == 0 {
return Ok(());
}
let num_lines = (self.total_bits + LINE_BITS - 1) / LINE_BITS;
self.lines.reserve(num_lines)?;
let blocks = bit_vector.blocks();
let mut cumulative_rank = 0u32;
for line_idx in 0..num_lines {
let mut line = InterleavedLine::new();
line.rlev1 = cumulative_rank;
let line_start_bit = line_idx * LINE_BITS;
let line_end_bit = ((line_idx + 1) * LINE_BITS).min(self.total_bits);
let mut line_rank = 0u16;
for word_idx in 0..WORDS_PER_LINE {
let word_start_bit = line_start_bit + word_idx * BITS_PER_WORD;
let word_end_bit = (word_start_bit + BITS_PER_WORD).min(line_end_bit);
line.rlev2[word_idx] = line_rank as u8;
if word_start_bit < self.total_bits {
let word = self.extract_word_from_blocks(blocks, word_start_bit, word_end_bit);
line.bit64[word_idx] = word;
let word_popcount = line.popcount_hardware_accelerated(word) as u16;
line_rank += word_popcount;
}
}
cumulative_rank += line_rank as u32;
self.lines.push(line)?;
}
self.total_ones = cumulative_rank as usize;
Ok(())
}
fn extract_word_from_blocks(&self, blocks: &[u64], start_bit: usize, end_bit: usize) -> u64 {
let start_word = start_bit / BITS_PER_WORD;
let end_word = (end_bit + BITS_PER_WORD - 1) / BITS_PER_WORD;
if start_word >= blocks.len() {
return 0;
}
if start_word + 1 >= end_word && end_bit <= (start_word + 1) * BITS_PER_WORD {
let mut word = blocks[start_word];
let start_bit_in_word = start_bit % BITS_PER_WORD;
if start_bit_in_word > 0 {
word >>= start_bit_in_word;
}
let valid_bits = end_bit - start_bit;
if valid_bits < BITS_PER_WORD {
let mask = (1u64 << valid_bits) - 1;
word &= mask;
}
word
} else {
let mut result = 0u64;
let mut bits_written = 0;
for block_idx in start_word..end_word.min(blocks.len()) {
let block_start_bit = block_idx * BITS_PER_WORD;
let block_end_bit = (block_idx + 1) * BITS_PER_WORD;
let copy_start = start_bit.max(block_start_bit);
let copy_end = end_bit.min(block_end_bit);
if copy_start < copy_end {
let mut block_word = blocks[block_idx];
let shift_right = copy_start - block_start_bit;
if shift_right > 0 {
block_word >>= shift_right;
}
let bits_to_copy = copy_end - copy_start;
if bits_to_copy < BITS_PER_WORD {
let mask = (1u64 << bits_to_copy) - 1;
block_word &= mask;
}
result |= block_word << bits_written;
bits_written += bits_to_copy;
}
if bits_written >= BITS_PER_WORD {
break;
}
}
result
}
}
fn build_select_cache(&mut self) -> Result<()> {
if self.select_cache.is_none() || self.total_ones == 0 {
return Ok(());
}
let mut ones_seen = 0;
let mut current_pos = 0;
let mut cache_entries = Vec::new();
while ones_seen < self.total_ones {
let target_ones = (ones_seen + self.select_sample_rate).min(self.total_ones);
while ones_seen < target_ones && current_pos < self.total_bits {
if self.get_bit_internal(current_pos) {
ones_seen += 1;
}
if ones_seen < target_ones {
current_pos += 1;
}
}
if ones_seen == target_ones {
cache_entries.push(current_pos as u32);
}
current_pos += 1;
}
if let Some(ref mut select_cache) = self.select_cache {
for entry in cache_entries {
select_cache.push(entry)?;
}
}
Ok(())
}
#[inline]
fn get_bit_internal(&self, pos: usize) -> bool {
if pos >= self.total_bits {
return false;
}
let line_idx = pos / LINE_BITS;
let bit_in_line = pos % LINE_BITS;
if line_idx < self.lines.len() {
self.lines[line_idx].get_bit(bit_in_line)
} else {
false
}
}
#[inline]
pub fn lines(&self) -> &[InterleavedLine] {
&self.lines
}
#[inline(always)]
fn rank1_cache_optimized(&self, pos: usize) -> usize {
if pos == 0 || self.total_bits == 0 {
return 0;
}
let pos = pos.min(self.total_bits);
let line_idx = pos / LINE_BITS;
if line_idx >= self.lines.len() {
return self.total_ones;
}
let bit_in_line = pos % LINE_BITS;
unsafe {
debug_assert!(line_idx < self.lines.len(), "line_idx {} >= lines.len() {}", line_idx, self.lines.len());
let line = self.lines.get_unchecked(line_idx);
if line_idx + 1 < self.lines.len() {
#[cfg(target_arch = "x86_64")]
{
debug_assert!(line_idx + 1 < self.lines.len(), "line_idx + 1 {} >= lines.len() {}", line_idx + 1, self.lines.len());
let next_line_ptr = self.lines.get_unchecked(line_idx + 1) as *const _ as *const i8;
std::arch::x86_64::_mm_prefetch::<{std::arch::x86_64::_MM_HINT_T0}>(next_line_ptr);
}
}
line.rlev1 as usize + line.rank1_within_line(bit_in_line)
}
}
#[inline]
fn select1_cache_optimized(&self, k: usize) -> Result<usize> {
if k >= self.total_ones {
return Err(ZiporaError::out_of_bounds(k, self.total_ones));
}
let target_rank = k + 1;
if let Some(ref select_cache) = self.select_cache {
let hint_idx = k / self.select_sample_rate;
if hint_idx < select_cache.len() {
let hint_pos = select_cache[hint_idx] as usize;
return self.select1_from_hint(k, hint_pos);
}
}
let line_idx = self.binary_search_lines(target_rank);
let line = &self.lines[line_idx];
let rank_before_line = line.rlev1 as usize;
let remaining_ones = target_rank - rank_before_line;
self.select1_within_line(line_idx, remaining_ones)
}
fn select1_from_hint(&self, k: usize, hint_pos: usize) -> Result<usize> {
let target_rank = k + 1;
let hint_rank = self.rank1_cache_optimized(hint_pos + 1);
if hint_rank >= target_rank {
self.select1_linear_search(0, hint_pos + 1, target_rank)
} else {
self.select1_linear_search(hint_pos, self.total_bits, target_rank)
}
}
fn select1_linear_search(&self, start: usize, end: usize, target_rank: usize) -> Result<usize> {
let mut current_rank = self.rank1_cache_optimized(start);
for pos in start..end {
if self.get_bit_internal(pos) {
current_rank += 1;
if current_rank == target_rank {
return Ok(pos);
}
}
}
Err(ZiporaError::invalid_data(
"Select position not found".to_string(),
))
}
#[inline]
fn binary_search_lines(&self, target_rank: usize) -> usize {
let mut left = 0;
let mut right = self.lines.len();
while left < right {
let mid = left + (right - left) / 2;
if self.lines[mid].rlev1 < target_rank as u32 {
left = mid + 1;
} else {
right = mid;
}
}
left.saturating_sub(1)
.min(self.lines.len().saturating_sub(1))
}
fn select1_within_line(&self, line_idx: usize, remaining_ones: usize) -> Result<usize> {
if line_idx >= self.lines.len() {
return Err(ZiporaError::invalid_data(
"Line index out of bounds".to_string(),
));
}
let line = &self.lines[line_idx];
let line_start_bit = line_idx * LINE_BITS;
let mut found_ones = 0;
for word_idx in 0..WORDS_PER_LINE {
let _word_start_rank = line.rlev2[word_idx] as usize;
let word = line.bit64[word_idx];
let word_popcount = line.popcount_hardware_accelerated(word) as usize;
if found_ones + word_popcount >= remaining_ones {
let needed_in_word = remaining_ones - found_ones;
let bit_pos = self.uint_select1_bmi2(word, needed_in_word + 1);
if bit_pos < BITS_PER_WORD {
return Ok(line_start_bit + word_idx * BITS_PER_WORD + bit_pos);
}
}
found_ones += word_popcount;
}
Err(ZiporaError::invalid_data(
"Select position not found in line".to_string(),
))
}
#[inline]
fn select_u64_within_word(&self, word: u64, k: usize) -> usize {
self.uint_select1_bmi2(word, k)
}
#[inline(always)]
fn uint_select1_bmi2(&self, word: u64, rank: usize) -> usize {
if rank == 0 || rank > word.count_ones() as usize {
return BITS_PER_WORD;
}
#[cfg(target_arch = "x86_64")]
{
#[cfg(not(test))]
{
if get_cpu_features().has_bmi2 {
unsafe {
use std::arch::x86_64::{_pdep_u64, _tzcnt_u64};
return _tzcnt_u64(_pdep_u64(1u64 << (rank - 1), word)) as usize;
}
}
}
}
let mut count = 0;
for i in 0..64 {
if (word >> i) & 1 == 1 {
count += 1;
if count == rank {
return i;
}
}
}
BITS_PER_WORD }
#[inline]
pub fn prefetch_rank1(&self, bitpos: usize) {
#[cfg(target_arch = "x86_64")]
{
let line_idx = bitpos / LINE_BITS;
if line_idx < self.lines.len() {
unsafe {
let ptr = &self.lines[line_idx] as *const InterleavedLine as *const i8;
_mm_prefetch(ptr, _MM_HINT_T0);
}
}
}
#[cfg(target_arch = "aarch64")]
{
let line_idx = bitpos / LINE_BITS;
if line_idx < self.lines.len() {
unsafe {
let ptr = &self.lines[line_idx] as *const InterleavedLine;
std::arch::asm!(
"prfm pldl1keep, [{0}]",
in(reg) ptr,
options(nostack, preserves_flags)
);
}
}
}
}
#[inline]
pub fn fast_prefetch_rank1(lines: &[InterleavedLine], bitpos: usize) {
#[cfg(target_arch = "x86_64")]
{
let line_idx = bitpos / LINE_BITS;
if line_idx < lines.len() {
unsafe {
let ptr = &lines[line_idx] as *const InterleavedLine as *const i8;
_mm_prefetch(ptr, _MM_HINT_T0);
}
}
}
}
#[inline]
pub fn prefetch_select1(&self, id: usize) {
if let Some(ref cache) = self.select_cache {
#[cfg(target_arch = "x86_64")]
{
let cache_idx = id / self.select_sample_rate;
if cache_idx < cache.len() {
unsafe {
let ptr = &cache[cache_idx] as *const u32 as *const i8;
_mm_prefetch(ptr, _MM_HINT_T0);
}
}
}
}
}
#[inline]
pub fn rank1_optimized(&self, pos: usize) -> usize {
self.rank1_cache_optimized(pos)
}
pub fn rank1_bulk_optimized(&self, positions: &[usize]) -> Vec<usize> {
self.rank1_bulk(positions)
}
#[inline]
pub fn select1_optimized(&self, k: usize) -> Result<usize> {
self.select1_cache_optimized(k)
}
pub fn select1_bulk_optimized(&self, indices: &[usize]) -> Result<Vec<usize>> {
self.select1_bulk(indices)
}
pub fn get_bit_data(&self) -> Vec<u64> {
let total_words = (self.total_bits + BITS_PER_WORD - 1) / BITS_PER_WORD;
let mut result = Vec::with_capacity(total_words);
for line in self.lines.iter() {
for &word in &line.bit64 {
if result.len() < total_words {
result.push(word);
}
}
}
result
}
#[inline]
pub unsafe fn prefetch_ahead(
&self,
base_pos: usize,
count: usize,
strategy: &mut crate::memory::PrefetchStrategy,
) {
if self.lines.is_empty() || count == 0 {
return;
}
let start_line = base_pos / LINE_BITS;
let end_line = ((base_pos + count * LINE_BITS) / LINE_BITS).min(self.lines.len());
if start_line >= self.lines.len() {
return;
}
unsafe {
let num_lines = end_line - start_line;
let slice_bytes = std::slice::from_raw_parts(
(&self.lines[start_line] as *const InterleavedLine) as *const u8,
num_lines * std::mem::size_of::<InterleavedLine>()
);
let stride = std::mem::size_of::<InterleavedLine>();
let prefetch_count = num_lines.min(8);
strategy.sequential_prefetch(slice_bytes, stride, prefetch_count);
}
}
}
impl Default for RankSelectInterleaved256 {
fn default() -> Self {
Self {
lines: FastVec::new(),
select_cache: None,
total_ones: 0,
total_bits: 0,
select_sample_rate: 256,
}
}
}
impl RankSelectOps for RankSelectInterleaved256 {
#[inline]
fn rank1(&self, pos: usize) -> usize {
self.rank1_cache_optimized(pos)
}
#[inline]
fn rank0(&self, pos: usize) -> usize {
if pos == 0 || self.total_bits == 0 {
return 0;
}
let pos = pos.min(self.total_bits);
pos - self.rank1_cache_optimized(pos)
}
#[inline]
fn select1(&self, k: usize) -> Result<usize> {
self.select1_cache_optimized(k)
}
fn select0(&self, k: usize) -> Result<usize> {
if k >= (self.total_bits - self.total_ones) {
return Err(ZiporaError::out_of_bounds(
k,
self.total_bits - self.total_ones,
));
}
let mut lo = 0;
let mut hi = self.lines.len();
while lo < hi {
let mid = (lo + hi) / 2;
let bitpos = mid * LINE_BITS;
let rank1_at_line = if mid == 0 { 0 } else { self.lines[mid].rlev1 as usize };
let rank0_at_line = bitpos - rank1_at_line;
if rank0_at_line <= k { lo = mid + 1;
} else {
hi = mid;
}
}
let line_idx = lo.saturating_sub(1);
if line_idx >= self.lines.len() {
return Err(ZiporaError::invalid_data(
"Select0 line index out of bounds".to_string(),
));
}
let line = &self.lines[line_idx];
let base_bitpos = line_idx * LINE_BITS;
let base_rank1 = line.rlev1 as usize;
let base_rank0 = base_bitpos - base_rank1;
let target_rank_in_line = k - base_rank0;
let mut rank_in_line = 0;
for word_idx in 0..WORDS_PER_LINE {
let word = line.bit64[word_idx];
let inverted_word = !word; let zeros_in_word = inverted_word.count_ones() as usize;
if rank_in_line + zeros_in_word > target_rank_in_line {
let rank_in_word = target_rank_in_line - rank_in_line;
let bit_pos = self.uint_select1_bmi2(inverted_word, rank_in_word + 1); if bit_pos < BITS_PER_WORD {
return Ok(base_bitpos + word_idx * BITS_PER_WORD + bit_pos);
}
}
rank_in_line += zeros_in_word;
}
Err(ZiporaError::invalid_data(
"Select0 position not found".to_string(),
))
}
#[inline]
fn len(&self) -> usize {
self.total_bits
}
#[inline]
fn count_ones(&self) -> usize {
self.total_ones
}
#[inline]
fn get(&self, index: usize) -> Option<bool> {
if index >= self.total_bits {
None
} else {
Some(self.get_bit_internal(index))
}
}
fn space_overhead_percent(&self) -> f64 {
if self.total_bits == 0 {
return 0.0;
}
let bit_data_bytes = (self.total_bits + 7) / 8;
let cache_bytes = self.lines.len() * std::mem::size_of::<InterleavedLine>();
let select_cache_bytes = self
.select_cache
.as_ref()
.map(|cache| cache.len() * 4)
.unwrap_or(0);
let total_overhead = cache_bytes + select_cache_bytes;
(total_overhead as f64 / bit_data_bytes as f64) * 100.0
}
}
impl RankSelectBuilder<RankSelectInterleaved256> for RankSelectInterleaved256 {
fn from_bit_vector(bit_vector: BitVector) -> Result<RankSelectInterleaved256> {
Self::new(bit_vector)
}
fn from_iter<I>(iter: I) -> Result<RankSelectInterleaved256>
where
I: IntoIterator<Item = bool>,
{
let mut bit_vector = BitVector::new();
for bit in iter {
bit_vector.push(bit)?;
}
Self::new(bit_vector)
}
fn from_bytes(bytes: &[u8], bit_len: usize) -> Result<RankSelectInterleaved256> {
let mut bit_vector = BitVector::new();
for (byte_idx, &byte) in bytes.iter().enumerate() {
for bit_idx in 0..8 {
let global_bit_idx = byte_idx * 8 + bit_idx;
if global_bit_idx >= bit_len {
break;
}
let bit = (byte >> bit_idx) & 1 == 1;
bit_vector.push(bit)?;
}
if (byte_idx + 1) * 8 >= bit_len {
break;
}
}
Self::new(bit_vector)
}
fn with_optimizations(
bit_vector: BitVector,
opts: BuilderOptions,
) -> Result<RankSelectInterleaved256> {
let enable_select_cache = opts.optimize_select;
let select_sample_rate = opts.select_sample_rate;
Self::with_options(bit_vector, enable_select_cache, select_sample_rate)
}
}
impl RankSelectPerformanceOps for RankSelectInterleaved256 {
#[inline]
fn rank1_hardware_accelerated(&self, pos: usize) -> usize {
self.rank1_cache_optimized(pos)
}
#[inline]
fn select1_hardware_accelerated(&self, k: usize) -> Result<usize> {
self.select1_cache_optimized(k)
}
#[inline]
fn rank1_adaptive(&self, pos: usize) -> usize {
self.rank1_cache_optimized(pos)
}
#[inline]
fn select1_adaptive(&self, k: usize) -> Result<usize> {
self.select1_cache_optimized(k)
}
fn rank1_bulk(&self, positions: &[usize]) -> Vec<usize> {
const PREFETCH_DISTANCE: usize = 8;
let mut results = Vec::with_capacity(positions.len());
for (i, &pos) in positions.iter().enumerate() {
if i + PREFETCH_DISTANCE < positions.len() {
self.prefetch_rank1(positions[i + PREFETCH_DISTANCE]);
}
results.push(self.rank1_cache_optimized(pos));
}
results
}
fn select1_bulk(&self, indices: &[usize]) -> Result<Vec<usize>> {
const PREFETCH_DISTANCE: usize = 8;
let mut results = Vec::with_capacity(indices.len());
for (i, &k) in indices.iter().enumerate() {
if i + PREFETCH_DISTANCE < indices.len() {
self.prefetch_select1(indices[i + PREFETCH_DISTANCE]);
}
results.push(self.select1_cache_optimized(k)?);
}
Ok(results)
}
}
impl fmt::Debug for RankSelectInterleaved256 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("RankSelectInterleaved256")
.field("total_bits", &self.total_bits)
.field("total_ones", &self.total_ones)
.field("lines", &self.lines.len())
.field(
"select_cache_size",
&self.select_cache.as_ref().map(|c| c.len()).unwrap_or(0),
)
.field("select_sample_rate", &self.select_sample_rate)
.field("space_overhead_percent", &self.space_overhead_percent())
.finish()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::succinct::BitVector;
#[test]
fn debug_rank_select_semantics() -> Result<()> {
let mut bv = BitVector::new();
bv.push(true)?; bv.push(false)?; bv.push(true)?; bv.push(false)?;
let rs = RankSelectInterleaved256::new(bv)?;
println!("Bit pattern: 1010");
println!("rank1(0) = {} (bits before position 0)", rs.rank1(0));
println!("rank1(1) = {} (bits before position 1)", rs.rank1(1));
println!("rank1(2) = {} (bits before position 2)", rs.rank1(2));
println!("rank1(3) = {} (bits before position 3)", rs.rank1(3));
println!("rank1(4) = {} (bits before position 4)", rs.rank1(4));
println!("select1(0) = {:?} (position of 0th set bit)", rs.select1(0));
println!("select1(1) = {:?} (position of 1st set bit)", rs.select1(1));
if let Ok(pos0) = rs.select1(0) {
println!("Round-trip: select1(0)={}, rank1({})={}", pos0, pos0, rs.rank1(pos0));
println!("Round-trip: select1(0)={}, rank1({})={}", pos0, pos0+1, rs.rank1(pos0+1));
}
for i in 0..=4 {
let r0 = rs.rank0(i);
let r1 = rs.rank1(i);
println!("Position {}: rank0={}, rank1={}, sum={}", i, r0, r1, r0 + r1);
}
Ok(())
}
#[test]
fn test_rank_select_interleaved256_basic() -> Result<()> {
let mut bv = BitVector::new();
for i in 0..100 {
bv.push(i % 5 == 0)?; }
let rs = RankSelectInterleaved256::new(bv)?;
assert_eq!(rs.len(), 100);
assert_eq!(rs.count_ones(), 20);
assert_eq!(rs.rank1(0), 0);
assert_eq!(rs.rank1(5), 1);
assert_eq!(rs.rank1(10), 2);
assert_eq!(rs.rank1(25), 5);
for pos in [0, 5, 10, 25, 50, 99] {
assert_eq!(rs.rank0(pos) + rs.rank1(pos), pos);
}
assert_eq!(rs.select1(0)?, 0);
assert_eq!(rs.select1(1)?, 5);
assert_eq!(rs.select1(2)?, 10);
assert_eq!(rs.get(0), Some(true));
assert_eq!(rs.get(1), Some(false));
assert_eq!(rs.get(5), Some(true));
assert_eq!(rs.get(100), None);
assert!(rs.space_overhead_percent() < 2000.0);
Ok(())
}
#[test]
fn test_rank_select_interleaved256_empty() -> Result<()> {
let bv = BitVector::new();
let rs = RankSelectInterleaved256::new(bv)?;
assert_eq!(rs.len(), 0);
assert_eq!(rs.count_ones(), 0);
assert_eq!(rs.rank1(0), 0);
assert_eq!(rs.rank0(0), 0);
assert_eq!(rs.get(0), None);
Ok(())
}
#[test]
fn test_rank_select_interleaved256_all_ones() -> Result<()> {
let mut bv = BitVector::new();
for _ in 0..50 {
bv.push(true)?;
}
let rs = RankSelectInterleaved256::new(bv)?;
assert_eq!(rs.len(), 50);
assert_eq!(rs.count_ones(), 50);
assert_eq!(rs.rank1(50), 50);
assert_eq!(rs.rank0(50), 0);
for i in 0..50 {
assert_eq!(rs.select1(i)?, i);
assert_eq!(rs.get(i), Some(true));
}
Ok(())
}
#[test]
fn test_rank_select_interleaved256_all_zeros() -> Result<()> {
let mut bv = BitVector::new();
for _ in 0..50 {
bv.push(false)?;
}
let rs = RankSelectInterleaved256::new(bv)?;
assert_eq!(rs.len(), 50);
assert_eq!(rs.count_ones(), 0);
assert_eq!(rs.rank1(50), 0);
assert_eq!(rs.rank0(50), 50);
for i in 0..50 {
assert_eq!(rs.get(i), Some(false));
}
assert!(rs.select1(0).is_err());
Ok(())
}
#[test]
fn test_prefetch_methods_basic() -> Result<()> {
let mut bv = BitVector::new();
for i in 0..1000 {
bv.push(i % 3 == 0)?;
}
let rs = RankSelectInterleaved256::new(bv)?;
rs.prefetch_rank1(0);
rs.prefetch_rank1(100);
rs.prefetch_rank1(500);
rs.prefetch_rank1(999);
if !rs.lines.is_empty() {
RankSelectInterleaved256::fast_prefetch_rank1(&rs.lines, 0);
RankSelectInterleaved256::fast_prefetch_rank1(&rs.lines, 500);
}
rs.prefetch_select1(0);
if rs.count_ones() > 10 {
rs.prefetch_select1(10);
}
Ok(())
}
#[test]
fn test_prefetch_edge_cases() -> Result<()> {
let bv = BitVector::new();
let rs = RankSelectInterleaved256::new(bv)?;
rs.prefetch_rank1(0);
let mut bv = BitVector::new();
bv.push(true)?;
let rs = RankSelectInterleaved256::new(bv)?;
rs.prefetch_rank1(0);
rs.prefetch_rank1(1);
let mut bv = BitVector::new();
for i in 0..LINE_BITS * 3 {
bv.push(i % 2 == 0)?;
}
let rs = RankSelectInterleaved256::new(bv)?;
rs.prefetch_rank1(0);
rs.prefetch_rank1(LINE_BITS);
rs.prefetch_rank1(LINE_BITS * 2);
rs.prefetch_rank1(LINE_BITS * 3);
Ok(())
}
#[test]
fn test_optimized_methods_correctness() -> Result<()> {
let mut bv = BitVector::new();
for i in 0..1000 {
bv.push(i % 7 == 0)?;
}
let rs = RankSelectInterleaved256::new(bv)?;
for pos in [0, 50, 100, 250, 500, 750, 999] {
let base_result = rs.rank1_cache_optimized(pos);
let optimized_result = rs.rank1_optimized(pos);
assert_eq!(base_result, optimized_result,
"rank1_optimized({}) should match base implementation", pos);
}
let ones_count = rs.count_ones();
if ones_count > 0 {
for id in [0, ones_count / 4, ones_count / 2, ones_count - 1] {
let base_result = rs.select1(id)?;
let optimized_result = rs.select1_optimized(id)?;
assert_eq!(base_result, optimized_result,
"select1_optimized({}) should match base implementation", id);
}
}
Ok(())
}
#[test]
fn test_bulk_operations_with_prefetching() -> Result<()> {
let mut bv = BitVector::new();
for i in 0..2000 {
bv.push(i % 11 == 0)?;
}
let rs = RankSelectInterleaved256::new(bv)?;
let positions: Vec<usize> = (0..100).map(|i| i * 20).collect();
let bulk_results = rs.rank1_bulk_optimized(&positions);
assert_eq!(bulk_results.len(), positions.len());
for (i, &pos) in positions.iter().enumerate() {
let individual_result = rs.rank1(pos);
assert_eq!(bulk_results[i], individual_result,
"Bulk rank at position {} should match individual rank", pos);
}
let ones_count = rs.count_ones();
if ones_count >= 50 {
let ids: Vec<usize> = (0..50).collect();
let bulk_select_results = rs.select1_bulk_optimized(&ids)?;
assert_eq!(bulk_select_results.len(), ids.len());
for (i, &id) in ids.iter().enumerate() {
let individual_result = rs.select1(id)?;
assert_eq!(bulk_select_results[i], individual_result,
"Bulk select for id {} should match individual select", id);
}
}
Ok(())
}
#[test]
fn test_bulk_operations_empty_input() -> Result<()> {
let mut bv = BitVector::new();
for i in 0..100 {
bv.push(i % 5 == 0)?;
}
let rs = RankSelectInterleaved256::new(bv)?;
let empty_positions: Vec<usize> = vec![];
let results = rs.rank1_bulk_optimized(&empty_positions);
assert!(results.is_empty());
let select_results = rs.select1_bulk_optimized(&empty_positions)?;
assert!(select_results.is_empty());
Ok(())
}
#[test]
fn test_bulk_operations_single_element() -> Result<()> {
let mut bv = BitVector::new();
for i in 0..100 {
bv.push(i % 5 == 0)?;
}
let rs = RankSelectInterleaved256::new(bv)?;
let single_pos = vec![50];
let results = rs.rank1_bulk_optimized(&single_pos);
assert_eq!(results.len(), 1);
assert_eq!(results[0], rs.rank1(50));
Ok(())
}
#[test]
fn test_lookahead_prefetching_coverage() -> Result<()> {
let mut bv = BitVector::new();
for i in 0..5000 {
bv.push(i % 13 == 0)?;
}
let rs = RankSelectInterleaved256::new(bv)?;
let positions: Vec<usize> = (0..100).map(|i| i * 50).collect();
let results = rs.rank1_bulk_optimized(&positions);
assert_eq!(results.len(), positions.len());
for (i, &pos) in positions.iter().enumerate() {
assert_eq!(results[i], rs.rank1(pos),
"Lookahead prefetching should not affect correctness at position {}", pos);
}
Ok(())
}
#[test]
fn test_adaptive_simd_integration() -> Result<()> {
let mut bv = BitVector::new();
for i in 0..1000 {
bv.push(i % 4 == 0)?; }
let rs = RankSelectInterleaved256::new(bv)?;
for pos in [0, 100, 500, 999] {
let adaptive_result = rs.rank1_adaptive(pos);
let expected = rs.rank1_cache_optimized(pos);
assert_eq!(adaptive_result, expected,
"Adaptive SIMD selection should produce correct results at position {}", pos);
}
let ones_count = rs.count_ones();
if ones_count > 0 {
for id in [0, ones_count / 2, ones_count - 1] {
let adaptive_result = rs.select1_adaptive(id)?;
let expected = rs.select1(id)?;
assert_eq!(adaptive_result, expected,
"Adaptive SIMD selection should produce correct results for id {}", id);
}
}
Ok(())
}
#[test]
fn test_performance_monitoring_records_data() -> Result<()> {
use crate::simd::AdaptiveSimdSelector;
let selector = AdaptiveSimdSelector::global();
let mut bv = BitVector::new();
for i in 0..1000 {
bv.push(i % 5 == 0)?;
}
let rs = RankSelectInterleaved256::new(bv)?;
let _result = rs.rank1_optimized(500);
let positions: Vec<usize> = (0..50).map(|i| i * 20).collect();
let _bulk_results = rs.rank1_bulk_optimized(&positions);
let _impl_type = selector.select_optimal_impl(
crate::simd::Operation::Rank,
rs.len(),
Some(rs.count_ones() as f64 / rs.len().max(1) as f64),
);
Ok(())
}
#[test]
fn test_cross_platform_prefetch_compilation() -> Result<()> {
let mut bv = BitVector::new();
for i in 0..256 {
bv.push(i % 2 == 0)?;
}
let rs = RankSelectInterleaved256::new(bv)?;
rs.prefetch_rank1(0);
rs.prefetch_rank1(128);
rs.prefetch_rank1(255);
#[cfg(target_arch = "x86_64")]
{
use std::arch::x86_64::{_mm_prefetch, _MM_HINT_T0};
if !rs.lines.is_empty() {
unsafe {
let ptr = &rs.lines[0] as *const InterleavedLine as *const i8;
_mm_prefetch(ptr, _MM_HINT_T0);
}
}
}
#[cfg(target_arch = "aarch64")]
{
if !rs.lines.is_empty() {
unsafe {
let ptr = &rs.lines[0] as *const InterleavedLine;
std::arch::asm!(
"prfm pldl1keep, [{0}]",
in(reg) ptr,
options(nostack, preserves_flags)
);
}
}
}
Ok(())
}
#[test]
fn test_large_dataset_integration() -> Result<()> {
let mut bv = BitVector::new();
for i in 0..10000 {
bv.push(i % 17 == 0)?;
}
let rs = RankSelectInterleaved256::new(bv)?;
let positions: Vec<usize> = (0..500).map(|i| i * 20).collect();
let bulk_results = rs.rank1_bulk_optimized(&positions);
assert_eq!(bulk_results.len(), positions.len());
for i in [0, 100, 250, 499] {
assert_eq!(bulk_results[i], rs.rank1(positions[i]));
}
let ones_count = rs.count_ones();
if ones_count >= 100 {
let ids: Vec<usize> = (0..100).collect();
let select_results = rs.select1_bulk_optimized(&ids)?;
for i in [0, 25, 50, 75, 99] {
assert_eq!(select_results[i], rs.select1(ids[i])?);
}
}
Ok(())
}
#[test]
fn test_mixed_workload() -> Result<()> {
let mut bv = BitVector::new();
for i in 0..2000 {
bv.push(i % 7 == 0)?;
}
let rs = RankSelectInterleaved256::new(bv)?;
let _r1 = rs.rank1_optimized(100);
let positions = vec![200, 400, 600, 800];
let _bulk_ranks = rs.rank1_bulk_optimized(&positions);
let _r2 = rs.rank1_optimized(1000);
let ones_count = rs.count_ones();
if ones_count >= 10 {
let _s1 = rs.select1_optimized(5)?;
let ids = vec![0, 2, 4, 6, 8];
let _bulk_selects = rs.select1_bulk_optimized(&ids)?;
}
Ok(())
}
}