#[cfg(not(test))]
use alloc::vec::Vec;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::bits::popcount::popcount_word;
use crate::bits::SelectIndex;
use crate::util::broadword::select_in_word;
pub trait SelectSupport: Clone + Default {
fn build(words: &[u64], total_ones: usize) -> Self;
fn select1(&self, words: &[u64], len: usize, total_ones: usize, k: usize) -> Option<usize>;
}
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct NoSelect;
impl SelectSupport for NoSelect {
#[inline]
fn build(_words: &[u64], _total_ones: usize) -> Self {
NoSelect
}
#[inline]
fn select1(&self, _words: &[u64], _len: usize, _total_ones: usize, _k: usize) -> Option<usize> {
None
}
}
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct WithSelect {
select_idx: SelectIndex,
}
impl SelectSupport for WithSelect {
fn build(words: &[u64], total_ones: usize) -> Self {
WithSelect {
select_idx: SelectIndex::build(words, total_ones, 256),
}
}
fn select1(&self, words: &[u64], len: usize, total_ones: usize, k: usize) -> Option<usize> {
if k >= total_ones {
return None;
}
let (start_word, mut remaining) = self.select_idx.jump_to(k);
for (word_idx, &word) in words.iter().enumerate().skip(start_word) {
let pop = popcount_word(word) as usize;
if pop > remaining {
let bit_pos = select_in_word(word, remaining as u32) as usize;
let result = word_idx * 64 + bit_pos;
return if result < len { Some(result) } else { None };
}
remaining -= pop;
}
None
}
}
const BYTE_MIN_EXCESS: [i8; 256] = {
let mut table = [0i8; 256];
let mut byte_val: u16 = 0;
while byte_val < 256 {
let mut excess: i8 = 0;
let mut min_e: i8 = 0;
let mut bit = 0;
while bit < 8 {
if (byte_val >> bit) & 1 == 1 {
excess += 1;
} else {
excess -= 1;
if excess < min_e {
min_e = excess;
}
}
bit += 1;
}
table[byte_val as usize] = min_e;
byte_val += 1;
}
table
};
const BYTE_MAX_EXCESS_REV: [i8; 256] = {
let mut table = [0i8; 256];
let mut byte_val: u16 = 0;
while byte_val < 256 {
let mut excess: i8 = 0;
let mut max_e: i8 = 0;
let mut bit: i8 = 7;
while bit >= 0 {
if (byte_val >> bit) & 1 == 1 {
excess += 1;
if excess > max_e {
max_e = excess;
}
} else {
excess -= 1;
}
bit -= 1;
}
table[byte_val as usize] = max_e;
byte_val += 1;
}
table
};
const BYTE_TOTAL_EXCESS: [i8; 256] = {
let mut table = [0i8; 256];
let mut byte_val: u16 = 0;
while byte_val < 256 {
let mut ones: i8 = 0;
let mut tmp = byte_val;
while tmp != 0 {
ones += (tmp & 1) as i8;
tmp >>= 1;
}
table[byte_val as usize] = 2 * ones - 8;
byte_val += 1;
}
table
};
const BYTE_FIND_CLOSE: [[u8; 16]; 256] = {
let mut table = [[8u8; 16]; 256];
let mut byte_val: usize = 0;
while byte_val < 256 {
let mut init_excess: usize = 0;
while init_excess < 16 {
let starting_excess = (init_excess + 1) as i8; let mut excess = starting_excess;
let mut bit = 0;
while bit < 8 {
if (byte_val >> bit) & 1 == 1 {
excess += 1;
} else {
excess -= 1;
if excess == 0 {
table[byte_val][init_excess] = bit as u8;
break;
}
}
bit += 1;
}
init_excess += 1;
}
byte_val += 1;
}
table
};
#[inline]
fn word_min_excess_unrolled(word: u64) -> (i8, i16) {
let bytes = word.to_le_bytes();
let b0 = bytes[0] as usize;
let b1 = bytes[1] as usize;
let b2 = bytes[2] as usize;
let b3 = bytes[3] as usize;
let b4 = bytes[4] as usize;
let b5 = bytes[5] as usize;
let b6 = bytes[6] as usize;
let b7 = bytes[7] as usize;
let m0 = BYTE_MIN_EXCESS[b0] as i16;
let m1 = BYTE_MIN_EXCESS[b1] as i16;
let m2 = BYTE_MIN_EXCESS[b2] as i16;
let m3 = BYTE_MIN_EXCESS[b3] as i16;
let m4 = BYTE_MIN_EXCESS[b4] as i16;
let m5 = BYTE_MIN_EXCESS[b5] as i16;
let m6 = BYTE_MIN_EXCESS[b6] as i16;
let m7 = BYTE_MIN_EXCESS[b7] as i16;
let t0 = BYTE_TOTAL_EXCESS[b0] as i16;
let t1 = BYTE_TOTAL_EXCESS[b1] as i16;
let t2 = BYTE_TOTAL_EXCESS[b2] as i16;
let t3 = BYTE_TOTAL_EXCESS[b3] as i16;
let t4 = BYTE_TOTAL_EXCESS[b4] as i16;
let t5 = BYTE_TOTAL_EXCESS[b5] as i16;
let t6 = BYTE_TOTAL_EXCESS[b6] as i16;
let t7 = BYTE_TOTAL_EXCESS[b7] as i16;
let _p0: i16 = 0;
let p1 = t0;
let p2 = p1 + t1;
let p3 = p2 + t2;
let p4 = p3 + t3;
let p5 = p4 + t4;
let p6 = p5 + t5;
let p7 = p6 + t6;
let total = p7 + t7;
let mut global_min = m0; global_min = global_min.min(p1 + m1);
global_min = global_min.min(p2 + m2);
global_min = global_min.min(p3 + m3);
global_min = global_min.min(p4 + m4);
global_min = global_min.min(p5 + m5);
global_min = global_min.min(p6 + m6);
global_min = global_min.min(p7 + m7);
let min_clamped = global_min.clamp(-128, 127) as i8;
(min_clamped, total)
}
fn build_l0_index(words: &[u64], len: usize, num_words: usize) -> (Vec<i8>, Vec<i16>) {
let mut l0_min_excess = Vec::with_capacity(num_words);
let mut l0_word_excess = Vec::with_capacity(num_words);
let full_words = if len % 64 == 0 {
num_words
} else {
num_words - 1
};
for &word in words.iter().take(full_words) {
let (min_e, total_e) = word_min_excess_unrolled(word);
l0_min_excess.push(min_e);
l0_word_excess.push(total_e);
}
if full_words < num_words {
let valid_bits = len % 64;
let (min_e, total_e) = word_min_excess(words[num_words - 1], valid_bits);
l0_min_excess.push(min_e);
l0_word_excess.push(total_e);
}
(l0_min_excess, l0_word_excess)
}
#[cfg(all(feature = "simd", target_arch = "aarch64"))]
fn build_l1_index_neon(
l0_min_excess: &[i8],
l0_word_excess: &[i16],
num_l1: usize,
) -> (Vec<i16>, Vec<i16>) {
use core::arch::aarch64::*;
let mut l1_min_excess = Vec::with_capacity(num_l1);
let mut l1_block_excess = Vec::with_capacity(num_l1);
let num_words = l0_min_excess.len();
for block_idx in 0..num_l1 {
let start = block_idx * FACTOR_L1;
let end = (start + FACTOR_L1).min(num_words);
let mut block_min: i16 = 0;
let mut running_excess: i16 = 0;
let mut i = start;
while i + 8 <= end {
unsafe {
let min_ptr = l0_min_excess.as_ptr().add(i);
let min_i8 = vld1_s8(min_ptr);
let min_lo = vmovl_s8(min_i8);
let excess_ptr = l0_word_excess.as_ptr().add(i);
let excess = vld1q_s16(excess_ptr);
let shifted1 = vextq_s16(vdupq_n_s16(0), excess, 7); let sum1 = vaddq_s16(excess, shifted1);
let shifted2 = vextq_s16(vdupq_n_s16(0), sum1, 6); let sum2 = vaddq_s16(sum1, shifted2);
let shifted4 = vextq_s16(vdupq_n_s16(0), sum2, 4); let prefix_sum = vaddq_s16(sum2, shifted4);
let offset = vdupq_n_s16(running_excess);
let adjusted_prefix = vaddq_s16(prefix_sum, offset);
let exclusive_prefix = vextq_s16(offset, adjusted_prefix, 7);
let adjusted_min = vaddq_s16(min_lo, exclusive_prefix);
let chunk_min = vminvq_s16(adjusted_min);
block_min = block_min.min(chunk_min);
running_excess += vaddvq_s16(excess);
}
i += 8;
}
while i < end {
let word_min = l0_min_excess[i] as i16;
let word_excess = l0_word_excess[i];
block_min = block_min.min(running_excess + word_min);
running_excess += word_excess;
i += 1;
}
l1_min_excess.push(block_min);
l1_block_excess.push(running_excess);
}
(l1_min_excess, l1_block_excess)
}
#[cfg(all(feature = "simd", target_arch = "aarch64"))]
fn build_l2_index_neon(
l1_min_excess: &[i16],
l1_block_excess: &[i16],
num_l2: usize,
) -> (Vec<i16>, Vec<i16>) {
use core::arch::aarch64::*;
let mut l2_min_excess = Vec::with_capacity(num_l2);
let mut l2_block_excess = Vec::with_capacity(num_l2);
let num_l1 = l1_min_excess.len();
for block_idx in 0..num_l2 {
let start = block_idx * FACTOR_L2;
let end = (start + FACTOR_L2).min(num_l1);
let mut block_min: i16 = 0;
let mut running_excess: i16 = 0;
let mut i = start;
while i + 8 <= end {
unsafe {
let min_ptr = l1_min_excess.as_ptr().add(i);
let min_vals = vld1q_s16(min_ptr);
let excess_ptr = l1_block_excess.as_ptr().add(i);
let excess = vld1q_s16(excess_ptr);
let shifted1 = vextq_s16(vdupq_n_s16(0), excess, 7);
let sum1 = vaddq_s16(excess, shifted1);
let shifted2 = vextq_s16(vdupq_n_s16(0), sum1, 6);
let sum2 = vaddq_s16(sum1, shifted2);
let shifted4 = vextq_s16(vdupq_n_s16(0), sum2, 4);
let prefix_sum = vaddq_s16(sum2, shifted4);
let offset = vdupq_n_s16(running_excess);
let adjusted_prefix = vaddq_s16(prefix_sum, offset);
let exclusive_prefix = vextq_s16(offset, adjusted_prefix, 7);
let adjusted_min = vaddq_s16(min_vals, exclusive_prefix);
let chunk_min = vminvq_s16(adjusted_min);
block_min = block_min.min(chunk_min);
running_excess += vaddvq_s16(excess);
}
i += 8;
}
while i < end {
let l1_min = l1_min_excess[i];
let l1_excess = l1_block_excess[i];
block_min = block_min.min(running_excess + l1_min);
running_excess += l1_excess;
i += 1;
}
l2_min_excess.push(block_min);
l2_block_excess.push(running_excess);
}
(l2_min_excess, l2_block_excess)
}
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.1")]
unsafe fn build_l1_index_sse41_impl(
l0_min_excess: &[i8],
l0_word_excess: &[i16],
num_l1: usize,
) -> (Vec<i16>, Vec<i16>) {
use core::arch::x86_64::*;
let mut l1_min_excess = Vec::with_capacity(num_l1);
let mut l1_block_excess = Vec::with_capacity(num_l1);
let num_words = l0_min_excess.len();
let bias = _mm_set1_epi16(i16::MIN);
for block_idx in 0..num_l1 {
let start = block_idx * FACTOR_L1;
let end = (start + FACTOR_L1).min(num_words);
let mut block_min: i16 = 0;
let mut running_excess: i16 = 0;
let mut i = start;
while i + 8 <= end {
let min_ptr = l0_min_excess.as_ptr().add(i);
let min_i8 = _mm_loadl_epi64(min_ptr as *const __m128i); let min_lo = _mm_cvtepi8_epi16(min_i8);
let excess_ptr = l0_word_excess.as_ptr().add(i);
let excess = _mm_loadu_si128(excess_ptr as *const __m128i);
let shifted1 = _mm_slli_si128(excess, 2); let sum1 = _mm_add_epi16(excess, shifted1);
let shifted2 = _mm_slli_si128(sum1, 4); let sum2 = _mm_add_epi16(sum1, shifted2);
let shifted4 = _mm_slli_si128(sum2, 8); let prefix_sum = _mm_add_epi16(sum2, shifted4);
let offset = _mm_set1_epi16(running_excess);
let adjusted_prefix = _mm_add_epi16(prefix_sum, offset);
let exclusive_prefix = _mm_slli_si128(adjusted_prefix, 2);
let exclusive_prefix = _mm_insert_epi16(exclusive_prefix, running_excess as i32, 0);
let adjusted_min = _mm_add_epi16(min_lo, exclusive_prefix);
let biased = _mm_add_epi16(adjusted_min, bias);
let minpos = _mm_minpos_epu16(biased);
let biased_min = _mm_extract_epi16(minpos, 0) as i16;
let chunk_min = biased_min.wrapping_add(i16::MIN);
block_min = block_min.min(chunk_min);
let sum_lo = _mm_add_epi16(excess, _mm_srli_si128(excess, 8)); let sum_lo = _mm_add_epi16(sum_lo, _mm_srli_si128(sum_lo, 4)); let sum_lo = _mm_add_epi16(sum_lo, _mm_srli_si128(sum_lo, 2)); running_excess += _mm_extract_epi16(sum_lo, 0) as i16;
i += 8;
}
while i < end {
let word_min = l0_min_excess[i] as i16;
let word_excess = l0_word_excess[i];
block_min = block_min.min(running_excess + word_min);
running_excess += word_excess;
i += 1;
}
l1_min_excess.push(block_min);
l1_block_excess.push(running_excess);
}
(l1_min_excess, l1_block_excess)
}
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
fn build_l1_index_sse41(
l0_min_excess: &[i8],
l0_word_excess: &[i16],
num_l1: usize,
) -> (Vec<i16>, Vec<i16>) {
unsafe { build_l1_index_sse41_impl(l0_min_excess, l0_word_excess, num_l1) }
}
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.1")]
unsafe fn build_l2_index_sse41_impl(
l1_min_excess: &[i16],
l1_block_excess: &[i16],
num_l2: usize,
) -> (Vec<i16>, Vec<i16>) {
use core::arch::x86_64::*;
let mut l2_min_excess = Vec::with_capacity(num_l2);
let mut l2_block_excess = Vec::with_capacity(num_l2);
let num_l1 = l1_min_excess.len();
let bias = _mm_set1_epi16(i16::MIN);
for block_idx in 0..num_l2 {
let start = block_idx * FACTOR_L2;
let end = (start + FACTOR_L2).min(num_l1);
let mut block_min: i16 = 0;
let mut running_excess: i16 = 0;
let mut i = start;
while i + 8 <= end {
let min_ptr = l1_min_excess.as_ptr().add(i);
let min_vals = _mm_loadu_si128(min_ptr as *const __m128i);
let excess_ptr = l1_block_excess.as_ptr().add(i);
let excess = _mm_loadu_si128(excess_ptr as *const __m128i);
let shifted1 = _mm_slli_si128(excess, 2);
let sum1 = _mm_add_epi16(excess, shifted1);
let shifted2 = _mm_slli_si128(sum1, 4);
let sum2 = _mm_add_epi16(sum1, shifted2);
let shifted4 = _mm_slli_si128(sum2, 8);
let prefix_sum = _mm_add_epi16(sum2, shifted4);
let offset = _mm_set1_epi16(running_excess);
let adjusted_prefix = _mm_add_epi16(prefix_sum, offset);
let exclusive_prefix = _mm_slli_si128(adjusted_prefix, 2);
let exclusive_prefix = _mm_insert_epi16(exclusive_prefix, running_excess as i32, 0);
let adjusted_min = _mm_add_epi16(min_vals, exclusive_prefix);
let biased = _mm_add_epi16(adjusted_min, bias);
let minpos = _mm_minpos_epu16(biased);
let biased_min = _mm_extract_epi16(minpos, 0) as i16;
let chunk_min = biased_min.wrapping_add(i16::MIN);
block_min = block_min.min(chunk_min);
let sum_lo = _mm_add_epi16(excess, _mm_srli_si128(excess, 8));
let sum_lo = _mm_add_epi16(sum_lo, _mm_srli_si128(sum_lo, 4));
let sum_lo = _mm_add_epi16(sum_lo, _mm_srli_si128(sum_lo, 2));
running_excess += _mm_extract_epi16(sum_lo, 0) as i16;
i += 8;
}
while i < end {
let l1_min = l1_min_excess[i];
let l1_excess = l1_block_excess[i];
block_min = block_min.min(running_excess + l1_min);
running_excess += l1_excess;
i += 1;
}
l2_min_excess.push(block_min);
l2_block_excess.push(running_excess);
}
(l2_min_excess, l2_block_excess)
}
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
fn build_l2_index_sse41(
l1_min_excess: &[i16],
l1_block_excess: &[i16],
num_l2: usize,
) -> (Vec<i16>, Vec<i16>) {
unsafe { build_l2_index_sse41_impl(l1_min_excess, l1_block_excess, num_l2) }
}
#[cfg(test)]
#[allow(dead_code)]
fn build_l1_index_scalar(
l0_min_excess: &[i8],
l0_word_excess: &[i16],
num_l1: usize,
) -> (Vec<i16>, Vec<i16>) {
let mut l1_min_excess = Vec::with_capacity(num_l1);
let mut l1_block_excess = Vec::with_capacity(num_l1);
let num_words = l0_min_excess.len();
for block_idx in 0..num_l1 {
let start = block_idx * FACTOR_L1;
let end = (start + FACTOR_L1).min(num_words);
let mut block_min: i16 = 0;
let mut running_excess: i16 = 0;
for i in start..end {
let word_min = l0_min_excess[i] as i16;
let word_excess = l0_word_excess[i];
block_min = block_min.min(running_excess + word_min);
running_excess += word_excess;
}
l1_min_excess.push(block_min);
l1_block_excess.push(running_excess);
}
(l1_min_excess, l1_block_excess)
}
#[cfg(test)]
#[allow(dead_code)]
fn build_l2_index_scalar(
l1_min_excess: &[i16],
l1_block_excess: &[i16],
num_l2: usize,
) -> (Vec<i16>, Vec<i16>) {
let mut l2_min_excess = Vec::with_capacity(num_l2);
let mut l2_block_excess = Vec::with_capacity(num_l2);
let num_l1 = l1_min_excess.len();
for block_idx in 0..num_l2 {
let start = block_idx * FACTOR_L2;
let end = (start + FACTOR_L2).min(num_l1);
let mut block_min: i16 = 0;
let mut running_excess: i16 = 0;
for i in start..end {
let l1_min = l1_min_excess[i];
let l1_excess = l1_block_excess[i];
block_min = block_min.min(running_excess + l1_min);
running_excess += l1_excess;
}
l2_min_excess.push(block_min);
l2_block_excess.push(running_excess);
}
(l2_min_excess, l2_block_excess)
}
#[inline]
fn find_close_in_word_fast(
word: u64,
start_bit: usize,
initial_excess: i32,
valid_bits: usize,
) -> Option<usize> {
if start_bit >= valid_bits || initial_excess <= 0 {
return None;
}
let bytes = word.to_le_bytes();
let mut pos = start_bit;
let mut excess = initial_excess;
let first_byte_idx = pos / 8;
let bit_in_byte = pos % 8;
if bit_in_byte != 0 {
let byte_val = bytes[first_byte_idx];
let end_bit = 8.min(valid_bits - first_byte_idx * 8);
for bit in bit_in_byte..end_bit {
if (byte_val >> bit) & 1 == 1 {
excess += 1;
} else {
excess -= 1;
if excess == 0 {
return Some(first_byte_idx * 8 + bit);
}
}
}
pos = (first_byte_idx + 1) * 8;
}
while pos + 8 <= valid_bits && excess > 0 {
let byte_idx = pos / 8;
let byte_val = bytes[byte_idx] as usize;
let min_excess_in_byte = BYTE_MIN_EXCESS[byte_val] as i32;
if excess + min_excess_in_byte <= 0 {
if excess <= 16 {
let lookup_idx = (excess - 1) as usize;
let match_pos = BYTE_FIND_CLOSE[byte_val][lookup_idx];
if match_pos < 8 {
return Some(pos + match_pos as usize);
}
}
let mut e = excess;
for bit in 0..8 {
if (byte_val >> bit) & 1 == 1 {
e += 1;
} else {
e -= 1;
if e == 0 {
return Some(pos + bit);
}
}
}
}
excess += BYTE_TOTAL_EXCESS[byte_val] as i32;
pos += 8;
}
if pos < valid_bits && excess > 0 {
let byte_idx = pos / 8;
if byte_idx < 8 {
let byte_val = bytes[byte_idx];
let end_bit = valid_bits - pos;
for bit in 0..end_bit {
if (byte_val >> bit) & 1 == 1 {
excess += 1;
} else {
excess -= 1;
if excess == 0 {
return Some(pos + bit);
}
}
}
}
}
None
}
#[inline]
pub fn find_unmatched_close_in_word(x: u64) -> u32 {
let mut excess: i32 = 0;
for bit in 0..64 {
if (x >> bit) & 1 == 1 {
excess += 1; } else {
excess -= 1; if excess < 0 {
return bit;
}
}
}
64 }
#[inline]
pub fn find_close_in_word(word: u64, p: u32) -> Option<u32> {
if p >= 64 {
return None;
}
let shifted = word >> p;
if shifted & 1 == 0 {
return Some(p);
}
let after_open = shifted >> 1;
let remaining_bits = 63 - p;
if remaining_bits == 0 {
return None;
}
let result = find_unmatched_close_in_word(after_open);
if result < remaining_bits {
Some(p + 1 + result)
} else {
None }
}
pub fn find_close(words: &[u64], len: usize, p: usize) -> Option<usize> {
if p >= len || words.is_empty() {
return None;
}
let word_idx = p / 64;
let bit_idx = (p % 64) as u32;
if (words[word_idx] >> bit_idx) & 1 == 0 {
return None; }
if let Some(local) = find_close_in_word(words[word_idx], bit_idx) {
let result = word_idx * 64 + local as usize;
if result < len {
return Some(result);
}
}
let first_word_part = words[word_idx] >> bit_idx;
let first_word_bits = (64 - bit_idx) as usize;
let first_word_ones = first_word_part.count_ones() as i32;
let mut excess = 2 * first_word_ones - first_word_bits as i32;
for (i, &word) in words[word_idx + 1..].iter().enumerate() {
let actual_word_idx = word_idx + 1 + i;
let word_bits = if actual_word_idx * 64 + 64 <= len {
64
} else {
len - actual_word_idx * 64
};
let masked_word = if word_bits == 64 {
word
} else {
word & ((1u64 << word_bits) - 1)
};
let (word_min, word_excess) = word_min_excess_i32(masked_word, word_bits);
if excess + word_min <= 0 {
let mut bit_excess = excess;
for bit in 0..word_bits {
if (masked_word >> bit) & 1 == 1 {
bit_excess += 1;
} else {
bit_excess -= 1;
if bit_excess == 0 {
return Some(actual_word_idx * 64 + bit);
}
}
}
}
excess += word_excess;
if actual_word_idx * 64 >= len {
break;
}
}
None
}
fn word_min_excess_i32(word: u64, valid_bits: usize) -> (i32, i32) {
if valid_bits == 0 {
return (0, 0);
}
let full_bytes = valid_bits / 8;
let remaining_bits = valid_bits % 8;
let mut running_excess: i32 = 0;
let mut global_min: i32 = 0;
let bytes = word.to_le_bytes();
for byte in bytes.iter().take(full_bytes) {
let byte_val = *byte as usize;
let byte_min = running_excess + BYTE_MIN_EXCESS[byte_val] as i32;
global_min = global_min.min(byte_min);
running_excess += BYTE_TOTAL_EXCESS[byte_val] as i32;
}
if remaining_bits > 0 {
let byte_val = bytes[full_bytes];
let mut excess = running_excess;
for bit in 0..remaining_bits {
if (byte_val >> bit) & 1 == 1 {
excess += 1;
} else {
excess -= 1;
global_min = global_min.min(excess);
}
}
running_excess = excess;
}
(global_min, running_excess)
}
fn word_min_excess(word: u64, valid_bits: usize) -> (i8, i16) {
if valid_bits == 0 {
return (0, 0);
}
let full_bytes = valid_bits / 8;
let remaining_bits = valid_bits % 8;
let mut running_excess: i16 = 0;
let mut global_min: i16 = 0;
let bytes = word.to_le_bytes();
for byte in bytes.iter().take(full_bytes) {
let byte_val = *byte as usize;
let byte_min = running_excess + BYTE_MIN_EXCESS[byte_val] as i16;
global_min = global_min.min(byte_min);
running_excess += BYTE_TOTAL_EXCESS[byte_val] as i16;
}
if remaining_bits > 0 {
let byte_val = bytes[full_bytes];
let mut excess = running_excess;
for bit in 0..remaining_bits {
if (byte_val >> bit) & 1 == 1 {
excess += 1;
} else {
excess -= 1;
global_min = global_min.min(excess);
}
}
running_excess = excess;
}
let min_clamped = global_min.clamp(-128, 127) as i8;
(min_clamped, running_excess)
}
fn word_max_excess_rev(word: u64) -> (i32, i32) {
let bytes = word.to_le_bytes();
let mut running_excess: i32 = 0;
let mut global_max: i32 = 0;
for byte in bytes.iter().rev() {
let byte_val = *byte as usize;
let byte_max = running_excess + BYTE_MAX_EXCESS_REV[byte_val] as i32;
global_max = global_max.max(byte_max);
running_excess += BYTE_TOTAL_EXCESS[byte_val] as i32;
}
(global_max, running_excess)
}
pub fn find_open(words: &[u64], len: usize, p: usize) -> Option<usize> {
if p >= len || words.is_empty() {
return None;
}
let word_idx = p / 64;
let bit_idx = p % 64;
if (words[word_idx] >> bit_idx) & 1 == 1 {
return None; }
let mut excess: i32 = -1;
for bit in (0..bit_idx).rev() {
if (words[word_idx] >> bit) & 1 == 1 {
excess += 1; if excess == 0 {
return Some(word_idx * 64 + bit);
}
} else {
excess -= 1; }
}
for word_idx in (0..word_idx).rev() {
let word = words[word_idx];
for bit in (0..64).rev() {
if (word >> bit) & 1 == 1 {
excess += 1;
if excess == 0 {
return Some(word_idx * 64 + bit);
}
} else {
excess -= 1;
}
}
}
None
}
pub fn enclose(words: &[u64], len: usize, p: usize) -> Option<usize> {
if p == 0 || p >= len || words.is_empty() {
return None;
}
let word_idx = p / 64;
let bit_idx = p % 64;
if (words[word_idx] >> bit_idx) & 1 == 0 {
return None; }
let mut excess: i32 = 0;
let start_bit = if bit_idx > 0 { bit_idx - 1 } else { 63 };
let start_word = if bit_idx > 0 {
word_idx
} else {
word_idx.saturating_sub(1)
};
if word_idx == 0 && bit_idx == 0 {
return None; }
let first_word = words[start_word];
for bit in (0..=start_bit).rev() {
if (first_word >> bit) & 1 == 1 {
excess += 1; if excess == 1 {
return Some(start_word * 64 + bit);
}
} else {
excess -= 1; }
}
for word_idx in (0..start_word).rev() {
let word = words[word_idx];
let (max_excess_in_word, word_excess) = word_max_excess_rev(word);
if excess + max_excess_in_word >= 1 {
for bit in (0..64).rev() {
if (word >> bit) & 1 == 1 {
excess += 1;
if excess == 1 {
return Some(word_idx * 64 + bit);
}
} else {
excess -= 1;
}
}
} else {
excess += word_excess;
}
}
None
}
const WORDS_PER_RANK_BLOCK: usize = 8;
const FACTOR_L1: usize = 32;
const FACTOR_L2: usize = 32;
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct BalancedParens<W = Vec<u64>, S: SelectSupport = NoSelect> {
words: W,
len: usize,
total_ones: usize,
l0_min_excess: Vec<i8>,
l0_word_excess: Vec<i16>,
l1_min_excess: Vec<i16>,
l1_block_excess: Vec<i16>,
l2_min_excess: Vec<i16>,
l2_block_excess: Vec<i16>,
rank_l1: Vec<u32>,
rank_l2: Vec<u64>,
select: S,
}
#[allow(clippy::type_complexity)]
fn build_bp_index(
words: &[u64],
len: usize,
) -> (
Vec<i8>,
Vec<i16>,
Vec<i16>,
Vec<i16>,
Vec<i16>,
Vec<i16>,
Vec<u32>,
Vec<u64>,
usize,
) {
if words.is_empty() || len == 0 {
return (
Vec::new(),
Vec::new(),
Vec::new(),
Vec::new(),
Vec::new(),
Vec::new(),
Vec::new(),
Vec::new(),
0,
);
}
let num_words = words.len();
let num_l1 = num_words.div_ceil(FACTOR_L1);
let num_l2 = num_l1.div_ceil(FACTOR_L2);
let num_rank_blocks = num_words.div_ceil(WORDS_PER_RANK_BLOCK);
let (l0_min_excess, l0_word_excess) = build_l0_index(words, len, num_words);
#[cfg(all(feature = "simd", target_arch = "aarch64"))]
let (l1_min_excess, l1_block_excess) =
build_l1_index_neon(&l0_min_excess, &l0_word_excess, num_l1);
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
let (l1_min_excess, l1_block_excess) = {
if is_x86_feature_detected!("sse4.1") {
build_l1_index_sse41(&l0_min_excess, &l0_word_excess, num_l1)
} else {
let mut l1_min_excess = Vec::with_capacity(num_l1);
let mut l1_block_excess = Vec::with_capacity(num_l1);
for block_idx in 0..num_l1 {
let start = block_idx * FACTOR_L1;
let end = (start + FACTOR_L1).min(num_words);
let mut block_min: i16 = 0;
let mut running_excess: i16 = 0;
for i in start..end {
let word_min = l0_min_excess[i] as i16;
let word_excess = l0_word_excess[i];
block_min = block_min.min(running_excess + word_min);
running_excess += word_excess;
}
l1_min_excess.push(block_min);
l1_block_excess.push(running_excess);
}
(l1_min_excess, l1_block_excess)
}
};
#[cfg(not(any(
all(feature = "simd", target_arch = "aarch64"),
all(feature = "simd", target_arch = "x86_64")
)))]
let (l1_min_excess, l1_block_excess) = {
let mut l1_min_excess = Vec::with_capacity(num_l1);
let mut l1_block_excess = Vec::with_capacity(num_l1);
for block_idx in 0..num_l1 {
let start = block_idx * FACTOR_L1;
let end = (start + FACTOR_L1).min(num_words);
let mut block_min: i16 = 0;
let mut running_excess: i16 = 0;
for i in start..end {
let word_min = l0_min_excess[i] as i16;
let word_excess = l0_word_excess[i];
block_min = block_min.min(running_excess + word_min);
running_excess += word_excess;
}
l1_min_excess.push(block_min);
l1_block_excess.push(running_excess);
}
(l1_min_excess, l1_block_excess)
};
#[cfg(all(feature = "simd", target_arch = "aarch64"))]
let (l2_min_excess, l2_block_excess) =
build_l2_index_neon(&l1_min_excess, &l1_block_excess, num_l2);
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
let (l2_min_excess, l2_block_excess) = {
if is_x86_feature_detected!("sse4.1") {
build_l2_index_sse41(&l1_min_excess, &l1_block_excess, num_l2)
} else {
let mut l2_min_excess = Vec::with_capacity(num_l2);
let mut l2_block_excess = Vec::with_capacity(num_l2);
for block_idx in 0..num_l2 {
let start = block_idx * FACTOR_L2;
let end = (start + FACTOR_L2).min(num_l1);
let mut block_min: i16 = 0;
let mut running_excess: i16 = 0;
for i in start..end {
let l1_min = l1_min_excess[i];
let l1_excess = l1_block_excess[i];
block_min = block_min.min(running_excess + l1_min);
running_excess += l1_excess;
}
l2_min_excess.push(block_min);
l2_block_excess.push(running_excess);
}
(l2_min_excess, l2_block_excess)
}
};
#[cfg(not(any(
all(feature = "simd", target_arch = "aarch64"),
all(feature = "simd", target_arch = "x86_64")
)))]
let (l2_min_excess, l2_block_excess) = {
let mut l2_min_excess = Vec::with_capacity(num_l2);
let mut l2_block_excess = Vec::with_capacity(num_l2);
for block_idx in 0..num_l2 {
let start = block_idx * FACTOR_L2;
let end = (start + FACTOR_L2).min(num_l1);
let mut block_min: i16 = 0;
let mut running_excess: i16 = 0;
for i in start..end {
let l1_min = l1_min_excess[i];
let l1_excess = l1_block_excess[i];
block_min = block_min.min(running_excess + l1_min);
running_excess += l1_excess;
}
l2_min_excess.push(block_min);
l2_block_excess.push(running_excess);
}
(l2_min_excess, l2_block_excess)
};
let mut rank_l1 = Vec::with_capacity(num_rank_blocks);
let mut rank_l2 = Vec::with_capacity(num_rank_blocks);
let mut cumulative_rank: u64 = 0;
for block_idx in 0..num_rank_blocks {
let block_start = block_idx * WORDS_PER_RANK_BLOCK;
let block_end = (block_start + WORDS_PER_RANK_BLOCK).min(num_words);
rank_l1.push(cumulative_rank as u32);
let mut l2_packed: u64 = 0;
let mut block_cumulative: u16 = 0;
for (i, word_idx) in (block_start..block_end).enumerate() {
if i > 0 && i < 8 {
l2_packed |= (block_cumulative as u64) << ((i - 1) * 9);
}
block_cumulative += words[word_idx].count_ones() as u16;
}
rank_l2.push(l2_packed);
cumulative_rank += block_cumulative as u64;
}
(
l0_min_excess,
l0_word_excess,
l1_min_excess,
l1_block_excess,
l2_min_excess,
l2_block_excess,
rank_l1,
rank_l2,
cumulative_rank as usize,
)
}
impl BalancedParens<Vec<u64>, NoSelect> {
pub fn new(words: Vec<u64>, len: usize) -> Self {
let (
l0_min_excess,
l0_word_excess,
l1_min_excess,
l1_block_excess,
l2_min_excess,
l2_block_excess,
rank_l1,
rank_l2,
total_ones,
) = build_bp_index(&words, len);
Self {
words,
len,
total_ones,
l0_min_excess,
l0_word_excess,
l1_min_excess,
l1_block_excess,
l2_min_excess,
l2_block_excess,
rank_l1,
rank_l2,
select: NoSelect,
}
}
}
impl BalancedParens<Vec<u64>, WithSelect> {
pub fn new_with_select(words: Vec<u64>, len: usize) -> Self {
let (
l0_min_excess,
l0_word_excess,
l1_min_excess,
l1_block_excess,
l2_min_excess,
l2_block_excess,
rank_l1,
rank_l2,
total_ones,
) = build_bp_index(&words, len);
let select = WithSelect::build(&words, total_ones);
Self {
words,
len,
total_ones,
l0_min_excess,
l0_word_excess,
l1_min_excess,
l1_block_excess,
l2_min_excess,
l2_block_excess,
rank_l1,
rank_l2,
select,
}
}
}
impl<W: AsRef<[u64]>> BalancedParens<W, NoSelect> {
pub fn from_words(words: W, len: usize) -> Self {
let (
l0_min_excess,
l0_word_excess,
l1_min_excess,
l1_block_excess,
l2_min_excess,
l2_block_excess,
rank_l1,
rank_l2,
total_ones,
) = build_bp_index(words.as_ref(), len);
Self {
words,
len,
total_ones,
l0_min_excess,
l0_word_excess,
l1_min_excess,
l1_block_excess,
l2_min_excess,
l2_block_excess,
rank_l1,
rank_l2,
select: NoSelect,
}
}
}
impl<W: AsRef<[u64]>> BalancedParens<W, WithSelect> {
pub fn from_words_with_select(words: W, len: usize) -> Self {
let (
l0_min_excess,
l0_word_excess,
l1_min_excess,
l1_block_excess,
l2_min_excess,
l2_block_excess,
rank_l1,
rank_l2,
total_ones,
) = build_bp_index(words.as_ref(), len);
let select = WithSelect::build(words.as_ref(), total_ones);
Self {
words,
len,
total_ones,
l0_min_excess,
l0_word_excess,
l1_min_excess,
l1_block_excess,
l2_min_excess,
l2_block_excess,
rank_l1,
rank_l2,
select,
}
}
}
impl<W: AsRef<[u64]>, S: SelectSupport> BalancedParens<W, S> {
#[inline]
pub fn len(&self) -> usize {
self.len
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len == 0
}
#[inline]
pub fn words(&self) -> &[u64] {
self.words.as_ref()
}
#[inline]
pub fn total_ones(&self) -> usize {
self.total_ones
}
#[inline]
pub fn select1(&self, k: usize) -> Option<usize> {
self.select
.select1(self.words.as_ref(), self.len, self.total_ones, k)
}
#[inline]
pub fn is_open(&self, p: usize) -> bool {
if p >= self.len {
return false;
}
let words = self.words.as_ref();
let word_idx = p / 64;
let bit_idx = p % 64;
(words[word_idx] >> bit_idx) & 1 == 1
}
#[inline]
pub fn is_close(&self, p: usize) -> bool {
if p >= self.len {
return false;
}
!self.is_open(p)
}
#[inline]
pub fn rank1(&self, p: usize) -> usize {
if p == 0 {
return 0;
}
let p = p.min(self.len);
let words = self.words.as_ref();
let word_idx = p / 64;
let bit_idx = p % 64;
if word_idx >= words.len() {
return self.rank1_slow(p);
}
let block_idx = word_idx / WORDS_PER_RANK_BLOCK;
let word_in_block = word_idx % WORDS_PER_RANK_BLOCK;
let l1_rank = if block_idx < self.rank_l1.len() {
self.rank_l1[block_idx] as usize
} else {
return self.rank1_slow(p);
};
let l2_rank = if word_in_block == 0 {
0
} else if block_idx < self.rank_l2.len() {
let l2_packed = self.rank_l2[block_idx];
let l2_idx = word_in_block - 1;
((l2_packed >> (l2_idx * 9)) & 0x1FF) as usize
} else {
return self.rank1_slow(p);
};
let partial = if bit_idx > 0 {
let word = words[word_idx];
let mask = (1u64 << bit_idx) - 1;
(word & mask).count_ones() as usize
} else {
0
};
l1_rank + l2_rank + partial
}
fn rank1_slow(&self, p: usize) -> usize {
let words = self.words.as_ref();
let word_idx = p / 64;
let bit_idx = p % 64;
let mut count = 0usize;
for word in words.iter().take(word_idx) {
count += word.count_ones() as usize;
}
if bit_idx > 0 && word_idx < words.len() {
let mask = (1u64 << bit_idx) - 1;
count += (words[word_idx] & mask).count_ones() as usize;
}
count
}
#[inline]
pub fn excess(&self, p: usize) -> i32 {
if p >= self.len {
return 0;
}
let ones = self.rank1(p + 1) as i32;
let total = (p + 1) as i32;
2 * ones - total
}
pub fn find_close(&self, p: usize) -> Option<usize> {
if p >= self.len || self.is_close(p) {
return None;
}
self.find_close_from(p + 1, 1)
}
fn find_close_from(&self, start_pos: usize, initial_excess: i32) -> Option<usize> {
if start_pos >= self.len {
return None;
}
#[derive(Clone, Copy, Debug)]
enum State {
ScanWord,
CheckL0,
CheckL1,
CheckL2,
FromL0,
FromL1,
FromL2,
}
let words = self.words.as_ref();
let mut state = State::FromL0;
let mut excess = initial_excess;
let mut pos = start_pos;
loop {
match state {
State::ScanWord => {
if pos >= self.len {
return None;
}
let word_idx = pos / 64;
let bit_idx = pos % 64;
let word = words[word_idx];
let valid_bits = if word_idx * 64 + 64 <= self.len {
64
} else {
self.len - word_idx * 64
};
if let Some(match_bit) =
find_close_in_word_fast(word, bit_idx, excess, valid_bits)
{
return Some(word_idx * 64 + match_bit);
}
let remaining_word = word >> bit_idx;
let remaining_bits = valid_bits - bit_idx;
let ones = if remaining_bits == 64 {
remaining_word.count_ones() as i32
} else {
(remaining_word & ((1u64 << remaining_bits) - 1)).count_ones() as i32
};
excess += 2 * ones - remaining_bits as i32;
pos = (word_idx + 1) * 64;
state = State::FromL0;
}
State::CheckL0 => {
debug_assert!(pos % 64 == 0);
let word_idx = pos / 64;
if word_idx >= self.l0_min_excess.len() {
return None;
}
let min_e = self.l0_min_excess[word_idx] as i32;
if excess + min_e <= 0 {
state = State::ScanWord;
} else {
let word_excess = self.l0_word_excess[word_idx] as i32;
excess += word_excess;
pos += 64;
state = State::FromL0;
}
}
State::CheckL1 => {
debug_assert!(pos % 64 == 0);
let l1_idx = pos / (64 * FACTOR_L1);
if l1_idx >= self.l1_min_excess.len() {
return None;
}
let min_e = self.l1_min_excess[l1_idx] as i32;
if excess + min_e <= 0 {
state = State::CheckL0;
} else if pos < self.len {
if self.is_close(pos) && excess <= 1 {
return Some(pos);
}
let l1_excess = self.l1_block_excess[l1_idx] as i32;
excess += l1_excess;
pos += 64 * FACTOR_L1;
state = State::FromL1;
} else {
return None;
}
}
State::CheckL2 => {
let l2_idx = pos / (64 * FACTOR_L1 * FACTOR_L2);
if l2_idx >= self.l2_min_excess.len() {
return None;
}
let min_e = self.l2_min_excess[l2_idx] as i32;
if excess + min_e <= 0 {
state = State::CheckL1;
} else if pos < self.len {
if self.is_close(pos) && excess <= 1 {
return Some(pos);
}
let l2_excess = self.l2_block_excess[l2_idx] as i32;
excess += l2_excess;
pos += 64 * FACTOR_L1 * FACTOR_L2;
state = State::FromL2;
} else {
return None;
}
}
State::FromL0 => {
if pos % 64 == 0 {
state = State::FromL1;
} else if pos < self.len {
state = State::ScanWord;
} else {
return None;
}
}
State::FromL1 => {
if pos % (64 * FACTOR_L1) == 0 {
if pos < self.len {
state = State::FromL2;
} else {
return None;
}
} else if pos < self.len {
state = State::CheckL0;
} else {
return None;
}
}
State::FromL2 => {
if pos % (64 * FACTOR_L1 * FACTOR_L2) == 0 {
if pos < self.len {
state = State::CheckL2;
} else {
return None;
}
} else if pos < self.len {
state = State::CheckL1;
} else {
return None;
}
}
}
}
}
pub fn find_open(&self, p: usize) -> Option<usize> {
if p >= self.len || self.is_open(p) {
return None;
}
find_open(self.words.as_ref(), self.len, p)
}
pub fn enclose(&self, p: usize) -> Option<usize> {
if p >= self.len || self.is_close(p) {
return None;
}
enclose(self.words.as_ref(), self.len, p)
}
pub fn parent(&self, p: usize) -> Option<usize> {
self.enclose(p)
}
pub fn next_sibling(&self, p: usize) -> Option<usize> {
if !self.is_open(p) {
return None;
}
let close = self.find_close(p)?;
if close + 1 < self.len && self.is_open(close + 1) {
Some(close + 1)
} else {
None
}
}
pub fn first_child(&self, p: usize) -> Option<usize> {
if !self.is_open(p) || p + 1 >= self.len {
return None;
}
if self.is_open(p + 1) {
Some(p + 1)
} else {
None
}
}
pub fn depth(&self, p: usize) -> Option<usize> {
if p >= self.len {
return None;
}
Some(self.excess(p) as usize)
}
pub fn subtree_size(&self, p: usize) -> Option<usize> {
if p >= self.len || self.is_close(p) {
return None;
}
let close = self.find_close(p)?;
Some((close - p) / 2)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_find_unmatched_close_simple() {
assert_eq!(find_unmatched_close_in_word(0), 0);
assert_eq!(find_unmatched_close_in_word(u64::MAX), 64);
let pattern = 0b01 | (u64::MAX << 2);
assert_eq!(find_unmatched_close_in_word(pattern), 64); }
#[test]
fn test_find_unmatched_close_nested() {
let pattern = 0b0011 | (u64::MAX << 4);
assert_eq!(find_unmatched_close_in_word(pattern), 64);
let pattern = 0b011 | (u64::MAX << 3);
assert_eq!(find_unmatched_close_in_word(pattern), 64);
}
#[test]
fn test_find_unmatched_close_with_extra() {
let pattern = 0b001 | (u64::MAX << 3);
assert_eq!(find_unmatched_close_in_word(pattern), 2);
let pattern = 0b00011 | (u64::MAX << 5);
assert_eq!(find_unmatched_close_in_word(pattern), 4);
}
#[test]
fn test_find_close_in_word_simple() {
assert_eq!(find_close_in_word(0b01, 0), Some(1));
}
#[test]
fn test_find_close_in_word_nested() {
assert_eq!(find_close_in_word(0b0011, 0), Some(3)); assert_eq!(find_close_in_word(0b0011, 1), Some(2)); }
#[test]
fn test_find_close_in_word_sequential() {
assert_eq!(find_close_in_word(0b0101, 0), Some(1));
assert_eq!(find_close_in_word(0b0101, 2), Some(3));
}
#[test]
fn test_find_close_in_word_complex() {
assert_eq!(find_close_in_word(0b001011, 0), Some(5)); assert_eq!(find_close_in_word(0b001011, 1), Some(2)); assert_eq!(find_close_in_word(0b001011, 3), Some(4)); }
#[test]
fn test_find_close_in_word_at_close() {
assert_eq!(find_close_in_word(0b01, 1), Some(1));
}
#[test]
fn test_find_close_in_word_beyond_word() {
assert_eq!(find_close_in_word(u64::MAX, 0), None);
assert_eq!(find_close_in_word(u64::MAX, 0), None);
assert_eq!(find_close_in_word(u64::MAX, 1), None);
}
#[test]
fn test_find_close_single_word() {
let words = vec![0b001011u64];
assert_eq!(find_close(&words, 6, 0), Some(5)); assert_eq!(find_close(&words, 6, 1), Some(2)); assert_eq!(find_close(&words, 6, 3), Some(4)); }
#[test]
fn test_find_close_multi_word() {
let words = vec![u64::MAX, 0u64];
let len = 128;
assert_eq!(find_close(&words, len, 0), Some(127));
assert_eq!(find_close(&words, len, 1), Some(126));
assert_eq!(find_close(&words, len, 63), Some(64));
}
#[test]
fn test_find_close_at_close_returns_none() {
let words = vec![0b001011u64];
assert_eq!(find_close(&words, 6, 2), None);
assert_eq!(find_close(&words, 6, 4), None);
assert_eq!(find_close(&words, 6, 5), None);
}
#[test]
fn test_find_open_single_word() {
let words = vec![0b001011u64];
assert_eq!(find_open(&words, 6, 5), Some(0)); assert_eq!(find_open(&words, 6, 2), Some(1)); assert_eq!(find_open(&words, 6, 4), Some(3)); }
#[test]
fn test_find_open_at_open_returns_none() {
let words = vec![0b001011u64];
assert_eq!(find_open(&words, 6, 0), None);
assert_eq!(find_open(&words, 6, 1), None);
assert_eq!(find_open(&words, 6, 3), None);
}
#[test]
fn test_enclose_simple() {
let words = vec![0b0011u64];
assert_eq!(enclose(&words, 4, 1), Some(0));
assert_eq!(enclose(&words, 4, 0), None);
}
#[test]
fn test_enclose_nested() {
let words = vec![0b000111u64];
assert_eq!(enclose(&words, 6, 2), Some(1)); assert_eq!(enclose(&words, 6, 1), Some(0)); assert_eq!(enclose(&words, 6, 0), None); }
#[test]
fn test_balanced_parens_new() {
let bp = BalancedParens::new(vec![0b001011u64], 6);
assert_eq!(bp.len(), 6);
assert!(!bp.is_empty());
}
#[test]
fn test_balanced_parens_empty() {
let bp = BalancedParens::new(vec![], 0);
assert_eq!(bp.len(), 0);
assert!(bp.is_empty());
}
#[test]
fn test_balanced_parens_is_open_close() {
let bp = BalancedParens::new(vec![0b001011u64], 6);
assert!(bp.is_open(0));
assert!(bp.is_open(1));
assert!(bp.is_close(2));
assert!(bp.is_open(3));
assert!(bp.is_close(4));
assert!(bp.is_close(5));
}
#[test]
fn test_balanced_parens_find_close() {
let bp = BalancedParens::new(vec![0b001011u64], 6);
assert_eq!(bp.find_close(0), Some(5));
assert_eq!(bp.find_close(1), Some(2));
assert_eq!(bp.find_close(3), Some(4));
assert_eq!(bp.find_close(2), None);
assert_eq!(bp.find_close(4), None);
assert_eq!(bp.find_close(5), None);
}
#[test]
fn test_balanced_parens_find_open() {
let bp = BalancedParens::new(vec![0b001011u64], 6);
assert_eq!(bp.find_open(5), Some(0));
assert_eq!(bp.find_open(2), Some(1));
assert_eq!(bp.find_open(4), Some(3));
assert_eq!(bp.find_open(0), None);
assert_eq!(bp.find_open(1), None);
assert_eq!(bp.find_open(3), None);
}
#[test]
fn test_balanced_parens_enclose() {
let bp = BalancedParens::new(vec![0b001011u64], 6);
assert_eq!(bp.enclose(1), Some(0)); assert_eq!(bp.enclose(3), Some(0)); assert_eq!(bp.enclose(0), None); }
#[test]
fn test_balanced_parens_navigation() {
let bp = BalancedParens::new(vec![0b001011u64], 6);
assert_eq!(bp.first_child(0), Some(1));
assert_eq!(bp.next_sibling(1), Some(3));
assert_eq!(bp.next_sibling(3), None);
assert_eq!(bp.parent(1), Some(0));
assert_eq!(bp.parent(3), Some(0));
assert_eq!(bp.parent(0), None);
}
#[test]
fn test_balanced_parens_multi_word() {
let words = vec![u64::MAX, 0u64];
let bp = BalancedParens::new(words, 128);
assert_eq!(bp.find_close(0), Some(127));
assert_eq!(bp.find_close(1), Some(126));
assert_eq!(bp.find_close(63), Some(64));
}
#[test]
fn test_balanced_parens_excess() {
let bp = BalancedParens::new(vec![0b0011u64], 4);
assert_eq!(bp.excess(0), 1); assert_eq!(bp.excess(1), 2); assert_eq!(bp.excess(2), 1); assert_eq!(bp.excess(3), 0); }
#[test]
fn test_word_min_excess() {
let (min, total) = word_min_excess(0b01, 2);
assert_eq!(total, 0);
assert_eq!(min, 0);
let (min, total) = word_min_excess(0b10, 2);
assert_eq!(min, -1);
assert_eq!(total, 0);
let (min, total) = word_min_excess(0b11, 2);
assert_eq!(min, 0); assert_eq!(total, 2);
let (min, total) = word_min_excess(0b00, 2);
assert_eq!(min, -2);
assert_eq!(total, -2);
}
#[test]
fn test_find_close_open_roundtrip() {
let bp = BalancedParens::new(vec![0b001011u64], 6);
for p in [0, 1, 3] {
let close = bp.find_close(p).unwrap();
let open = bp.find_open(close).unwrap();
assert_eq!(open, p, "roundtrip failed for position {}", p);
}
}
#[test]
fn test_balanced_parens_matches_linear_scan() {
let words = vec![0b001011u64];
let len = 6;
let bp = BalancedParens::new(words.clone(), len);
for p in 0..len {
if bp.is_open(p) {
let bp_result = bp.find_close(p);
let linear_result = find_close(&words, len, p);
assert_eq!(bp_result, linear_result, "mismatch at position {}", p);
}
}
}
#[test]
fn test_depth() {
let bp = BalancedParens::new(vec![91u64], 10);
assert_eq!(bp.depth(0), Some(1)); assert_eq!(bp.depth(1), Some(2)); assert_eq!(bp.depth(2), Some(1)); assert_eq!(bp.depth(3), Some(2)); assert_eq!(bp.depth(4), Some(3)); assert_eq!(bp.depth(5), Some(2)); assert_eq!(bp.depth(6), Some(3)); assert_eq!(bp.depth(7), Some(2)); assert_eq!(bp.depth(8), Some(1)); assert_eq!(bp.depth(9), Some(0)); }
#[test]
fn test_depth_simple() {
let bp = BalancedParens::new(vec![0b0011u64], 4);
assert_eq!(bp.depth(0), Some(1)); assert_eq!(bp.depth(1), Some(2)); assert_eq!(bp.depth(2), Some(1)); assert_eq!(bp.depth(3), Some(0)); }
#[test]
fn test_subtree_size() {
let bp = BalancedParens::new(vec![91u64], 10);
assert_eq!(bp.subtree_size(0), Some(4));
assert_eq!(bp.subtree_size(1), Some(0));
assert_eq!(bp.subtree_size(2), None);
assert_eq!(bp.subtree_size(3), Some(2));
assert_eq!(bp.subtree_size(4), Some(0));
assert_eq!(bp.subtree_size(5), None);
assert_eq!(bp.subtree_size(6), Some(0));
}
#[test]
fn test_subtree_size_simple() {
let bp = BalancedParens::new(vec![0b0011u64], 4);
assert_eq!(bp.subtree_size(0), Some(1));
assert_eq!(bp.subtree_size(1), Some(0));
assert_eq!(bp.subtree_size(2), None);
assert_eq!(bp.subtree_size(3), None);
}
#[test]
fn test_find_close_multi_word_regression() {
let words: Vec<u64> = vec![
6148914691236517205,
17768248446614328661,
7652237512791978695,
10679207262162398901,
11775793721244512726,
8946029940302164653,
4836752526886871298,
6851556326407803519,
4920914883505671372,
2108412058774050545,
11226564708885533661,
14782248937799897235,
3628355826111158270,
16652187963763892183,
15371706386568957536,
56015962,
];
let len = 992;
let p = 153;
let bp = BalancedParens::new(words.clone(), len);
let linear_result = find_close(&words, len, p);
let bp_result = bp.find_close(p);
assert!(bp.is_open(p), "Position {} should be open", p);
assert_eq!(
bp_result, linear_result,
"find_close({}) mismatch: accelerated={:?}, linear={:?}",
p, bp_result, linear_result
);
}
#[test]
fn test_single_pair() {
let bp = BalancedParens::new(vec![0b01], 2);
assert_eq!(bp.find_close(0), Some(1));
assert_eq!(bp.find_open(1), Some(0));
assert_eq!(bp.depth(0), Some(1));
assert_eq!(bp.depth(1), Some(0));
assert_eq!(bp.subtree_size(0), Some(0));
}
#[test]
fn test_find_close_at_word_boundary() {
let words = vec![u64::MAX, 0u64]; let len = 128;
let bp = BalancedParens::new(words.clone(), len);
assert_eq!(bp.find_close(63), Some(64));
assert_eq!(find_close(&words, len, 63), Some(64));
assert_eq!(bp.find_close(0), Some(127));
}
#[test]
fn test_find_close_spanning_multiple_words() {
let words = vec![u64::MAX, u64::MAX, 0u64, 0u64];
let len = 256;
let bp = BalancedParens::new(words.clone(), len);
assert_eq!(bp.find_close(0), Some(255));
assert_eq!(find_close(&words, len, 0), Some(255));
assert_eq!(bp.find_close(127), Some(128));
assert_eq!(find_close(&words, len, 127), Some(128));
}
#[test]
fn test_find_close_l1_boundary() {
let num_opens = 32; let mut words = vec![u64::MAX; num_opens]; words.extend(std::iter::repeat_n(0u64, num_opens)); let len = 64 * 64; let bp = BalancedParens::new(words.clone(), len);
assert_eq!(bp.find_close(0), Some(4095));
assert_eq!(find_close(&words, len, 0), Some(4095));
assert_eq!(bp.find_close(2047), Some(2048));
assert_eq!(find_close(&words, len, 2047), Some(2048));
for p in [2040, 2044, 2046, 2047] {
let bp_result = bp.find_close(p);
let linear_result = find_close(&words, len, p);
assert_eq!(
bp_result, linear_result,
"L1 boundary: find_close({}) mismatch",
p
);
}
}
#[test]
fn test_find_close_across_l1_boundary() {
let mut words = vec![u64::MAX; 64]; let len = 64 * 64;
words[32] = 0;
let bp = BalancedParens::new(words.clone(), len);
assert_eq!(bp.find_close(2047), Some(2048));
assert_eq!(find_close(&words, len, 2047), Some(2048));
}
#[test]
fn test_partial_last_word() {
let bp = BalancedParens::new(vec![0b0011u64], 4);
assert_eq!(bp.find_close(0), Some(3));
assert_eq!(bp.find_close(1), Some(2));
let word0 = (1u64 << 50) - 1; let word1 = 0u64; let words = vec![word0, word1];
let len = 100;
let bp = BalancedParens::new(words.clone(), len);
assert_eq!(bp.find_close(0), Some(99));
assert_eq!(find_close(&words, len, 0), Some(99));
assert_eq!(bp.find_close(49), Some(50));
assert_eq!(find_close(&words, len, 49), Some(50));
}
#[test]
fn test_deeply_nested() {
let word = (1u64 << 32) - 1; let bp = BalancedParens::new(vec![word], 64);
assert_eq!(bp.find_close(0), Some(63));
assert_eq!(bp.find_close(31), Some(32));
assert_eq!(bp.depth(0), Some(1));
assert_eq!(bp.depth(31), Some(32));
assert_eq!(bp.depth(32), Some(31)); }
#[test]
fn test_alternating_pairs() {
let word = 0x5555555555555555u64;
let bp = BalancedParens::new(vec![word], 64);
for i in 0..32 {
let open = i * 2;
let close = i * 2 + 1;
assert_eq!(bp.find_close(open), Some(close));
assert_eq!(bp.find_open(close), Some(open));
}
}
#[test]
fn test_out_of_bounds() {
let bp = BalancedParens::new(vec![0b01u64], 2);
assert_eq!(bp.find_close(2), None);
assert_eq!(bp.find_close(100), None);
assert_eq!(bp.find_open(2), None);
assert_eq!(bp.find_open(100), None);
assert_eq!(bp.depth(2), None);
assert_eq!(bp.depth(100), None);
assert_eq!(bp.subtree_size(2), None);
}
#[test]
fn test_empty() {
let bp = BalancedParens::new(vec![], 0);
assert!(bp.is_empty());
assert_eq!(bp.len(), 0);
assert_eq!(bp.find_close(0), None);
assert_eq!(bp.find_open(0), None);
}
#[test]
fn test_unbalanced_sequence() {
let words = vec![0b11u64];
let bp = BalancedParens::new(words.clone(), 2);
assert_eq!(bp.find_close(0), None);
assert_eq!(bp.find_close(1), None);
assert_eq!(find_close(&words, 2, 0), None);
}
#[test]
fn test_all_closes() {
let words = vec![0u64];
let bp = BalancedParens::new(words.clone(), 2);
assert_eq!(bp.find_close(0), None);
assert_eq!(bp.find_close(1), None);
}
#[test]
fn test_find_close_near_end() {
let bp = BalancedParens::new(vec![0b0011u64], 4);
assert_eq!(bp.find_close(0), Some(3));
let words = vec![u64::MAX, 0u64];
let len = 128;
let bp = BalancedParens::new(words.clone(), len);
assert_eq!(bp.find_close(0), Some(127));
}
#[test]
fn test_index_structure_correctness() {
let words = vec![0b001011u64]; let bp = BalancedParens::new(words, 6);
assert_eq!(bp.l0_min_excess[0], 0);
assert_eq!(bp.l0_word_excess[0], 0);
}
#[test]
fn test_word_min_excess_full_word() {
let (min, total) = word_min_excess(u64::MAX, 64);
assert_eq!(min, 0);
assert_eq!(total, 64);
let (min, total) = word_min_excess(0, 64);
assert_eq!(min, -64);
assert_eq!(total, -64);
let (min, total) = word_min_excess(0x5555555555555555, 64);
assert_eq!(min, 0);
assert_eq!(total, 0);
let (min, total) = word_min_excess(0xAAAAAAAAAAAAAAAAu64, 64);
assert_eq!(min, -1);
assert_eq!(total, 0);
}
#[test]
fn test_l1_index_construction() {
let words = vec![0x5555555555555555u64; 32]; let len = 32 * 64;
let bp = BalancedParens::new(words, len);
assert_eq!(bp.l1_min_excess.len(), 1);
assert_eq!(bp.l1_block_excess.len(), 1);
assert_eq!(bp.l1_block_excess[0], 0);
}
#[test]
fn test_multiple_l1_blocks() {
let mut words = vec![u64::MAX; 32];
words.extend(std::iter::repeat_n(0u64, 32));
let len = 64 * 64;
let bp = BalancedParens::new(words.clone(), len);
assert_eq!(bp.l1_min_excess.len(), 2);
assert_eq!(bp.l1_min_excess[0], 0);
assert_eq!(bp.l1_block_excess[0], 2048);
assert_eq!(bp.l1_min_excess[1], -2048);
assert_eq!(bp.l1_block_excess[1], -2048);
assert_eq!(bp.find_close(2047), Some(2048));
assert_eq!(find_close(&words, len, 2047), Some(2048));
}
#[test]
fn test_find_close_immediate_match() {
let word = 0x5555555555555555u64;
let bp = BalancedParens::new(vec![word], 64);
for i in 0..32 {
assert_eq!(bp.find_close(i * 2), Some(i * 2 + 1));
}
}
#[test]
fn test_sibling_chain() {
let word = 0x5555555555555555u64;
let bp = BalancedParens::new(vec![word], 64);
for i in 0..31 {
let pos = i * 2;
assert_eq!(bp.next_sibling(pos), Some(pos + 2));
}
assert_eq!(bp.next_sibling(62), None);
}
#[test]
fn test_no_first_child_for_leaf() {
let bp = BalancedParens::new(vec![0b01u64], 2);
assert_eq!(bp.first_child(0), None);
}
#[test]
fn test_first_child_exists() {
let bp = BalancedParens::new(vec![0b0011u64], 4);
assert_eq!(bp.first_child(0), Some(1));
assert_eq!(bp.first_child(1), None); }
#[test]
fn test_v2_rank1_simple() {
let bp = BalancedParens::new(vec![0b001011u64], 6);
assert_eq!(bp.rank1(0), 0); assert_eq!(bp.rank1(1), 1); assert_eq!(bp.rank1(2), 2); assert_eq!(bp.rank1(3), 2); assert_eq!(bp.rank1(4), 3); assert_eq!(bp.rank1(5), 3); assert_eq!(bp.rank1(6), 3); }
#[test]
fn test_v2_rank1_matches_v1() {
let patterns: &[(Vec<u64>, usize)] = &[
(vec![0b001011u64], 6),
(vec![0b0011u64], 4),
(vec![0x5555555555555555u64], 64), (vec![u64::MAX, 0], 128), ];
for (words, len) in patterns {
let v1 = BalancedParens::new(words.clone(), *len);
let v2 = BalancedParens::new(words.clone(), *len);
for p in 0..*len {
assert_eq!(
v1.rank1(p),
v2.rank1(p),
"rank1({}) mismatch for pattern {:?}",
p,
words
);
}
}
}
#[test]
fn test_v2_find_close_matches_v1() {
let patterns: &[(Vec<u64>, usize)] = &[
(vec![0b001011u64], 6),
(vec![0b0011u64], 4),
(vec![0x5555555555555555u64], 64),
];
for (words, len) in patterns {
let v1 = BalancedParens::new(words.clone(), *len);
let v2 = BalancedParens::new(words.clone(), *len);
for p in 0..*len {
if v1.is_open(p) {
assert_eq!(
v1.find_close(p),
v2.find_close(p),
"find_close({}) mismatch for pattern {:?}",
p,
words
);
}
}
}
}
#[test]
fn test_v2_excess() {
let bp = BalancedParens::new(vec![0b001011u64], 6);
assert_eq!(bp.excess(0), 1); assert_eq!(bp.excess(1), 2); assert_eq!(bp.excess(2), 1); assert_eq!(bp.excess(3), 2); assert_eq!(bp.excess(4), 1); assert_eq!(bp.excess(5), 0); }
#[test]
fn test_v2_large_bitvector() {
let mut words = Vec::with_capacity(64);
words.extend(std::iter::repeat_n(u64::MAX, 32));
words.extend(std::iter::repeat_n(0u64, 32));
let len = 64 * 64; let v1 = BalancedParens::new(words.clone(), len);
let v2 = BalancedParens::new(words, len);
let test_positions = [0, 1, 63, 64, 512, 1024, 2048, 2049, 4095, 4096];
for &p in &test_positions {
assert_eq!(
v1.rank1(p),
v2.rank1(p),
"rank1({}) mismatch on large bitvector",
p
);
}
assert_eq!(v1.find_close(0), v2.find_close(0));
assert_eq!(v1.find_close(2047), v2.find_close(2047));
}
#[test]
fn test_enclose_word_boundary_with_zero_excess() {
let mut words = vec![0u64; 4];
words[0] = 0x0000_0000_0000_0001;
words[1] = 0xAAAA_AAAA_AAAA_AAAA;
words[2] = 0xAAAA_AAAA_AAAA_AAAA;
words[3] = 0x0000_0000_0000_0005;
let len = 256; let parent = enclose(&words, len, 194);
assert_eq!(
parent,
Some(191), "enclose(194) should find parent at position 191 (word 2 bit 63). \
With the buggy code that checked word_excess instead of max_excess, \
word 2 would be skipped because word_excess=0, returning wrong parent."
);
let word2_popcount = words[2].count_ones();
assert_eq!(word2_popcount, 32, "Word 2 should have exactly 32 ones");
let word2_excess = 2 * word2_popcount as i32 - 64;
assert_eq!(
word2_excess, 0,
"Word 2 excess should be 0 (this is what triggered the bug)"
);
}
#[test]
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
fn test_sse41_l1_matches_scalar_simple() {
if !is_x86_feature_detected!("sse4.1") {
println!("SSE4.1 not available, skipping test");
return;
}
let words: Vec<u64> = (0..32).map(|_| 0x5555555555555555u64).collect();
let (l0_min, l0_excess) = build_l0_index(&words, 32 * 64, 32);
let num_l1 = 1;
let (scalar_min, scalar_excess) = build_l1_index_scalar(&l0_min, &l0_excess, num_l1);
let (sse41_min, sse41_excess) = build_l1_index_sse41(&l0_min, &l0_excess, num_l1);
assert_eq!(
scalar_min, sse41_min,
"L1 min_excess mismatch: scalar={:?}, sse41={:?}",
scalar_min, sse41_min
);
assert_eq!(
scalar_excess, sse41_excess,
"L1 block_excess mismatch: scalar={:?}, sse41={:?}",
scalar_excess, sse41_excess
);
}
#[test]
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
fn test_sse41_l1_matches_scalar_multiple_blocks() {
if !is_x86_feature_detected!("sse4.1") {
println!("SSE4.1 not available, skipping test");
return;
}
let words: Vec<u64> = (0..100)
.map(|i| if i % 2 == 0 { u64::MAX } else { 0 })
.collect();
let (l0_min, l0_excess) = build_l0_index(&words, 100 * 64, 100);
let num_l1 = 100usize.div_ceil(FACTOR_L1);
let (scalar_min, scalar_excess) = build_l1_index_scalar(&l0_min, &l0_excess, num_l1);
let (sse41_min, sse41_excess) = build_l1_index_sse41(&l0_min, &l0_excess, num_l1);
assert_eq!(
scalar_min, sse41_min,
"L1 min_excess mismatch for {} blocks",
num_l1
);
assert_eq!(
scalar_excess, sse41_excess,
"L1 block_excess mismatch for {} blocks",
num_l1
);
}
#[test]
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
fn test_sse41_l1_matches_scalar_negative_excess() {
if !is_x86_feature_detected!("sse4.1") {
println!("SSE4.1 not available, skipping test");
return;
}
let words: Vec<u64> = vec![0u64; 64];
let (l0_min, l0_excess) = build_l0_index(&words, 64 * 64, 64);
let num_l1 = 64usize.div_ceil(FACTOR_L1);
let (scalar_min, scalar_excess) = build_l1_index_scalar(&l0_min, &l0_excess, num_l1);
let (sse41_min, sse41_excess) = build_l1_index_sse41(&l0_min, &l0_excess, num_l1);
assert_eq!(scalar_min, sse41_min);
assert_eq!(scalar_excess, sse41_excess);
assert_eq!(sse41_excess[0], -2048);
assert_eq!(sse41_excess[1], -2048);
}
#[test]
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
fn test_sse41_l1_matches_scalar_mixed_excess() {
if !is_x86_feature_detected!("sse4.1") {
println!("SSE4.1 not available, skipping test");
return;
}
let mut words = Vec::new();
words.extend(std::iter::repeat_n(u64::MAX, 16));
words.extend(std::iter::repeat_n(0u64, 16));
words.extend(std::iter::repeat_n(u64::MAX, 16));
words.extend(std::iter::repeat_n(0u64, 16));
let (l0_min, l0_excess) = build_l0_index(&words, 64 * 64, 64);
let num_l1 = 64usize.div_ceil(FACTOR_L1);
let (scalar_min, scalar_excess) = build_l1_index_scalar(&l0_min, &l0_excess, num_l1);
let (sse41_min, sse41_excess) = build_l1_index_sse41(&l0_min, &l0_excess, num_l1);
assert_eq!(scalar_min, sse41_min);
assert_eq!(scalar_excess, sse41_excess);
}
#[test]
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
fn test_sse41_l2_matches_scalar() {
if !is_x86_feature_detected!("sse4.1") {
println!("SSE4.1 not available, skipping test");
return;
}
let words: Vec<u64> = (0..1024)
.map(|i| {
if i % 3 == 0 {
u64::MAX
} else {
0x5555555555555555
}
})
.collect();
let (l0_min, l0_excess) = build_l0_index(&words, 1024 * 64, 1024);
let num_l1 = 1024usize.div_ceil(FACTOR_L1);
let num_l2 = num_l1.div_ceil(FACTOR_L2);
let (l1_min, l1_excess) = build_l1_index_scalar(&l0_min, &l0_excess, num_l1);
let (scalar_min, scalar_excess) = build_l2_index_scalar(&l1_min, &l1_excess, num_l2);
let (sse41_min, sse41_excess) = build_l2_index_sse41(&l1_min, &l1_excess, num_l2);
assert_eq!(
scalar_min, sse41_min,
"L2 min_excess mismatch: scalar={:?}, sse41={:?}",
scalar_min, sse41_min
);
assert_eq!(
scalar_excess, sse41_excess,
"L2 block_excess mismatch: scalar={:?}, sse41={:?}",
scalar_excess, sse41_excess
);
}
#[test]
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
fn test_sse41_full_construction_correctness() {
if !is_x86_feature_detected!("sse4.1") {
println!("SSE4.1 not available, skipping test");
return;
}
let mut words = Vec::new();
words.extend(std::iter::repeat_n(u64::MAX, 64));
words.extend(std::iter::repeat_n(0u64, 64));
let len = 128 * 64;
let bp = BalancedParens::new(words.clone(), len);
for i in 0..64 * 64 {
if let Some(close) = bp.find_close(i) {
let expected = len - 1 - i;
assert_eq!(
close, expected,
"find_close({}) = {}, expected {}",
i, close, expected
);
}
}
}
#[test]
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
fn test_sse41_l1_single_simd_iteration() {
if !is_x86_feature_detected!("sse4.1") {
println!("SSE4.1 not available, skipping test");
return;
}
let words: Vec<u64> = vec![
u64::MAX, 0, 0x5555555555555555, 0xAAAAAAAAAAAAAAAA, u64::MAX, u64::MAX, 0, 0, ];
let (l0_min, l0_excess) = build_l0_index(&words, 8 * 64, 8);
let num_l1 = 1;
let (scalar_min, scalar_excess) = build_l1_index_scalar(&l0_min, &l0_excess, num_l1);
let (sse41_min, sse41_excess) = build_l1_index_sse41(&l0_min, &l0_excess, num_l1);
assert_eq!(scalar_min, sse41_min);
assert_eq!(scalar_excess, sse41_excess);
}
#[test]
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
fn test_sse41_l1_with_scalar_tail() {
if !is_x86_feature_detected!("sse4.1") {
println!("SSE4.1 not available, skipping test");
return;
}
let words: Vec<u64> = (0..37)
.map(|i| if i % 2 == 0 { u64::MAX } else { 0 })
.collect();
let (l0_min, l0_excess) = build_l0_index(&words, 37 * 64, 37);
let num_l1 = 37usize.div_ceil(FACTOR_L1);
let (scalar_min, scalar_excess) = build_l1_index_scalar(&l0_min, &l0_excess, num_l1);
let (sse41_min, sse41_excess) = build_l1_index_sse41(&l0_min, &l0_excess, num_l1);
assert_eq!(scalar_min, sse41_min);
assert_eq!(scalar_excess, sse41_excess);
}
#[test]
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
fn test_sse41_l1_extreme_excess_values() {
if !is_x86_feature_detected!("sse4.1") {
println!("SSE4.1 not available, skipping test");
return;
}
let words: Vec<u64> = vec![0u64; 512];
let (l0_min, l0_excess) = build_l0_index(&words, 512 * 64, 512);
let num_l1 = 512usize.div_ceil(FACTOR_L1);
let (scalar_min, scalar_excess) = build_l1_index_scalar(&l0_min, &l0_excess, num_l1);
let (sse41_min, sse41_excess) = build_l1_index_sse41(&l0_min, &l0_excess, num_l1);
assert_eq!(scalar_min, sse41_min);
assert_eq!(scalar_excess, sse41_excess);
}
#[test]
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
fn test_sse41_l1_random_patterns() {
if !is_x86_feature_detected!("sse4.1") {
println!("SSE4.1 not available, skipping test");
return;
}
let patterns: Vec<Vec<u64>> = vec![
vec![u64::MAX; 100],
vec![0u64; 100],
(0..100)
.map(|i| if i % 2 == 0 { u64::MAX } else { 0 })
.collect(),
(0..100)
.map(|i| {
let shift = i % 64;
(1u64 << shift).wrapping_sub(1)
})
.collect(),
{
let mut state = 0xDEADBEEFu64;
(0..100)
.map(|_| {
state ^= state << 13;
state ^= state >> 7;
state ^= state << 17;
state
})
.collect()
},
];
for (pattern_idx, words) in patterns.iter().enumerate() {
let (l0_min, l0_excess) = build_l0_index(words, words.len() * 64, words.len());
let num_l1 = words.len().div_ceil(FACTOR_L1);
let (scalar_min, scalar_excess) = build_l1_index_scalar(&l0_min, &l0_excess, num_l1);
let (sse41_min, sse41_excess) = build_l1_index_sse41(&l0_min, &l0_excess, num_l1);
assert_eq!(
scalar_min, sse41_min,
"L1 min_excess mismatch for pattern {}",
pattern_idx
);
assert_eq!(
scalar_excess, sse41_excess,
"L1 block_excess mismatch for pattern {}",
pattern_idx
);
}
}
}