use std::ops::{Deref, Index};
use crate::ambassador_impl_Index;
use crate::traits::ambassador_impl_Backend;
use crate::traits::bit_vec_ops::ambassador_impl_BitLength;
use crate::traits::rank_sel::ambassador_impl_BitCount;
use crate::traits::rank_sel::ambassador_impl_NumBits;
use crate::traits::rank_sel::ambassador_impl_Rank;
use crate::traits::rank_sel::ambassador_impl_RankHinted;
use crate::traits::rank_sel::ambassador_impl_RankUnchecked;
use crate::traits::rank_sel::ambassador_impl_RankZero;
use crate::traits::rank_sel::ambassador_impl_Select;
use crate::traits::rank_sel::ambassador_impl_SelectHinted;
use crate::traits::rank_sel::ambassador_impl_SelectUnchecked;
use crate::traits::rank_sel::ambassador_impl_SelectZero;
use crate::traits::rank_sel::ambassador_impl_SelectZeroHinted;
use crate::traits::rank_sel::ambassador_impl_SelectZeroUnchecked;
use crate::bits::{BitFieldVec, BitVec};
use crate::dict::{EfDict, EliasFanoBuilder};
use crate::list::comp_int_list::CompIntList;
use crate::list::prefix_sum_int_list::PrefixSumIntList;
use crate::prelude::{
BitCount, NumBits, Rank, RankHinted, RankUnchecked, RankZero, Select, SelectHinted,
SelectUnchecked, SelectZero, SelectZeroHinted, SelectZeroUnchecked,
};
use crate::traits::bal_paren::BalParen;
use crate::traits::indexed_dict::PredUnchecked;
use ambassador::Delegate;
use mem_dbg::*;
use value_traits::slices::{SliceByValue, SliceByValueMut};
const WORD_BITS: usize = usize::BITS as usize;
static BYTE_MIN_EXCESS: [i8; 256] = {
let mut table = [0i8; 256];
let mut b = 0usize;
while b < 256 {
let mut excess = 0i8;
let mut min_e = 0i8;
let mut i = 0;
while i < 8 {
if b & (1 << i) != 0 {
excess += 1;
} else {
excess -= 1;
}
if excess < min_e {
min_e = excess;
}
i += 1;
}
table[b] = min_e;
b += 1;
}
table
};
static BYTE_FIND_CLOSE: [[u8; 8]; 256] = {
let mut table = [[8u8; 8]; 256];
let mut b = 0usize;
while b < 256 {
let mut excess = 0i8;
let mut i = 0u8;
while i < 8 {
if b & (1 << i) != 0 {
excess += 1;
} else {
excess -= 1;
}
if excess < 0 {
let target = (-excess - 1) as usize;
if target < 8 && table[b][target] == 8 {
table[b][target] = i;
}
}
i += 1;
}
b += 1;
}
table
};
fn count_far_open(word: usize, l: usize) -> usize {
let mut c = 0usize;
let mut e = 0i32;
for i in (0..l).rev() {
if word & (1usize << i) != 0 {
e += 1;
if e > 0 {
c += 1;
}
} else if e > 0 {
e = -1;
} else {
e -= 1;
}
}
c
}
fn count_far_close(word: usize, l: usize) -> usize {
let mut c = 0usize;
let mut e = 0i32;
for i in 0..l {
if word & (1usize << i) != 0 {
if e > 0 {
e = -1;
} else {
e -= 1;
}
} else {
e += 1;
if e > 0 {
c += 1;
}
}
}
c
}
#[inline]
pub fn find_near_close(word: usize) -> usize {
let scan_word = word >> 1;
let bytes = scan_word.to_le_bytes();
let mut excess: i8 = 1; for (i, &b) in bytes.iter().enumerate() {
let min_e = BYTE_MIN_EXCESS[b as usize];
if excess + min_e <= 0 {
let target = (excess - 1) as usize;
debug_assert!(target < 8);
let bit_in_byte = BYTE_FIND_CLOSE[b as usize][target] as usize;
return i * 8 + bit_in_byte + 1; }
excess += 2 * b.count_ones() as i8 - 8;
}
WORD_BITS
}
#[inline]
pub fn find_far_close(word: usize, k: i64) -> usize {
let bytes = word.to_le_bytes();
let target = -(k + 1) as i32; let mut excess: i32 = 0;
for (i, &b) in bytes.iter().enumerate() {
let min_e = BYTE_MIN_EXCESS[b as usize] as i32;
if excess + min_e <= target {
let t = (k as i32 + excess) as usize;
debug_assert!(t < 8);
return i * 8 + BYTE_FIND_CLOSE[b as usize][t] as usize;
}
excess += 2 * b.count_ones() as i32 - 8;
}
panic!("find_far_close: k={k} not found in word {word:#x}")
}
#[derive(Debug, Clone, MemSize, MemDbg, Delegate)]
#[cfg_attr(feature = "epserde", derive(epserde::Epserde))]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[delegate(Index<usize>, target = "paren")]
#[delegate(crate::traits::Backend, target = "paren")]
#[delegate(crate::traits::bit_vec_ops::BitLength, target = "paren")]
#[delegate(crate::traits::rank_sel::BitCount, target = "paren")]
#[delegate(crate::traits::rank_sel::NumBits, target = "paren")]
#[delegate(crate::traits::rank_sel::RankHinted, target = "paren")]
#[delegate(crate::traits::rank_sel::RankUnchecked, target = "paren")]
#[delegate(crate::traits::rank_sel::Rank, target = "paren")]
#[delegate(crate::traits::rank_sel::RankZero, target = "paren")]
#[delegate(crate::traits::rank_sel::SelectHinted, target = "paren")]
#[delegate(crate::traits::rank_sel::SelectUnchecked, target = "paren")]
#[delegate(crate::traits::rank_sel::Select, target = "paren")]
#[delegate(crate::traits::rank_sel::SelectZeroHinted, target = "paren")]
#[delegate(crate::traits::rank_sel::SelectZeroUnchecked, target = "paren")]
#[delegate(crate::traits::rank_sel::SelectZero, target = "paren")]
pub struct JacobsonBalParen<B = BitVec<Box<[usize]>>, P = EfDict<usize>, O = CompIntList> {
paren: B,
pioneer_positions: P,
pioneer_match_offsets: O,
}
fn build_pioneers(words: impl AsRef<[usize]> + BitLength) -> (EfDict<usize>, Vec<usize>) {
let len = words.len();
let words = words.as_ref();
let num_words = len.div_ceil(WORD_BITS);
debug_assert!(words.len() >= num_words);
let mut count = vec![0i32; num_words];
let mut residual = vec![0usize; num_words];
let mut opening_pioneers = Vec::new();
let mut opening_pioneer_matches = Vec::new();
for block in (0..num_words).rev() {
let l = std::cmp::min(WORD_BITS, len - block * WORD_BITS);
if block != num_words - 1 {
let mut excess = 0i32;
let mut count_far_opening = count_far_open(words[block], l) as i32;
for j in (0..l).rev() {
if words[block] & (1usize << j) == 0 {
if excess > 0 {
excess = -1;
} else {
excess -= 1;
}
} else {
excess += 1;
if excess > 0 {
let mut matching_block = block;
loop {
matching_block += 1;
if count[matching_block] != 0 {
break;
}
}
count_far_opening -= 1;
if {
count[matching_block] -= 1;
count[matching_block]
} == 0
|| count_far_opening == 0
{
let pos = block * WORD_BITS + j;
let match_pos = matching_block * WORD_BITS
+ find_far_close(
words[matching_block],
residual[matching_block] as i64,
);
opening_pioneers.push(pos);
opening_pioneer_matches.push(match_pos - pos);
}
residual[matching_block] += 1;
}
}
}
}
count[block] = count_far_close(words[block], l) as i32;
}
for &c in &count {
assert!(c == 0, "Unbalanced parentheses");
}
opening_pioneers.reverse();
opening_pioneer_matches.reverse();
let num_pioneers = opening_pioneers.len();
let upper_bound = if num_pioneers > 0 { len - 1 } else { 0 };
let mut builder = EliasFanoBuilder::new(num_pioneers, upper_bound);
for &p in &opening_pioneers {
builder.push(p);
}
let ef_positions = builder.build_with_dict();
(ef_positions, opening_pioneer_matches)
}
impl<B: AsRef<[usize]> + BitLength> JacobsonBalParen<B> {
#[must_use]
pub fn new(paren: B) -> Self {
let (ef_positions, matches) = build_pioneers(&paren);
let min_offset = matches.iter().copied().min().unwrap_or(0);
let offsets = CompIntList::new(min_offset, &matches);
JacobsonBalParen {
paren,
pioneer_positions: ef_positions,
pioneer_match_offsets: offsets,
}
}
}
impl<B: AsRef<[usize]> + BitLength> JacobsonBalParen<B, EfDict<usize>, BitFieldVec<Box<[usize]>>> {
#[must_use]
pub fn new_with_bit_field_vec(paren: B) -> Self {
let (ef_positions, opening_pioneer_matches) = build_pioneers(&paren);
let max_offset = opening_pioneer_matches.iter().copied().max().unwrap_or(0);
let bit_width = if max_offset == 0 {
1
} else {
usize::BITS as usize - max_offset.leading_zeros() as usize
};
let num_pioneers = opening_pioneer_matches.len();
let mut offsets = BitFieldVec::new(bit_width, num_pioneers);
for (i, &off) in opening_pioneer_matches.iter().enumerate() {
unsafe { offsets.set_value_unchecked(i, off) };
}
JacobsonBalParen {
paren,
pioneer_positions: ef_positions,
pioneer_match_offsets: offsets.into(),
}
}
}
impl<B: AsRef<[usize]> + BitLength> JacobsonBalParen<B, EfDict<usize>, PrefixSumIntList> {
#[must_use]
pub fn new_with_prefix_sum(paren: B) -> Self {
let (ef_positions, matches) = build_pioneers(&paren);
let offsets = PrefixSumIntList::new(&matches);
JacobsonBalParen {
paren,
pioneer_positions: ef_positions,
pioneer_match_offsets: offsets,
}
}
}
impl<B, P, O> JacobsonBalParen<B, P, O> {
pub unsafe fn map_pioneer_positions<P2>(
self,
func: impl FnOnce(P) -> P2,
) -> JacobsonBalParen<B, P2, O> {
JacobsonBalParen {
paren: self.paren,
pioneer_positions: func(self.pioneer_positions),
pioneer_match_offsets: self.pioneer_match_offsets,
}
}
pub unsafe fn map_pioneer_match_offsets<O2>(
self,
func: impl FnOnce(O) -> O2,
) -> JacobsonBalParen<B, P, O2> {
JacobsonBalParen {
paren: self.paren,
pioneer_positions: self.pioneer_positions,
pioneer_match_offsets: func(self.pioneer_match_offsets),
}
}
}
impl<B, P, O> Deref for JacobsonBalParen<B, P, O> {
type Target = B;
#[inline(always)]
fn deref(&self) -> &Self::Target {
&self.paren
}
}
impl<
B: AsRef<[usize]> + BitLength,
P: for<'a> PredUnchecked<Input = usize, Output<'a> = usize>,
O: SliceByValue<Value = usize>,
> BalParen for JacobsonBalParen<B, P, O>
{
fn find_close(&self, pos: usize) -> Option<usize> {
assert!(
pos < self.paren.len(),
"find_close: pos {} out of bounds for length {}",
pos,
self.paren.len()
);
let word_idx = pos / WORD_BITS;
let bit_idx = pos % WORD_BITS;
let words = self.paren.as_ref();
if words[word_idx] & (1usize << bit_idx) == 0 {
return None;
}
let result = find_near_close(words[word_idx] >> bit_idx);
if result < WORD_BITS - bit_idx {
return Some(word_idx * WORD_BITS + bit_idx + result);
}
let (pioneer_index, pioneer) =
unsafe { self.pioneer_positions.pred_unchecked::<false>(pos) };
let match_pos = pioneer
+ unsafe {
self.pioneer_match_offsets
.get_value_unchecked(pioneer_index)
};
if pos == pioneer {
return Some(match_pos);
}
debug_assert_eq!(word_idx, pioneer / WORD_BITS);
let pioneer_bit = pioneer % WORD_BITS;
let dist = pos - pioneer;
debug_assert!(dist < WORD_BITS);
let mask_bits = (words[word_idx] >> pioneer_bit) & ((1usize << dist) - 1);
let e = 2 * mask_bits.count_ones() as i64 - dist as i64;
let match_word = match_pos / WORD_BITS;
let match_bit = match_pos % WORD_BITS;
let match_word_bits = words[match_word];
let mask_before_match = if match_bit == 0 {
0
} else {
(1usize << match_bit) - 1
};
let num_far_close =
match_bit as i64 - ((match_word_bits & mask_before_match).count_ones() as i64) * 2;
Some(match_word * WORD_BITS + find_far_close(match_word_bits, num_far_close - e))
}
}
use crate::traits::{Backend, BitLength, TryIntoUnaligned};
impl<B, P: TryIntoUnaligned, O: TryIntoUnaligned> TryIntoUnaligned for JacobsonBalParen<B, P, O> {
type Unaligned = JacobsonBalParen<B, P::Unaligned, O::Unaligned>;
fn try_into_unaligned(
self,
) -> Result<Self::Unaligned, crate::traits::UnalignedConversionError> {
Ok(JacobsonBalParen {
paren: self.paren,
pioneer_positions: self.pioneer_positions.try_into_unaligned()?,
pioneer_match_offsets: self.pioneer_match_offsets.try_into_unaligned()?,
})
}
}
impl<W, B: AsRef<[W]>, P, O> AsRef<[W]> for JacobsonBalParen<B, P, O> {
fn as_ref(&self) -> &[W] {
self.paren.as_ref()
}
}
#[cfg(test)]
mod tests {
use crate::traits::BitVecOpsMut;
use super::*;
fn find_near_close_naive(word: usize) -> usize {
let mut c = 1i32;
for i in 1..WORD_BITS {
if word & (1usize << i) != 0 {
c += 1;
} else {
c -= 1;
}
if c == 0 {
return i;
}
}
WORD_BITS
}
fn find_far_close_reference(word: usize, k: i64) -> usize {
let mut e = 0i32;
let mut remaining = k;
for i in 0..WORD_BITS {
if word & (1usize << i) != 0 {
if e > 0 {
e = -1;
} else {
e -= 1;
}
} else {
e += 1;
if e > 0 {
if remaining == 0 {
return i;
}
remaining -= 1;
}
}
}
panic!("Not enough far close parens")
}
fn find_close_naive(words: &[usize], pos: usize, len: usize) -> usize {
assert!(pos < len);
assert!(
words[pos / WORD_BITS] & (1usize << (pos % WORD_BITS)) != 0,
"Not an open paren"
);
let mut c = 1i32;
for i in (pos + 1)..len {
let w = words[i / WORD_BITS];
if w & (1usize << (i % WORD_BITS)) != 0 {
c += 1;
} else {
c -= 1;
}
if c == 0 {
return i;
}
}
panic!("Unmatched open paren at position {pos}")
}
#[test]
fn test_find_near_close_simple() {
assert_eq!(find_near_close(0b01), 1);
assert_eq!(find_near_close_naive(0b01), 1);
}
#[test]
fn test_find_near_close_nested() {
assert_eq!(find_near_close(0b0011), 3);
assert_eq!(find_near_close_naive(0b0011), 3);
assert_eq!(find_near_close(0b0011 >> 1), 1);
}
#[test]
fn test_find_near_close_exhaustive_16bit() {
for w in (1usize..65536).step_by(2) {
let naive = find_near_close_naive(w);
let fast = find_near_close(w);
assert_eq!(
fast.min(WORD_BITS),
naive.min(WORD_BITS),
"find_near_close mismatch for word 0x{w:04x}"
);
}
}
#[test]
fn test_find_near_close_large_words() {
let test_words: [usize; 3] = [
usize::MAX / 3, 1, usize::MAX, ];
for w in test_words {
let naive = find_near_close_naive(w);
let fast = find_near_close(w);
assert_eq!(
fast.min(WORD_BITS),
naive.min(WORD_BITS),
"find_near_close mismatch for word 0x{w:016x}"
);
}
}
#[test]
fn test_find_far_close_simple() {
assert_eq!(find_far_close(0, 0), 0);
assert_eq!(find_far_close(0, 1), 1);
assert_eq!(find_far_close(0, 2), 2);
}
#[cfg(target_pointer_width = "64")]
#[test]
fn test_find_far_close_against_naive() {
let test_words: Vec<usize> = vec![
0,
0b0010, 0b1010, 0x5555_5555_5555_5555,
0xAAAA_AAAA_AAAA_AAAA,
0x0000_0000_FFFF_FFFF,
0xFFFF_FFFF_0000_0000,
0x0123_4567_89AB_CDEF,
0xFEDC_BA98_7654_3210,
0x0F0F_0F0F_0F0F_0F0F,
0xF0F0_F0F0_F0F0_F0F0,
0xDEAD_BEEF_CAFE_BABE,
];
for &w in &test_words {
let n_far = count_far_close(w, WORD_BITS);
for k in 0..n_far {
let naive = find_far_close_reference(w, k as i64);
let fast = find_far_close(w, k as i64);
assert_eq!(
fast, naive,
"find_far_close mismatch for word 0x{:016x}, k={}: fast={}, naive={}",
w, k, fast, naive
);
}
}
}
#[test]
fn test_find_far_close_exhaustive_8bit() {
for w in 0usize..256 {
let n_far = count_far_close(w, WORD_BITS);
for k in 0..n_far {
let naive = find_far_close_reference(w, k as i64);
let fast = find_far_close(w, k as i64);
assert_eq!(
fast, naive,
"find_far_close mismatch for word 0b{:08b}, k={}: fast={}, naive={}",
w, k, fast, naive
);
}
}
}
#[cfg(target_pointer_width = "64")]
#[test]
fn test_find_close_far_match() {
let bp = JacobsonBalParen::new(unsafe {
BitVec::from_raw_parts(
vec![0x5555_5555_5555_555Fusize, 0x0000_0000_0000_0000usize].into_boxed_slice(),
68,
)
});
let words = bp.as_ref();
let len = bp.len();
for pos in 0..len {
if words[pos / WORD_BITS] & (1usize << (pos % WORD_BITS)) != 0 {
let expected = find_close_naive(words, pos, len);
assert_eq!(
bp.find_close(pos),
Some(expected),
"find_close mismatch at pos={pos}"
);
}
}
}
fn random_balanced(n: usize) -> BitVec<Box<[usize]>> {
use rand::rngs::SmallRng;
use rand::{RngExt, SeedableRng};
assert!(n % 2 == 0, "Need even length for balanced parens");
let mut rng = SmallRng::seed_from_u64(n as u64);
let half = n / 2;
let mut bv: BitVec = BitVec::new(n);
let mut opens_remaining = half;
let mut excess = 0usize;
for i in 0..n {
let place_open = if opens_remaining == 0 {
false
} else if excess == 0 {
true
} else {
rng.random_bool(opens_remaining as f64 / (opens_remaining + excess) as f64)
};
if place_open {
bv.set(i, true);
opens_remaining -= 1;
excess += 1;
} else {
excess -= 1;
}
}
assert_eq!(excess, 0);
assert_eq!(opens_remaining, 0);
bv.into()
}
#[test]
fn test_find_close_random_small() {
for size in (2..=40).step_by(2) {
let bp = JacobsonBalParen::new(random_balanced(size));
let words = bp.as_ref();
let len = bp.len();
for pos in 0..len {
if words[pos / WORD_BITS] & (1usize << (pos % WORD_BITS)) != 0 {
let expected = find_close_naive(words, pos, len);
assert_eq!(bp.find_close(pos), Some(expected), "size={size}, pos={pos}");
}
}
}
}
#[test]
fn test_find_close_random_large() {
for &size in &[128, 256, 512, 1024, 2048] {
let bp = JacobsonBalParen::new(random_balanced(size));
let words = bp.as_ref();
let len = bp.len();
for pos in 0..len {
if words[pos / WORD_BITS] & (1usize << (pos % WORD_BITS)) != 0 {
let expected = find_close_naive(words, pos, len);
assert_eq!(bp.find_close(pos), Some(expected), "size={size}, pos={pos}");
}
}
}
}
#[cfg(target_pointer_width = "64")]
#[test]
fn test_find_close_all_nested() {
let word = [0x0000_0000_FFFF_FFFFusize];
let bp = JacobsonBalParen::new(unsafe { BitVec::from_raw_parts(&word, WORD_BITS) });
for i in 0..32 {
assert_eq!(bp.find_close(i), Some(63 - i), "pos={i}");
}
}
#[cfg(not(target_pointer_width = "64"))]
#[test]
fn test_find_close_all_nested() {
let word = [0x0000_FFFFusize];
let bp = JacobsonBalParen::new(unsafe { BitVec::from_raw_parts(&word, WORD_BITS) });
for i in 0..16 {
assert_eq!(bp.find_close(i), Some(31 - i), "pos={i}");
}
}
#[test]
fn test_find_close_3_words() {
let bp = JacobsonBalParen::new(random_balanced(192));
let words = bp.as_ref();
let len = bp.len();
for pos in 0..len {
if words[pos / WORD_BITS] & (1usize << (pos % WORD_BITS)) != 0 {
let expected = find_close_naive(words, pos, len);
assert_eq!(bp.find_close(pos), Some(expected), "3-word test, pos={pos}");
}
}
}
#[test]
fn test_count_far_open_close() {
assert_eq!(count_far_open(usize::MAX, WORD_BITS), WORD_BITS);
assert_eq!(count_far_close(0, WORD_BITS), WORD_BITS);
assert_eq!(count_far_open(usize::MAX / 3, WORD_BITS), 0);
assert_eq!(count_far_close(usize::MAX / 3, WORD_BITS), 0);
}
}