use std::cmp::min_by;
use std::mem::size_of;
use std::ops::{Bound, Deref, RangeBounds};
use crate::rmq::binary_rmq::BinaryRmq;
use crate::util::pdep::Pdep;
// Number of u64 elements per block. Must not exceed 128 so that in-block
// positions fit into the 128-bit prefix/suffix bit vectors, and must not
// exceed 256 so a block-minimum index fits into a u8.
const BLOCK_SIZE: usize = 128;
/// A fixed-capacity bit vector of 128 bits backed by a single `u128`,
/// providing the rank0/select0 operations needed for in-block queries.
/// Bits default to 0; `set_bit` is the only mutator.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
#[cfg_attr(feature = "mem_dbg", mem_size(flat))]
struct SmallBitVector(u128);
impl SmallBitVector {
    /// Returns the number of 0-bits among the first `i` bits (bits `0..i`).
    ///
    /// `i` may be up to 128, in which case all bits are counted.
    #[allow(clippy::cast_possible_truncation)]
    fn rank0(&self, i: usize) -> usize {
        debug_assert!(i <= 128);
        // A shift by 128 would overflow, so checked_shl yields None for
        // i == 128 and the wrapping_sub turns the 0 into an all-ones mask.
        let mask = 1u128.checked_shl(i as u32).unwrap_or(0).wrapping_sub(1);
        (!self.0 & mask).count_ones() as usize
    }

    /// Returns the position of the `rank`-th 0-bit (0-based).
    ///
    /// The caller must guarantee that the vector contains more than `rank`
    /// 0-bits; otherwise the result is unspecified.
    fn select0(&self, mut rank: usize) -> usize {
        // Search the lower 64-bit word first.
        let word = (self.0 & 0xFFFF_FFFF_FFFF_FFFF) as u64;
        if (word.count_zeros() as usize) <= rank {
            // Not enough 0-bits in the lower word; continue in the upper one.
            rank -= word.count_zeros() as usize;
        } else {
            // pdep deposits the single set bit of `1 << rank` into the
            // rank-th 0-bit of `word`; trailing_zeros recovers its position.
            return (1 << rank).pdep(!word).trailing_zeros() as usize;
        }
        // `rank % 64` only guards the shift against a contract violation;
        // for valid inputs the remaining rank is already below 64.
        let word = (self.0 >> 64) as u64;
        64 + (1 << (rank % 64)).pdep(!word).trailing_zeros() as usize
    }

    /// Sets the bit at position `i` (0-based, must be < 128) to 1.
    fn set_bit(&mut self, i: usize) {
        // Fixed: this previously asserted `i <= 128`, but the shift below
        // overflows for i == 128 — panicking in debug builds and, in release
        // builds, masking the shift amount to 0 and silently setting bit 0.
        // Valid positions are strictly less than 128.
        debug_assert!(i < 128);
        let mask = 1u128 << i;
        self.0 |= mask;
    }
}
/// Per-block metadata enabling O(1) in-block prefix/suffix minimum queries.
///
/// Both bit vectors use an inverted encoding so that `rank0`/`select0`
/// operate directly on minima: a 0-bit marks a position that IS a new
/// strict minimum, a 1-bit marks one that is not (see `FastRmq::from_vec`).
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
#[cfg_attr(feature = "mem_dbg", mem_size(flat))]
struct Block {
    // Bit i is 0 iff element i is a new strict minimum of the block prefix
    // ending at i (bit 0 is always 0: the first element trivially is one).
    prefix_minima: SmallBitVector,
    // Bit d is 0 iff the element d positions before the block's last element
    // is a new strict minimum of the suffix starting there (bit 0, the last
    // element itself, is always 0).
    suffix_minima: SmallBitVector,
}
/// A fast range-minimum-query data structure over a `Vec<u64>`.
///
/// The input is split into blocks of `BLOCK_SIZE` elements. Queries combine
/// precomputed per-block prefix/suffix minima with a `BinaryRmq` built over
/// the per-block minimum values; query results are indices into the
/// original data.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
pub struct FastRmq {
    // The original input; range_min returns indices into this vector.
    data: Vec<u64>,
    // RMQ structure over the minimum value of each block.
    block_minima: BinaryRmq,
    // In-block index of each block's minimum (u8 suffices: BLOCK_SIZE <= 256).
    block_min_indices: Vec<u8>,
    // Per-block prefix/suffix minima bit vectors.
    blocks: Vec<Block>,
}
impl FastRmq {
    /// Builds the RMQ data structure from a vector of values, consuming it.
    ///
    /// The input is cut into chunks of `BLOCK_SIZE` elements; for each chunk
    /// the minimum value, its in-block index, and two 128-bit vectors
    /// marking prefix/suffix minima are precomputed.
    ///
    /// # Panics
    /// Panics if the input holds `2^40` elements or more.
    #[must_use]
    pub fn from_vec(data: Vec<u64>) -> Self {
        // NOTE(review): the 2^40 bound presumably comes from BinaryRmq's
        // internal index packing — confirm there.
        assert!(data.len() < 1 << 40, "input too large for fast rmq");
        let mut block_minima = Vec::with_capacity(data.len() / BLOCK_SIZE + 1);
        let mut block_min_indices = Vec::with_capacity(data.len() / BLOCK_SIZE + 1);
        let mut blocks = Vec::with_capacity(data.len() / BLOCK_SIZE + 1);
        data.chunks(BLOCK_SIZE).for_each(|block| {
            let mut prefix_minima = SmallBitVector::default();
            let mut suffix_minima = SmallBitVector::default();
            let mut prefix_minimum = block[0];
            let mut block_minimum = block[0];
            let mut block_minimum_index = 0u8;
            // Inverted encoding: bit i stays 0 exactly when block[i] is a
            // new strict prefix minimum (bit 0 never gets set — block[0]
            // trivially is one), so rank0/select0 count and locate minima.
            for (i, elem) in block.iter().enumerate().skip(1) {
                if *elem < prefix_minimum {
                    prefix_minimum = *elem;
                } else {
                    prefix_minima.set_bit(i);
                }
                // Track the overall block minimum and its in-block index
                // (the cast is lossless: i < BLOCK_SIZE = 128).
                #[allow(clippy::cast_possible_truncation)]
                if *elem < block_minimum {
                    block_minimum = *elem;
                    block_minimum_index = i as u8;
                }
            }
            // Mirror image from the back: bit (i - 1) stays 0 when the
            // element i - 1 positions before the block's last element is a
            // new strict suffix minimum (bit 0, the last element, is never
            // set).
            let mut suffix_minimum = block[block.len() - 1];
            for i in 2..=block.len() {
                if block[block.len() - i] < suffix_minimum {
                    suffix_minimum = block[block.len() - i];
                } else {
                    suffix_minima.set_bit(i - 1);
                }
            }
            block_minima.push(block_minimum);
            block_min_indices.push(block_minimum_index);
            blocks.push(Block {
                prefix_minima,
                suffix_minima,
            });
        });
        Self {
            data,
            block_minima: BinaryRmq::from_vec(block_minima),
            block_min_indices,
            blocks,
        }
    }

    /// Returns the index of the minimum element within `range`, with both
    /// bounds clamped into `0..self.len()`.
    ///
    /// # Panics
    /// Panics if the structure is empty (`self.len() - 1` underflows).
    /// NOTE(review): an excluded end bound of 0 (an empty range) also
    /// underflows below — empty ranges are not supported by this API.
    #[must_use]
    pub fn range_min_with_range<T: RangeBounds<usize>>(&self, range: T) -> usize {
        let start = match range.start_bound() {
            Bound::Included(i) => *i,
            Bound::Excluded(i) => *i + 1,
            Bound::Unbounded => 0,
        }
        .clamp(0, self.len() - 1);
        let end = match range.end_bound() {
            Bound::Included(i) => *i,
            // NOTE(review): underflows for Bound::Excluded(0) — panics in
            // debug builds, wraps (and is then clamped) in release builds.
            Bound::Excluded(i) => *i - 1,
            Bound::Unbounded => self.len() - 1,
        }
        .clamp(0, self.len() - 1);
        self.range_min(start, end)
    }

    /// Returns the index of the minimum element in `data[i..=j]` (both ends
    /// inclusive). Requires `i <= j < self.len()`; ties resolve to the
    /// earlier candidate produced by the branch that answers the query.
    #[must_use]
    #[allow(clippy::similar_names)]
    pub fn range_min(&self, i: usize, j: usize) -> usize {
        let block_i = i / BLOCK_SIZE;
        let block_j = j / BLOCK_SIZE;
        // Case 1: both endpoints fall into the same block.
        if block_i == block_j {
            // rank0(p + 1) counts prefix minima at positions 0..=p. If the
            // count at j exceeds the count at i, a new prefix minimum
            // appears in (i, j]; the last prefix minimum at or before j is
            // then the minimum of [i, j].
            let rank_i_prefix = self.blocks[block_i].prefix_minima.rank0(i % BLOCK_SIZE + 1);
            let rank_j_prefix = self.blocks[block_i].prefix_minima.rank0(j % BLOCK_SIZE + 1);
            if rank_j_prefix > rank_i_prefix {
                return block_i * BLOCK_SIZE
                    + self.blocks[block_i]
                        .prefix_minima
                        .select0(rank_j_prefix - 1);
            }
            let rank_i_suffix = self.blocks[block_i]
                .suffix_minima
                .rank0(BLOCK_SIZE - (i % BLOCK_SIZE));
            let rank_j_suffix = self.blocks[block_i]
                .suffix_minima
                .rank0(BLOCK_SIZE - (j % BLOCK_SIZE));
            // NOTE(review): this branch appears unreachable. rank0 is
            // monotone in its argument, and for i <= j we have
            // BLOCK_SIZE - (i % BLOCK_SIZE) >= BLOCK_SIZE - (j % BLOCK_SIZE),
            // hence rank_i_suffix >= rank_j_suffix always, so the condition
            // below can never hold. Results remain CORRECT because the
            // linear scan underneath covers this case, but the symmetric
            // cross-block code (partial_block_i_min below) suggests the
            // intended form is `rank_i_suffix > rank_j_suffix` with
            // `select0(rank_i_suffix - 1)` and a trailing `- 1`. Any fix
            // must first be validated against partial last blocks, where
            // unset bits past the block's real length would be miscounted
            // as suffix minima.
            if rank_j_suffix > rank_i_suffix {
                return (block_i + 1) * BLOCK_SIZE
                    - self.blocks[block_i]
                        .suffix_minima
                        .select0(rank_j_suffix - 1);
            }
            // Fallback: scan the at most BLOCK_SIZE elements directly.
            // min_by_key returns the first minimum on ties.
            return i + self.data[i..=j]
                .iter()
                .enumerate()
                .min_by_key(|(_, &x)| x)
                .unwrap()
                .0;
        }
        // Case 2: the query spans several blocks. Candidate 1: position of
        // the minimum of block_i's suffix starting at i (last suffix
        // minimum at distance < BLOCK_SIZE - i % BLOCK_SIZE from the end).
        let partial_block_i_min = (block_i + 1) * BLOCK_SIZE
            - self.blocks[block_i].suffix_minima.select0(
                self.blocks[block_i]
                    .suffix_minima
                    .rank0(BLOCK_SIZE - (i % BLOCK_SIZE))
                    - 1,
            )
            - 1;
        // Candidate 2: position of the minimum of block_j's prefix ending
        // at j (last prefix minimum at or before j).
        let partial_block_j_min = block_j * BLOCK_SIZE
            + self.blocks[block_j]
                .prefix_minima
                .select0(self.blocks[block_j].prefix_minima.rank0(j % BLOCK_SIZE + 1) - 1);
        // Candidate 3 (only if blocks lie strictly between): the smallest
        // precomputed block minimum among the fully covered blocks.
        if block_i + 1 < block_j {
            let intermediate_min_block = self.block_minima.range_min(block_i + 1, block_j - 1);
            let min_block_index = intermediate_min_block * BLOCK_SIZE
                + self.block_min_indices[intermediate_min_block] as usize;
            // min_by returns its first argument on ties, so earlier
            // candidates win.
            min_by(
                min_by(partial_block_i_min, partial_block_j_min, |&a, &b| {
                    self.data[a].cmp(&self.data[b])
                }),
                min_block_index,
                |&a, &b| self.data[a].cmp(&self.data[b]),
            )
        } else {
            min_by(partial_block_i_min, partial_block_j_min, |&a, &b| {
                self.data[a].cmp(&self.data[b])
            })
        }
    }

    /// Returns the number of elements the structure was built from.
    #[must_use]
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns true if the structure contains no elements.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Returns the number of heap-allocated bytes used by this structure
    /// (`block_min_indices` contributes one byte per u8 entry).
    #[must_use]
    pub fn heap_size(&self) -> usize {
        self.data.len() * size_of::<u64>()
            + self.block_minima.heap_size()
            + self.block_min_indices.len()
            + self.blocks.len() * size_of::<Block>()
    }
}
// Allows read-only access to the underlying data through the Vec API.
// NOTE(review): Deref to a collection type is unconventional (`[u64]` would
// be the usual target), but changing the Target would alter the public
// interface.
impl Deref for FastRmq {
    type Target = Vec<u64>;
    fn deref(&self) -> &Self::Target {
        &self.data
    }
}
impl From<Vec<u64>> for FastRmq {
fn from(data: Vec<u64>) -> Self {
Self::from_vec(data)
}
}
impl FromIterator<u64> for FastRmq {
    /// Collects the iterator into a vector and builds the structure from it.
    fn from_iter<T: IntoIterator<Item = u64>>(iter: T) -> Self {
        let values: Vec<u64> = iter.into_iter().collect();
        Self::from_vec(values)
    }
}
#[cfg(test)]
mod tests;