use crate::{
IndexStorage,
batch_computed_cursors::Buffers,
construction::slice_compression::{NoSliceCompression, SliceCompression},
maybe_savefile,
sealed::Sealed,
};
mod block;
mod condensed;
mod flat;
#[doc(inline)]
pub use block::{Block, Block64, Block512};
#[doc(inline)]
pub use condensed::CondensedTextWithRankSupport;
#[doc(inline)]
pub use flat::FlatTextWithRankSupport;
pub(crate) trait PrivateTextWithRankSupport<I: IndexStorage>: Sealed {
fn construct_from_maybe_slice_compressed_text<S: SliceCompression>(
text: &[u8],
uncompressed_text_len: usize,
alphabet_size: usize,
) -> Self;
fn _alphabet_size(&self) -> usize;
fn _text_len(&self) -> usize;
fn replace_many_interval_borders_with_ranks<const N: usize>(
&self,
buffers: &mut Buffers<N>,
num_remaining_unfinished_queries: usize,
) {
let all_symbols_valid = buffers
.symbols
.iter()
.all(|&s| (s as usize) < self._alphabet_size());
let all_idx_valid = buffers
.intervals
.iter()
.all(|ivl| ivl.start <= self._text_len() && ivl.end <= self._text_len());
let is_safe = all_symbols_valid && all_idx_valid;
assert!(is_safe);
unsafe {
self.replace_many_interval_borders_with_ranks_unchecked(
buffers,
num_remaining_unfinished_queries,
);
}
}
unsafe fn replace_many_interval_borders_with_ranks_unchecked<const N: usize>(
&self,
buffers: &mut Buffers<N>,
num_remaining_unfinished_queries: usize,
);
}
#[allow(private_bounds)]
pub trait TextWithRankSupport<I: IndexStorage>:
maybe_savefile::MaybeSavefile + PrivateTextWithRankSupport<I> + 'static
{
fn construct(text: &[u8], alphabet_size: usize) -> Self {
Self::construct_from_maybe_slice_compressed_text::<NoSliceCompression>(
text,
text.len(),
alphabet_size,
)
}
fn rank(&self, symbol: u8, idx: usize) -> usize {
let is_safe = (symbol as usize) < self.alphabet_size() && idx <= self.text_len();
assert!(is_safe);
unsafe { self.rank_unchecked(symbol, idx) }
}
unsafe fn rank_unchecked(&self, symbol: u8, idx: usize) -> usize;
fn symbol_at(&self, idx: usize) -> u8;
fn text_len(&self) -> usize {
self._text_len()
}
fn alphabet_size(&self) -> usize {
self._alphabet_size()
}
}
#[cfg(test)]
mod tests {
use crate::{
HalfOpenInterval,
batch_computed_cursors::Buffers,
construction::slice_compression::{
HalfBytesCompression, NoSliceCompression, half_byte_compress_text,
},
text_with_rank_support::{
CondensedTextWithRankSupport, FlatTextWithRankSupport, TextWithRankSupport,
},
};
use proptest::prelude::*;
prop_compose! {
fn even_len_text()
(text_len in (0usize..500).prop_map(|len| len * 2))
(text in prop::collection::vec(0u8..16, text_len)) -> Vec<u8> {
text
}
}
prop_compose! {
fn text_and_alphabet_size()
(max_char in 2u8..32)
(text in prop::collection::vec(0u8..=max_char, 0usize..1000), max_char in Just(max_char)) -> (Vec<u8>, usize) {
(text, max_char as usize + 1)
}
}
fn test_with_and_without_half_byte_compression<R: TextWithRankSupport<u32>>(
text: &[u8],
half_byte_compressed_text: &[u8],
) {
let ranks = R::construct_from_maybe_slice_compressed_text::<NoSliceCompression>(
text,
text.len(),
16,
);
let ranks_compressed = R::construct_from_maybe_slice_compressed_text::<HalfBytesCompression>(
half_byte_compressed_text,
text.len(),
16,
);
for symbol in 0..16 {
for idx in 0..=text.len() {
assert_eq!(
ranks.rank(symbol, idx),
ranks_compressed.rank(symbol, idx),
"symbol: {}, idx: {}",
symbol,
idx
);
}
}
}
fn test_replace_many_intervals_same_as_rank<R: TextWithRankSupport<u32>>(
text: &[u8],
alphabet_size: usize,
) {
let ranks = R::construct(text, alphabet_size);
for _ in 0..20 {
const N: usize = 8;
let intervals: [HalfOpenInterval; N] = core::array::from_fn(|_| HalfOpenInterval {
start: rand::random_range(0..=text.len()),
end: rand::random_range(0..=text.len()),
});
let symbols: [u8; N] =
core::array::from_fn(|_| rand::random_range(0..=(alphabet_size as u8 - 1)));
let mut expected_intervals = intervals;
for (interval, symbol) in expected_intervals.iter_mut().zip(symbols) {
interval.start = ranks.rank(symbol, interval.start);
interval.end = ranks.rank(symbol, interval.end);
}
let mut buffers = Buffers::new();
buffers.intervals = intervals;
buffers.symbols = symbols;
ranks.replace_many_interval_borders_with_ranks(&mut buffers, N);
assert_eq!(expected_intervals, buffers.intervals);
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(2048))]
#[test]
fn half_byte_compressed_construction(text in even_len_text()) {
let mut text_copy = text.clone();
half_byte_compress_text(&mut text_copy);
let compressed = &text_copy[..text.len() / 2];
test_with_and_without_half_byte_compression::<FlatTextWithRankSupport<u32>>(&text, compressed);
test_with_and_without_half_byte_compression::<CondensedTextWithRankSupport<u32>>(&text, compressed);
}
#[test]
fn replace_many_intervals_same_as_rank((text, alphabet_size) in text_and_alphabet_size()) {
test_replace_many_intervals_same_as_rank::<FlatTextWithRankSupport<u32>>(&text, alphabet_size);
test_replace_many_intervals_same_as_rank::<CondensedTextWithRankSupport<u32>>(&text, alphabet_size);
}
}
}