#[cfg(not(test))]
use alloc::vec::Vec;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct DsvIndexLightweight {
pub markers: Vec<u64>,
pub markers_rank: Vec<u32>,
pub newlines: Vec<u64>,
pub newlines_rank: Vec<u32>,
pub text_len: usize,
}
fn build_rank(words: &[u64]) -> Vec<u32> {
let mut rank = Vec::with_capacity(words.len() + 1);
let mut cumulative: u32 = 0;
rank.push(0); for &word in words {
cumulative += word.count_ones();
rank.push(cumulative);
}
rank
}
impl DsvIndexLightweight {
pub fn new(markers: Vec<u64>, newlines: Vec<u64>, text_len: usize) -> Self {
let markers_rank = build_rank(&markers);
let newlines_rank = build_rank(&newlines);
Self {
markers,
markers_rank,
newlines,
newlines_rank,
text_len,
}
}
#[inline]
pub fn markers_rank1(&self, i: usize) -> usize {
if i == 0 {
return 0;
}
if i >= self.text_len {
return *self.markers_rank.last().unwrap_or(&0) as usize;
}
let word_idx = i / 64;
let bit_idx = i % 64;
let cumulative = self.markers_rank[word_idx] as usize;
if word_idx < self.markers.len() {
let word = self.markers[word_idx];
let mask = (1u64 << bit_idx) - 1;
cumulative + (word & mask).count_ones() as usize
} else {
cumulative
}
}
#[inline]
pub fn markers_select1(&self, k: usize) -> Option<usize> {
let total_ones = *self.markers_rank.last().unwrap_or(&0) as usize;
if k >= total_ones {
return None;
}
let target_rank = (k + 1) as u32;
let word_idx = match self.markers_rank.binary_search(&target_rank) {
Ok(idx) => idx.saturating_sub(1),
Err(idx) => idx.saturating_sub(1),
};
if word_idx >= self.markers.len() {
return None;
}
let rank_before = self.markers_rank[word_idx] as usize;
let remaining = k - rank_before;
let word = self.markers[word_idx];
let bit_pos = select_in_word(word, remaining as u32) as usize;
let result = word_idx * 64 + bit_pos;
if result < self.text_len {
Some(result)
} else {
None
}
}
#[inline]
pub fn newlines_rank1(&self, i: usize) -> usize {
if i == 0 {
return 0;
}
if i >= self.text_len {
return *self.newlines_rank.last().unwrap_or(&0) as usize;
}
let word_idx = i / 64;
let bit_idx = i % 64;
let cumulative = self.newlines_rank[word_idx] as usize;
if word_idx < self.newlines.len() {
let word = self.newlines[word_idx];
let mask = (1u64 << bit_idx) - 1;
cumulative + (word & mask).count_ones() as usize
} else {
cumulative
}
}
#[inline]
pub fn newlines_select1(&self, k: usize) -> Option<usize> {
let total_ones = *self.newlines_rank.last().unwrap_or(&0) as usize;
if k >= total_ones {
return None;
}
let target_rank = (k + 1) as u32;
let word_idx = match self.newlines_rank.binary_search(&target_rank) {
Ok(idx) => idx.saturating_sub(1),
Err(idx) => idx.saturating_sub(1),
};
if word_idx >= self.newlines.len() {
return None;
}
let rank_before = self.newlines_rank[word_idx] as usize;
let remaining = k - rank_before;
let word = self.newlines[word_idx];
let bit_pos = select_in_word(word, remaining as u32) as usize;
let result = word_idx * 64 + bit_pos;
if result < self.text_len {
Some(result)
} else {
None
}
}
#[inline]
pub fn marker_count(&self) -> usize {
*self.markers_rank.last().unwrap_or(&0) as usize
}
#[inline]
pub fn row_count(&self) -> usize {
*self.newlines_rank.last().unwrap_or(&0) as usize
}
#[inline]
pub fn is_empty(&self) -> bool {
self.text_len == 0
}
}
#[inline]
fn select_in_word(mut val: u64, mut k: u32) -> u32 {
loop {
if val == 0 {
return 64;
}
let t = val.trailing_zeros();
if k == 0 {
return t;
}
k -= 1;
val &= val - 1; }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_rank() {
let markers = vec![0b1010_1010u64];
let newlines = vec![0b0000_0001u64];
let index = DsvIndexLightweight::new(markers, newlines, 64);
assert_eq!(index.markers_rank1(0), 0);
assert_eq!(index.markers_rank1(1), 0);
assert_eq!(index.markers_rank1(2), 1);
assert_eq!(index.markers_rank1(8), 4);
}
#[test]
fn test_select() {
let markers = vec![0b1010_1010u64];
let newlines = vec![0b0000_0001u64];
let index = DsvIndexLightweight::new(markers, newlines, 64);
assert_eq!(index.markers_select1(0), Some(1));
assert_eq!(index.markers_select1(1), Some(3));
assert_eq!(index.markers_select1(2), Some(5));
assert_eq!(index.markers_select1(3), Some(7));
}
}