#[cfg(not(test))]
use alloc::vec::Vec;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
pub const DEFAULT_SAMPLE_RATE: u32 = 256;
#[derive(Clone, Copy, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
struct SampleEntry {
word_idx: u32,
cumulative_before: u32,
}
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SelectIndex {
samples: Vec<SampleEntry>,
sample_rate: u32,
}
impl SelectIndex {
pub fn empty() -> Self {
Self {
samples: Vec::new(),
sample_rate: DEFAULT_SAMPLE_RATE,
}
}
pub fn build(words: &[u64], total_ones: usize, sample_rate: u32) -> Self {
if words.is_empty() || total_ones == 0 {
return Self {
samples: Vec::new(),
sample_rate,
};
}
let sample_rate = sample_rate.max(1);
let num_samples = total_ones / sample_rate as usize + 1;
let mut samples = Vec::with_capacity(num_samples);
let mut count: usize = 0;
let mut next_sample = 0usize;
for (word_idx, &word) in words.iter().enumerate() {
let pop = word.count_ones() as usize;
while next_sample < total_ones && count + pop > next_sample {
samples.push(SampleEntry {
word_idx: word_idx as u32,
cumulative_before: count as u32,
});
next_sample += sample_rate as usize;
}
count += pop;
}
Self {
samples,
sample_rate,
}
}
#[inline]
pub fn jump_to(&self, k: usize) -> (usize, usize) {
if self.samples.is_empty() {
return (0, k);
}
let sample_rate = self.sample_rate as usize;
let sample_idx = k / sample_rate;
if sample_idx >= self.samples.len() {
let last = &self.samples[self.samples.len() - 1];
let start_word = last.word_idx as usize;
let remaining = k - last.cumulative_before as usize;
return (start_word, remaining);
}
let entry = &self.samples[sample_idx];
let start_word = entry.word_idx as usize;
let remaining = k - entry.cumulative_before as usize;
(start_word, remaining)
}
#[inline]
pub fn sample_rate(&self) -> u32 {
self.sample_rate
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_empty_index() {
let idx = SelectIndex::build(&[], 0, 256);
assert_eq!(idx.jump_to(0), (0, 0));
assert_eq!(idx.jump_to(100), (0, 100));
}
#[test]
fn test_single_word() {
let words = vec![0b1111u64]; let idx = SelectIndex::build(&words, 4, 2);
assert_eq!(idx.jump_to(0), (0, 0));
assert_eq!(idx.jump_to(1), (0, 1));
assert_eq!(idx.jump_to(2), (0, 2));
assert_eq!(idx.jump_to(3), (0, 3));
}
#[test]
fn test_multiple_words() {
let words = vec![0b1111u64; 4];
let idx = SelectIndex::build(&words, 16, 4);
assert_eq!(idx.jump_to(0), (0, 0));
let (word, rem) = idx.jump_to(5);
assert_eq!(word, 1);
assert_eq!(rem, 1);
}
#[test]
fn test_sparse_data() {
let words: Vec<u64> = vec![1; 100];
let idx = SelectIndex::build(&words, 100, 10);
let (word, _rem) = idx.jump_to(25);
assert!(word <= 25);
}
#[test]
fn test_dense_data() {
let words: Vec<u64> = vec![u64::MAX; 10];
let idx = SelectIndex::build(&words, 640, 64);
let (word, _rem) = idx.jump_to(128);
assert!(word <= 2);
}
#[test]
fn test_sample_rate_one() {
let words = vec![0b1111u64];
let idx = SelectIndex::build(&words, 4, 1);
assert_eq!(idx.jump_to(0), (0, 0));
assert_eq!(idx.jump_to(1), (0, 1));
assert_eq!(idx.jump_to(2), (0, 2));
assert_eq!(idx.jump_to(3), (0, 3));
}
#[test]
fn test_large_sample_rate() {
let words = vec![0b1111u64; 4];
let idx = SelectIndex::build(&words, 16, 256);
assert_eq!(idx.jump_to(0), (0, 0));
assert_eq!(idx.jump_to(15), (0, 15));
}
}