use super::*;
use std::ops::Range;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SampleIndex {
num_values: usize,
divisor: usize,
samples: IntVector,
}
impl SampleIndex {
pub const RATIO: usize = 8;
pub fn new<T: Iterator<Item = usize> + ExactSizeIterator>(iter: T, universe: usize) -> Self {
let mut iter = iter;
let len = iter.len();
if len == 0 || universe == 0 {
return SampleIndex {
num_values: 0,
divisor: usize::MAX,
samples: IntVector::with_len(1, 1, 0).unwrap(),
}
}
let (num_samples, divisor) = Self::parameters(len, universe);
let width = bits::bit_len((len - 1) as u64);
let mut samples = IntVector::with_len(num_samples, width, 0).unwrap();
let mut offset = 0;
let mut prev = iter.next().unwrap();
let mut next = iter.next();
assert_eq!(prev, 0, "SampleIndex::new(): The initial value must be 0");
for sample in 1..samples.len() {
let threshold = sample * divisor;
while next.is_some() {
let value = next.unwrap();
if value > threshold {
break;
}
assert!(prev < value, "SampleIndex::new(): The values must be strictly increasing");
offset += 1;
prev = value;
next = iter.next();
}
samples.set(sample, offset as u64);
}
assert!(universe > prev, "SampleIndex::new(): Universe size ({}) must be larger than the last value ({})", universe, prev);
SampleIndex {
num_values: len,
divisor,
samples,
}
}
pub fn range(&self, value: usize) -> Range<usize> {
let offset = value / self.divisor;
let start = self.samples.get_or(offset, self.num_values as u64) as usize;
let mut limit = self.samples.get_or(offset + 1, self.num_values as u64) as usize;
if limit < self.num_values {
limit += 1;
}
start..limit
}
fn parameters(values: usize, universe: usize) -> (usize, usize) {
let num_samples = bits::div_round_up(values, Self::RATIO);
let divisor = bits::div_round_up(universe, num_samples);
let num_samples = bits::div_round_up(universe, divisor);
(num_samples, divisor)
}
}
#[cfg(test)]
mod tests {
use super::*;
use rand::Rng;
use rand::rngs::ThreadRng;
fn check_parameters(values: usize, universe: usize) {
let (samples, divisor) = SampleIndex::parameters(values, universe);
assert!((samples - 1) * divisor < universe, "Divisor {} is too large with {} samples and universe size {}", divisor, samples, universe);
assert!(samples * divisor >= universe, "Divisor {} is too small with {} samples and universe size {}", divisor, samples, universe);
assert_eq!((universe - 1) / divisor, samples - 1, "Last value {} does not map to to last sample {} with {} values", universe - 1, samples - 1, values);
}
fn generate_values(universe: usize, density: usize, rng: &mut ThreadRng) -> Vec<usize> {
let mut values: Vec<usize> = Vec::new();
let mut last = 0;
while last < universe {
values.push(last);
last += 1 + rng.gen_range(0..density);
}
values
}
#[test]
fn parameters() {
let mut rng = rand::thread_rng();
let mut values: Vec<usize> = (1..10).collect();
for _ in 0..100 {
values.push(rng.gen_range(1..10000));
}
for v in values {
let mut universe: Vec<usize> = vec![v, v + 1, v * SampleIndex::RATIO - 1, v * SampleIndex::RATIO, v * SampleIndex::RATIO + 1];
for _ in 0..10 {
universe.push(v * rng.gen_range(1..10) + rng.gen_range(0..v));
}
for u in universe {
check_parameters(v, u);
}
}
}
#[test]
fn sample_index() {
let mut rng = rand::thread_rng();
for _ in 0..10 {
let universe = rng.gen_range(10_000..100_000);
let density = rng.gen_range(2..1000);
let values = generate_values(universe, density, &mut rng);
let index = SampleIndex::new(values.iter().copied(), universe);
for _ in 0..1000 {
let query = rng.gen_range(0..universe);
let range = index.range(query);
let start = values[range.start];
assert!(start <= query, "Range start values[{}] = {} is too large for query {} (universe size {}, density {})", range.start, start, query, universe, density);
let end = if range.end >= values.len() { universe } else { values[range.end] };
assert!(end > query, "Range end values[{}] = {} is too small for query {} (universe size {}, density {})", range.end, end, query, universe, density);
}
}
}
}