simple_sds_sbwt/rl_vector/
index.rs1use super::*;
4
5use std::ops::Range;
6
7#[derive(Clone, Debug, PartialEq, Eq)]
37pub struct SampleIndex {
38 num_values: usize,
39 divisor: usize,
40 samples: IntVector,
41}
42
43impl SampleIndex {
44 pub const RATIO: usize = 8;
46
47 pub fn new<T: Iterator<Item = usize> + ExactSizeIterator>(iter: T, universe: usize) -> Self {
58 let mut iter = iter;
59 let len = iter.len();
60 if len == 0 || universe == 0 {
61 return SampleIndex {
62 num_values: 0,
63 divisor: usize::MAX,
64 samples: IntVector::with_len(1, 1, 0).unwrap(),
65 }
66 }
67
68 let (num_samples, divisor) = Self::parameters(len, universe);
69 let width = bits::bit_len((len - 1) as u64);
70 let mut samples = IntVector::with_len(num_samples, width, 0).unwrap();
71
72 let mut offset = 0;
74 let mut prev = iter.next().unwrap();
75 let mut next = iter.next();
76 assert_eq!(prev, 0, "SampleIndex::new(): The initial value must be 0");
77 for sample in 1..samples.len() {
78 let threshold = sample * divisor;
79 while next.is_some() {
80 let value = next.unwrap();
81 if value > threshold {
82 break;
83 }
84 assert!(prev < value, "SampleIndex::new(): The values must be strictly increasing");
85 offset += 1;
86 prev = value;
87 next = iter.next();
88 }
89 samples.set(sample, offset as u64);
90 }
91 assert!(universe > prev, "SampleIndex::new(): Universe size ({}) must be larger than the last value ({})", universe, prev);
92
93 SampleIndex {
94 num_values: len,
95 divisor,
96 samples,
97 }
98 }
99
100 pub fn range(&self, value: usize) -> Range<usize> {
107 let offset = value / self.divisor;
108 let start = self.samples.get_or(offset, self.num_values as u64) as usize;
109 let mut limit = self.samples.get_or(offset + 1, self.num_values as u64) as usize;
110 if limit < self.num_values {
112 limit += 1;
113 }
114 start..limit
115 }
116
117 fn parameters(values: usize, universe: usize) -> (usize, usize) {
119 let num_samples = bits::div_round_up(values, Self::RATIO);
120 let divisor = bits::div_round_up(universe, num_samples);
121 let num_samples = bits::div_round_up(universe, divisor);
122 (num_samples, divisor)
123 }
124}
125
126#[cfg(test)]
129mod tests {
130 use super::*;
131
132 use rand::Rng;
133 use rand::rngs::ThreadRng;
134
135 fn check_parameters(values: usize, universe: usize) {
136 let (samples, divisor) = SampleIndex::parameters(values, universe);
137 assert!((samples - 1) * divisor < universe, "Divisor {} is too large with {} samples and universe size {}", divisor, samples, universe);
138 assert!(samples * divisor >= universe, "Divisor {} is too small with {} samples and universe size {}", divisor, samples, universe);
139 assert_eq!((universe - 1) / divisor, samples - 1, "Last value {} does not map to to last sample {} with {} values", universe - 1, samples - 1, values);
140 }
141
142 fn generate_values(universe: usize, density: usize, rng: &mut ThreadRng) -> Vec<usize> {
143 let mut values: Vec<usize> = Vec::new();
144 let mut last = 0;
145 while last < universe {
146 values.push(last);
147 last += 1 + rng.gen_range(0..density);
148 }
149 values
150 }
151
152 #[test]
153 fn parameters() {
154 let mut rng = rand::thread_rng();
155 let mut values: Vec<usize> = (1..10).collect();
156 for _ in 0..100 {
157 values.push(rng.gen_range(1..10000));
158 }
159
160 for v in values {
161 let mut universe: Vec<usize> = vec![v, v + 1, v * SampleIndex::RATIO - 1, v * SampleIndex::RATIO, v * SampleIndex::RATIO + 1];
162 for _ in 0..10 {
163 universe.push(v * rng.gen_range(1..10) + rng.gen_range(0..v));
164 }
165 for u in universe {
166 check_parameters(v, u);
167 }
168 }
169 }
170
171 #[test]
172 fn sample_index() {
173 let mut rng = rand::thread_rng();
174 for _ in 0..10 {
175 let universe = rng.gen_range(10_000..100_000);
176 let density = rng.gen_range(2..1000);
177 let values = generate_values(universe, density, &mut rng);
178 let index = SampleIndex::new(values.iter().copied(), universe);
179 for _ in 0..1000 {
180 let query = rng.gen_range(0..universe);
181 let range = index.range(query);
182 let start = values[range.start];
183 assert!(start <= query, "Range start values[{}] = {} is too large for query {} (universe size {}, density {})", range.start, start, query, universe, density);
184 let end = if range.end >= values.len() { universe } else { values[range.end] };
185 assert!(end > query, "Range end values[{}] = {} is too small for query {} (universe size {}, density {})", range.end, end, query, universe, density);
186 }
187 }
188 }
189}
190
191