faex/bit_vectors/rank_select/dense_sampling_rank/
mod.rs1use super::{RankStructure, RankSupport, SelectSupport};
2use crate::Build;
3use crate::{
4 bit_vectors::BitVec, int_vectors::compact_int_vec::CompactIntVec, profiling::HeapSize,
5 util::BitsRequired,
6};
7use serde::{Deserialize, Serialize};
8use std::cmp::min;
9
10#[derive(Debug, Serialize, Deserialize)]
12pub struct DenseSamplingRank {
13 superblocks: Vec<usize>,
14 superblock_size: usize,
15 blocks: CompactIntVec,
16 k: usize,
17 total_rank: usize,
18}
19
20impl DenseSamplingRank {
21 #[inline]
22 pub fn new(data: &BitVec, k: usize) -> Self {
23 assert!(k > 0, "k must be greater than 0");
24
25 let superblock_size = k * BitVec::CONTAINER_WIDTH;
26 let num_superblocks = data.len() / superblock_size;
27 let mut superblocks = Vec::with_capacity(num_superblocks + 1);
28
29 let max_rank_offset_value = (k - 1) * BitVec::CONTAINER_WIDTH;
31 let rank_offset_int_width = max_rank_offset_value.bits_required() as usize;
32
33 let num_blocks = data.len() / BitVec::CONTAINER_WIDTH;
34 let mut blocks = CompactIntVec::with_capacity(rank_offset_int_width, num_blocks + 1);
35
36 let mut rank = 0;
37 let mut rank_offset = 0;
38 superblocks.push(rank);
40 blocks.push(rank_offset);
41
42 let raw_data = data.raw_data();
43 for i in 0..num_superblocks {
44 for j in i * k..(i + 1) * k - 1 {
45 rank_offset += unsafe { raw_data.get_unchecked(j) }.count_ones() as usize;
46 blocks.push(rank_offset);
47 }
48 let last_block_rank =
49 unsafe { raw_data.get_unchecked((i + 1) * k - 1) }.count_ones() as usize;
50 rank_offset += last_block_rank;
51 rank += rank_offset;
52 rank_offset = 0;
53
54 blocks.push(rank_offset);
55 superblocks.push(rank);
56 }
57
58 let mut unsampled_superblock_rank = 0;
59 for i in num_superblocks * k..num_blocks {
61 unsampled_superblock_rank += unsafe { raw_data.get_unchecked(i) }.count_ones() as usize;
62 blocks.push(unsampled_superblock_rank);
63 }
64
65 if num_blocks != raw_data.len() {
69 unsampled_superblock_rank += raw_data.last().unwrap().count_ones() as usize;
70 }
71
72 if unsampled_superblock_rank != 0 {
82 rank += unsampled_superblock_rank;
83 superblocks.push(rank);
84 }
85
86 Self {
87 superblocks,
88 superblock_size,
89 blocks,
90 k,
91 total_rank: rank,
92 }
93 }
94
95 #[inline]
96 pub fn superblocks(&self) -> &[usize] {
97 &self.superblocks
98 }
99
100 #[inline]
101 pub fn superblock_size(&self) -> usize {
102 self.superblock_size
103 }
104
105 #[inline]
106 pub fn blocks(&self) -> &CompactIntVec {
107 &self.blocks
108 }
109
110 #[inline]
111 pub fn k(&self) -> usize {
112 self.k
113 }
114}
115impl HeapSize for DenseSamplingRank {
116 #[inline]
117 fn heap_size_in_bits(&self) -> usize {
118 self.superblocks.heap_size_in_bits() + self.blocks.heap_size_in_bits()
122 }
123}
124
125impl RankSupport<BitVec> for DenseSamplingRank {
126 #[inline]
127 unsafe fn rank(&self, data: &BitVec, index: usize) -> Option<usize> {
128 if index == 0 {
130 return Some(0);
131 }
132 if index > data.len() {
133 return None;
134 }
135
136 let is = index / self.superblock_size;
137 let iw = index / BitVec::CONTAINER_WIDTH;
138
139 let rank = self.superblocks.get_unchecked(is) + self.blocks.get_unchecked(iw);
140 let block_offset = index % BitVec::CONTAINER_WIDTH;
141 let last_block = data.raw_data().get(iw).copied().unwrap_or(0);
142
143 let last_block_target = last_block & ((1 << block_offset) - 1);
145 let last_block_rank = last_block_target.count_ones() as usize;
146
147 Some(rank + last_block_rank)
148 }
149}
150
151impl SelectSupport<BitVec> for DenseSamplingRank {
152 #[inline]
153 unsafe fn select(&self, data: &BitVec, rank: usize) -> Option<usize> {
154 if rank == 0 {
156 return Some(0);
157 }
158
159 if rank > self.total_rank {
160 return None;
161 }
162
163 let mut left_superblock = 0;
164 let mut right_superblock = self.superblocks.len() - 1;
165
166 while right_superblock - left_superblock > 1 {
167 let mid_superblock = (left_superblock + right_superblock) / 2;
168 let mid_rank = *self.superblocks.get_unchecked(mid_superblock);
169 if mid_rank < rank {
170 left_superblock = mid_superblock;
171 } else {
172 right_superblock = mid_superblock;
173 }
174 }
175
176 let superblock_rank = *self.superblocks.get_unchecked(left_superblock);
181 let remaining_rank = rank - superblock_rank;
182 let raw_data = data.raw_data();
183 let mut left_block_index = left_superblock * self.k;
184
185 let mut right_block_index = min(left_block_index + self.k - 1, raw_data.len() - 1);
186 while right_block_index - left_block_index > 1 {
187 let mid = (left_block_index + right_block_index) / 2;
188 let mid_rank = self.blocks.get_unchecked(mid);
189 if mid_rank < remaining_rank {
190 left_block_index = mid;
191 } else {
192 right_block_index = mid;
193 }
194 }
195
196 let right_rank = self.blocks.get_unchecked(right_block_index);
202 let target_block_index = if right_rank < remaining_rank {
205 right_block_index
206 } else {
207 left_block_index
208 };
209
210 let mut local_rank = self.blocks.get_unchecked(target_block_index);
211
212 let mut block = *raw_data.get_unchecked(target_block_index);
214
215 let mut bit_index = 0;
217 while local_rank < remaining_rank {
218 if block & 0b1 == 0b1 {
219 local_rank += 1;
220 }
221 block >>= 1;
222 bit_index += 1;
223 }
224
225 Some(target_block_index * BitVec::CONTAINER_WIDTH + bit_index)
226 }
227
228 #[inline]
229 unsafe fn select0(&self, data: &BitVec, rank0: usize) -> Option<usize> {
230 if rank0 == 0 {
232 return Some(0);
233 }
234 let total_rank0 = data.len() - self.total_rank;
235 if rank0 > total_rank0 {
236 return None;
237 }
238
239 let mut left = 0;
240 let mut right = self.superblocks.len() - 1;
241
242 while right - left > 1 {
243 let mid = (left + right) / 2;
244 let bits_before_mid = mid * self.superblock_size;
245 let mid_rank0 = bits_before_mid - *self.superblocks.get_unchecked(mid);
246 if mid_rank0 < rank0 {
247 left = mid;
248 } else {
249 right = mid;
250 }
251 }
252
253 let bits_before_left = left * self.superblock_size;
259
260 let superblock_rank0 = bits_before_left - *self.superblocks.get_unchecked(left);
261 let remaining_rank0 = rank0 - superblock_rank0;
262 let raw_data = data.raw_data();
263
264 let first_block_index = left * self.k;
265 let mut left_block_index = first_block_index;
266
267 let mut right_block_index = min(left_block_index + self.k - 1, raw_data.len() - 1);
268 while right_block_index - left_block_index > 1 {
269 let mid = (left_block_index + right_block_index) / 2;
270 let bits_before_mid = (mid - first_block_index) * BitVec::CONTAINER_WIDTH;
271 let mid_rank0 = bits_before_mid - self.blocks.get_unchecked(mid);
272 if mid_rank0 < remaining_rank0 {
273 left_block_index = mid;
274 } else {
275 right_block_index = mid;
276 }
277 }
278
279 let bits_before_right = (right_block_index - first_block_index) * BitVec::CONTAINER_WIDTH;
285 let right_rank0 = bits_before_right - self.blocks.get_unchecked(right_block_index);
286 let target_block_index = if right_rank0 < remaining_rank0 {
289 right_block_index
290 } else {
291 left_block_index
292 };
293
294 let bits_before_target = (target_block_index - first_block_index) * BitVec::CONTAINER_WIDTH;
295 let mut local_rank0 = bits_before_target - self.blocks.get_unchecked(target_block_index);
296
297 let mut block = *raw_data.get_unchecked(target_block_index);
299
300 let mut bit_index = 0;
302 while local_rank0 < remaining_rank0 {
303 if block & 0b1 == 0b0 {
304 local_rank0 += 1;
305 }
306 block >>= 1;
307 bit_index += 1;
308 }
309
310 Some(target_block_index * BitVec::CONTAINER_WIDTH + bit_index)
311 }
312}
313
314pub struct DenseSamplingRankSpec {
315 pub k: usize,
316}
317
318impl DenseSamplingRankSpec {
319 #[inline]
320 pub const fn new(k: usize) -> Self {
321 Self { k }
322 }
323}
324
325impl DenseSamplingRank {
326 #[inline]
327 pub const fn spec(k: usize) -> DenseSamplingRankSpec {
328 DenseSamplingRankSpec::new(k)
329 }
330}
331
332impl Build<BitVec, RankStructure<BitVec, DenseSamplingRank>> for DenseSamplingRankSpec {
333 #[inline]
334 fn build(&self, data: BitVec) -> RankStructure<BitVec, DenseSamplingRank> {
335 let sparse_sampling = DenseSamplingRank::new(&data, self.k);
336 unsafe { RankStructure::new(data, sparse_sampling) }
337 }
338}
339
340impl Build<&BitVec, DenseSamplingRank> for DenseSamplingRankSpec {
341 #[inline]
342 fn build(&self, data: &BitVec) -> DenseSamplingRank {
343 DenseSamplingRank::new(data, self.k)
344 }
345}
346
347#[cfg(test)]
348mod tests;