1#![allow(unsafe_op_in_unsafe_fn)]
2use polars_buffer::SharedStorage;
3use polars_utils::IdxSize;
4use polars_utils::slice::load_padded_le_u64;
5
6use super::bitmask::BitMask;
7use crate::bitmap::{Bitmap, MutableBitmap};
8use crate::trusted_len::TrustedLen;
9
10#[derive(Default, Clone)]
12pub struct BitmapBuilder {
13 buf: u64, bit_len: usize, bit_cap: usize, set_bits_in_bytes: usize, bytes: Vec<u8>,
18}
19
20impl BitmapBuilder {
21 pub fn new() -> Self {
22 Self::default()
23 }
24
25 #[inline(always)]
26 pub fn len(&self) -> usize {
27 self.bit_len
28 }
29
30 #[inline(always)]
31 pub fn is_empty(&self) -> bool {
32 self.bit_len == 0
33 }
34
35 #[inline(always)]
36 pub fn capacity(&self) -> usize {
37 self.bit_cap
38 }
39
40 #[inline(always)]
41 pub fn set_bits(&self) -> usize {
42 self.set_bits_in_bytes + self.buf.count_ones() as usize
43 }
44
45 #[inline(always)]
46 pub fn unset_bits(&self) -> usize {
47 self.bit_len - self.set_bits()
48 }
49
50 pub fn with_capacity(bits: usize) -> Self {
51 let bytes = Vec::with_capacity(bits.div_ceil(64) * 8);
52 let words_available = bytes.capacity() / 8;
53 Self {
54 buf: 0,
55 bit_len: 0,
56 bit_cap: words_available * 64,
57 set_bits_in_bytes: 0,
58 bytes,
59 }
60 }
61
62 #[inline(always)]
63 pub fn reserve(&mut self, additional: usize) {
64 if self.bit_len + additional > self.bit_cap {
65 self.reserve_slow(additional)
66 }
67 }
68
69 #[cold]
70 #[inline(never)]
71 fn reserve_slow(&mut self, additional: usize) {
72 let bytes_needed = (self.bit_len + additional).div_ceil(64) * 8;
73 self.bytes.reserve(bytes_needed - self.bytes.len());
74 let words_available = self.bytes.capacity() / 8;
75 self.bit_cap = words_available * 64;
76 }
77
78 pub fn clear(&mut self) {
79 self.buf = 0;
80 self.bit_len = 0;
81 self.set_bits_in_bytes = 0;
82 self.bytes.clear();
83 }
84
85 #[inline(always)]
86 pub fn push(&mut self, x: bool) {
87 self.reserve(1);
88 unsafe { self.push_unchecked(x) }
89 }
90
91 #[inline(always)]
95 unsafe fn flush_word_unchecked(&mut self, w: u64) {
96 let cur_len = self.bytes.len();
97 let p = self.bytes.as_mut_ptr().add(cur_len).cast::<u64>();
98 p.write_unaligned(w.to_le());
99 self.bytes.set_len(cur_len + 8);
100 }
101
102 #[inline(always)]
105 pub unsafe fn push_unchecked(&mut self, x: bool) {
106 debug_assert!(self.bit_len < self.bit_cap);
107 self.buf |= (x as u64) << (self.bit_len % 64);
108 self.bit_len += 1;
109 if self.bit_len.is_multiple_of(64) {
110 self.flush_word_unchecked(self.buf);
111 self.set_bits_in_bytes += self.buf.count_ones() as usize;
112 self.buf = 0;
113 }
114 }
115
116 #[inline(always)]
117 pub fn extend_constant(&mut self, length: usize, value: bool) {
118 let bits_in_buf = self.bit_len % 64;
120 if bits_in_buf + length < 64 {
121 let bit_block = ((value as u64) << length) - (value as u64);
122 self.buf |= bit_block << bits_in_buf;
123 self.bit_len += length;
124 } else {
125 self.extend_constant_slow(length, value);
126 }
127 }
128
129 #[cold]
130 fn extend_constant_slow(&mut self, length: usize, value: bool) {
131 unsafe {
132 let value_spread = if value { u64::MAX } else { 0 }; self.reserve(length);
136 let bits_in_buf = self.bit_len % 64;
137 let ext_buf = self.buf | (value_spread << bits_in_buf);
138 self.flush_word_unchecked(ext_buf);
139 self.set_bits_in_bytes += ext_buf.count_ones() as usize;
140
141 let remaining_bits = length - (64 - bits_in_buf);
143 let remaining_words = remaining_bits / 64;
144 for _ in 0..remaining_words {
145 self.flush_word_unchecked(value_spread);
146 }
147 self.set_bits_in_bytes += (remaining_words * 64) & value_spread as usize;
148
149 self.buf = ((value as u64) << (remaining_bits % 64)) - (value as u64);
151 self.bit_len += length;
152 }
153 }
154
155 pub unsafe fn push_word_with_len_unchecked(&mut self, word: u64, length: usize) {
160 debug_assert!(self.bit_len + length <= self.bit_cap);
161 debug_assert!(length <= 64);
162 debug_assert!(length == 64 || (word >> length) == 0);
163 let bits_in_buf = self.bit_len % 64;
164 self.buf |= word << bits_in_buf;
165 if bits_in_buf + length >= 64 {
166 self.flush_word_unchecked(self.buf);
167 self.set_bits_in_bytes += self.buf.count_ones() as usize;
168 self.buf = if bits_in_buf > 0 {
169 word >> (64 - bits_in_buf)
170 } else {
171 0
172 };
173 }
174 self.bit_len += length;
175 }
176
177 unsafe fn extend_from_slice_unchecked(
181 &mut self,
182 mut slice: &[u8],
183 mut offset: usize,
184 mut length: usize,
185 ) {
186 if length == 0 {
187 return;
188 }
189
190 let slice_bit_offset = offset % 8;
192 if slice_bit_offset > 0 {
193 let bits_in_first_byte = (8 - slice_bit_offset).min(length);
194 let first_byte = *slice.get_unchecked(offset / 8) >> slice_bit_offset;
195 self.push_word_with_len_unchecked(
196 first_byte as u64 & ((1 << bits_in_first_byte) - 1),
197 bits_in_first_byte,
198 );
199 length -= bits_in_first_byte;
200 offset += bits_in_first_byte;
201 }
202 slice = slice.get_unchecked(offset / 8..);
203
204 let bits_in_buf = self.bit_len % 64;
206 if bits_in_buf > 0 {
207 while length >= 64 {
208 let word = u64::from_le_bytes(slice.get_unchecked(0..8).try_into().unwrap());
209 self.buf |= word << bits_in_buf;
210 self.flush_word_unchecked(self.buf);
211 self.set_bits_in_bytes += self.buf.count_ones() as usize;
212 self.buf = word >> (64 - bits_in_buf);
213 self.bit_len += 64;
214 length -= 64;
215 slice = slice.get_unchecked(8..);
216 }
217 } else {
218 while length >= 64 {
219 let word = u64::from_le_bytes(slice.get_unchecked(0..8).try_into().unwrap());
220 self.flush_word_unchecked(word);
221 self.set_bits_in_bytes += word.count_ones() as usize;
222 self.bit_len += 64;
223 length -= 64;
224 slice = slice.get_unchecked(8..);
225 }
226 }
227
228 if length > 0 {
230 let word = load_padded_le_u64(slice);
231 self.push_word_with_len_unchecked(word & ((1 << length) - 1), length);
232 }
233 }
234
235 unsafe fn extend_each_repeated_from_slice_unchecked(
239 &mut self,
240 slice: &[u8],
241 offset: usize,
242 length: usize,
243 repeats: usize,
244 ) {
245 if repeats == 0 {
246 return;
247 }
248 if repeats == 1 {
249 return self.extend_from_slice_unchecked(slice, offset, length);
250 }
251 for bit_idx in offset..length {
252 let bit = (*slice.get_unchecked(bit_idx / 8) >> (bit_idx % 8)) & 1 != 0;
253 self.extend_constant(repeats, bit);
254 }
255 }
256
257 pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) {
258 assert!(8 * slice.len() >= offset + length);
259 self.reserve(length);
260 unsafe {
261 self.extend_from_slice_unchecked(slice, offset, length);
262 }
263 }
264
265 pub fn extend_each_repeated_from_slice(
266 &mut self,
267 slice: &[u8],
268 offset: usize,
269 length: usize,
270 repeats: usize,
271 ) {
272 assert!(8 * slice.len() >= offset + length);
273 self.reserve(length * repeats);
274 unsafe {
275 self.extend_each_repeated_from_slice_unchecked(slice, offset, length, repeats);
276 }
277 }
278
279 pub fn extend_from_bitmap(&mut self, bitmap: &Bitmap) {
280 let (slice, offset, length) = bitmap.as_slice();
283 self.extend_from_slice(slice, offset, length);
284 }
285
286 pub fn extend_from_bitmask(&mut self, bitmap: BitMask<'_>) {
287 let (slice, offset, length) = bitmap.inner();
288 self.extend_from_slice(slice, offset, length);
289 }
290
291 pub fn subslice_extend_from_bitmap(&mut self, bitmap: &Bitmap, start: usize, length: usize) {
293 let (slice, bm_offset, bm_length) = bitmap.as_slice();
294 assert!(start + length <= bm_length);
295 self.extend_from_slice(slice, bm_offset + start, length);
296 }
297
298 pub fn subslice_extend_each_repeated_from_bitmap(
300 &mut self,
301 bitmap: &Bitmap,
302 start: usize,
303 length: usize,
304 repeats: usize,
305 ) {
306 let (slice, bm_offset, bm_length) = bitmap.as_slice();
307 assert!(start + length <= bm_length);
308 self.extend_each_repeated_from_slice(slice, bm_offset + start, length, repeats);
309 }
310
311 pub fn subslice_extend_from_opt_validity(
312 &mut self,
313 bitmap: Option<&Bitmap>,
314 start: usize,
315 length: usize,
316 ) {
317 match bitmap {
318 Some(bm) => self.subslice_extend_from_bitmap(bm, start, length),
319 None => self.extend_constant(length, true),
320 }
321 }
322
323 pub fn subslice_extend_each_repeated_from_opt_validity(
324 &mut self,
325 bitmap: Option<&Bitmap>,
326 start: usize,
327 length: usize,
328 repeats: usize,
329 ) {
330 match bitmap {
331 Some(bm) => self.subslice_extend_each_repeated_from_bitmap(bm, start, length, repeats),
332 None => self.extend_constant(length * repeats, true),
333 }
334 }
335
336 pub unsafe fn gather_extend_from_slice(
339 &mut self,
340 slice: &[u8],
341 offset: usize,
342 length: usize,
343 idxs: &[IdxSize],
344 ) {
345 assert!(8 * slice.len() >= offset + length);
346
347 self.reserve(idxs.len());
348 unsafe {
349 for idx in idxs {
350 debug_assert!((*idx as usize) < length);
351 let idx_in_slice = offset + *idx as usize;
352 let bit = (*slice.get_unchecked(idx_in_slice / 8) >> (idx_in_slice % 8)) & 1;
353 self.push_unchecked(bit != 0);
354 }
355 }
356 }
357
358 pub fn opt_gather_extend_from_slice(
359 &mut self,
360 slice: &[u8],
361 offset: usize,
362 length: usize,
363 idxs: &[IdxSize],
364 ) {
365 assert!(8 * slice.len() >= offset + length);
366
367 self.reserve(idxs.len());
368 unsafe {
369 for idx in idxs {
370 if (*idx as usize) < length {
371 let idx_in_slice = offset + *idx as usize;
372 let bit = (*slice.get_unchecked(idx_in_slice / 8) >> (idx_in_slice % 8)) & 1;
373 self.push_unchecked(bit != 0);
374 } else {
375 self.push_unchecked(false);
376 }
377 }
378 }
379 }
380
381 pub unsafe fn gather_extend_from_bitmap(&mut self, bitmap: &Bitmap, idxs: &[IdxSize]) {
384 let (slice, offset, length) = bitmap.as_slice();
385 self.gather_extend_from_slice(slice, offset, length, idxs);
386 }
387
388 pub fn opt_gather_extend_from_bitmap(&mut self, bitmap: &Bitmap, idxs: &[IdxSize]) {
389 let (slice, offset, length) = bitmap.as_slice();
390 self.opt_gather_extend_from_slice(slice, offset, length, idxs);
391 }
392
393 pub unsafe fn gather_extend_from_opt_validity(
396 &mut self,
397 bitmap: Option<&Bitmap>,
398 idxs: &[IdxSize],
399 length: usize,
400 ) {
401 if let Some(bm) = bitmap {
402 let (slice, offset, sl_length) = bm.as_slice();
403 debug_assert_eq!(sl_length, length);
404 self.gather_extend_from_slice(slice, offset, length, idxs);
405 } else {
406 self.extend_constant(length, true);
407 }
408 }
409
410 pub fn opt_gather_extend_from_opt_validity(
411 &mut self,
412 bitmap: Option<&Bitmap>,
413 idxs: &[IdxSize],
414 length: usize,
415 ) {
416 if let Some(bm) = bitmap {
417 let (slice, offset, sl_length) = bm.as_slice();
418 debug_assert_eq!(sl_length, length);
419 self.opt_gather_extend_from_slice(slice, offset, sl_length, idxs);
420 } else {
421 unsafe {
422 self.reserve(idxs.len());
423 for idx in idxs {
424 self.push_unchecked((*idx as usize) < length);
425 }
426 }
427 }
428 }
429
430 unsafe fn finish(&mut self) {
433 if !self.bit_len.is_multiple_of(64) {
434 self.bytes.extend_from_slice(&self.buf.to_le_bytes());
435 self.set_bits_in_bytes += self.buf.count_ones() as usize;
436 self.buf = 0;
437 }
438 }
439
440 pub fn into_mut(mut self) -> MutableBitmap {
442 unsafe {
443 self.finish();
444 MutableBitmap::from_vec(self.bytes, self.bit_len)
445 }
446 }
447
448 pub fn into_opt_mut_validity(mut self) -> Option<MutableBitmap> {
450 unsafe {
451 self.finish();
452 if self.set_bits_in_bytes == self.bit_len {
453 return None;
454 }
455 Some(MutableBitmap::from_vec(self.bytes, self.bit_len))
456 }
457 }
458
459 pub fn freeze(mut self) -> Bitmap {
461 unsafe {
462 self.finish();
463 let storage = SharedStorage::from_vec(self.bytes);
464 Bitmap::from_inner_unchecked(
465 storage,
466 0,
467 self.bit_len,
468 Some(self.bit_len - self.set_bits_in_bytes),
469 )
470 }
471 }
472
473 pub fn into_opt_validity(mut self) -> Option<Bitmap> {
475 unsafe {
476 self.finish();
477 if self.set_bits_in_bytes == self.bit_len {
478 return None;
479 }
480 let storage = SharedStorage::from_vec(self.bytes);
481 let bitmap = Bitmap::from_inner_unchecked(
482 storage,
483 0,
484 self.bit_len,
485 Some(self.bit_len - self.set_bits_in_bytes),
486 );
487 Some(bitmap)
488 }
489 }
490
491 pub fn extend_trusted_len_iter<I>(&mut self, iterator: I)
492 where
493 I: Iterator<Item = bool> + TrustedLen,
494 {
495 self.reserve(iterator.size_hint().1.unwrap());
496 for b in iterator {
497 unsafe {
499 self.push_unchecked(b);
500 }
501 }
502 }
503
504 #[inline]
505 pub fn from_trusted_len_iter<I>(iterator: I) -> Self
506 where
507 I: Iterator<Item = bool> + TrustedLen,
508 {
509 let mut builder = Self::new();
510 builder.extend_trusted_len_iter(iterator);
511 builder
512 }
513}
514
515pub enum OptBitmapBuilder {
519 AllTrue { bit_len: usize, bit_cap: usize },
520 MayHaveFalse(BitmapBuilder),
521}
522
523impl Default for OptBitmapBuilder {
524 fn default() -> Self {
525 Self::AllTrue {
526 bit_len: 0,
527 bit_cap: 0,
528 }
529 }
530}
531
532impl OptBitmapBuilder {
533 pub fn reserve(&mut self, additional: usize) {
534 match self {
535 Self::AllTrue { bit_len, bit_cap } => {
536 *bit_cap = usize::max(*bit_cap, *bit_len + additional);
537 },
538 Self::MayHaveFalse(inner) => inner.reserve(additional),
539 }
540 }
541
542 pub fn extend_constant(&mut self, length: usize, value: bool) {
543 match self {
544 Self::AllTrue { bit_len, bit_cap } => {
545 if value {
546 *bit_cap = usize::max(*bit_cap, *bit_len + length);
547 *bit_len += length;
548 } else {
549 self.get_builder().extend_constant(length, value);
550 }
551 },
552 Self::MayHaveFalse(inner) => inner.extend_constant(length, value),
553 }
554 }
555
556 pub fn into_opt_validity(self) -> Option<Bitmap> {
557 match self {
558 Self::AllTrue { .. } => None,
559 Self::MayHaveFalse(inner) => inner.into_opt_validity(),
560 }
561 }
562
563 pub fn subslice_extend_from_opt_validity(
564 &mut self,
565 bitmap: Option<&Bitmap>,
566 start: usize,
567 length: usize,
568 ) {
569 match bitmap {
570 Some(bm) => {
571 self.get_builder()
572 .subslice_extend_from_bitmap(bm, start, length);
573 },
574 None => {
575 self.extend_constant(length, true);
576 },
577 }
578 }
579
580 pub fn subslice_extend_each_repeated_from_opt_validity(
581 &mut self,
582 bitmap: Option<&Bitmap>,
583 start: usize,
584 length: usize,
585 repeats: usize,
586 ) {
587 match bitmap {
588 Some(bm) => {
589 self.get_builder()
590 .subslice_extend_each_repeated_from_bitmap(bm, start, length, repeats);
591 },
592 None => {
593 self.extend_constant(length * repeats, true);
594 },
595 }
596 }
597
598 pub unsafe fn gather_extend_from_opt_validity(
601 &mut self,
602 bitmap: Option<&Bitmap>,
603 idxs: &[IdxSize],
604 ) {
605 match bitmap {
606 Some(bm) => {
607 self.get_builder().gather_extend_from_bitmap(bm, idxs);
608 },
609 None => {
610 self.extend_constant(idxs.len(), true);
611 },
612 }
613 }
614
615 pub fn opt_gather_extend_from_opt_validity(
616 &mut self,
617 bitmap: Option<&Bitmap>,
618 idxs: &[IdxSize],
619 length: usize,
620 ) {
621 match bitmap {
622 Some(bm) => {
623 self.get_builder().opt_gather_extend_from_bitmap(bm, idxs);
624 },
625 None => {
626 if let Some(first_oob) = idxs.iter().position(|idx| *idx as usize >= length) {
627 let builder = self.get_builder();
628 builder.extend_constant(first_oob, true);
629 for idx in idxs.iter().skip(first_oob) {
630 builder.push((*idx as usize) < length);
631 }
632 } else {
633 self.extend_constant(idxs.len(), true);
634 }
635 },
636 }
637 }
638
639 fn get_builder(&mut self) -> &mut BitmapBuilder {
640 match self {
641 Self::AllTrue { bit_len, bit_cap } => {
642 let mut builder = BitmapBuilder::with_capacity(*bit_cap);
643 builder.extend_constant(*bit_len, true);
644 *self = Self::MayHaveFalse(builder);
645 let Self::MayHaveFalse(inner) = self else {
646 unreachable!()
647 };
648 inner
649 },
650 Self::MayHaveFalse(inner) => inner,
651 }
652 }
653}