1#![allow(unsafe_op_in_unsafe_fn)]
2use polars_buffer::SharedStorage;
3use polars_utils::IdxSize;
4use polars_utils::slice::load_padded_le_u64;
5
6use super::bitmask::BitMask;
7use crate::bitmap::{Bitmap, MutableBitmap};
8use crate::trusted_len::TrustedLen;
9
10#[derive(Default, Clone)]
12pub struct BitmapBuilder {
13 buf: u64, bit_len: usize, bit_cap: usize, set_bits_in_bytes: usize, bytes: Vec<u8>,
18}
19
20impl BitmapBuilder {
21 pub fn new() -> Self {
22 Self::default()
23 }
24
25 #[inline(always)]
26 pub fn len(&self) -> usize {
27 self.bit_len
28 }
29
30 #[inline(always)]
31 pub fn is_empty(&self) -> bool {
32 self.bit_len == 0
33 }
34
35 #[inline(always)]
36 pub fn capacity(&self) -> usize {
37 self.bit_cap
38 }
39
40 #[inline(always)]
41 pub fn set_bits(&self) -> usize {
42 self.set_bits_in_bytes + self.buf.count_ones() as usize
43 }
44
45 #[inline(always)]
46 pub fn unset_bits(&self) -> usize {
47 self.bit_len - self.set_bits()
48 }
49
50 pub fn with_capacity(bits: usize) -> Self {
51 let bytes = Vec::with_capacity(bits.div_ceil(64) * 8);
52 let words_available = bytes.capacity() / 8;
53 Self {
54 buf: 0,
55 bit_len: 0,
56 bit_cap: words_available * 64,
57 set_bits_in_bytes: 0,
58 bytes,
59 }
60 }
61
62 #[inline(always)]
63 pub fn reserve(&mut self, additional: usize) {
64 if self.bit_len + additional > self.bit_cap {
65 self.reserve_slow(additional)
66 }
67 }
68
69 #[cold]
70 #[inline(never)]
71 fn reserve_slow(&mut self, additional: usize) {
72 let bytes_needed = (self.bit_len + additional).div_ceil(64) * 8;
73 self.bytes.reserve(bytes_needed - self.bytes.len());
74 let words_available = self.bytes.capacity() / 8;
75 self.bit_cap = words_available * 64;
76 }
77
78 pub fn clear(&mut self) {
79 self.buf = 0;
80 self.bit_len = 0;
81 self.set_bits_in_bytes = 0;
82 self.bytes.clear();
83 }
84
85 #[inline(always)]
86 pub fn push(&mut self, x: bool) {
87 self.reserve(1);
88 unsafe { self.push_unchecked(x) }
89 }
90
91 #[inline(always)]
95 unsafe fn flush_word_unchecked(&mut self, w: u64) {
96 let cur_len = self.bytes.len();
97 let p = self.bytes.as_mut_ptr().add(cur_len).cast::<u64>();
98 p.write_unaligned(w.to_le());
99 self.bytes.set_len(cur_len + 8);
100 }
101
102 #[inline(always)]
105 pub unsafe fn push_unchecked(&mut self, x: bool) {
106 debug_assert!(self.bit_len < self.bit_cap);
107 self.buf |= (x as u64) << (self.bit_len % 64);
108 self.bit_len += 1;
109 if self.bit_len.is_multiple_of(64) {
110 self.flush_word_unchecked(self.buf);
111 self.set_bits_in_bytes += self.buf.count_ones() as usize;
112 self.buf = 0;
113 }
114 }
115
116 #[inline(always)]
117 pub fn extend_constant(&mut self, length: usize, value: bool) {
118 let bits_in_buf = self.bit_len % 64;
120 if bits_in_buf + length < 64 {
121 let bit_block = ((value as u64) << length) - (value as u64);
122 self.buf |= bit_block << bits_in_buf;
123 self.bit_len += length;
124 } else {
125 self.extend_constant_slow(length, value);
126 }
127 }
128
129 #[cold]
130 fn extend_constant_slow(&mut self, length: usize, value: bool) {
131 unsafe {
132 let value_spread = if value { u64::MAX } else { 0 }; self.reserve(length);
136 let bits_in_buf = self.bit_len % 64;
137 let ext_buf = self.buf | (value_spread << bits_in_buf);
138 self.flush_word_unchecked(ext_buf);
139 self.set_bits_in_bytes += ext_buf.count_ones() as usize;
140
141 let remaining_bits = length - (64 - bits_in_buf);
143 let remaining_words = remaining_bits / 64;
144 for _ in 0..remaining_words {
145 self.flush_word_unchecked(value_spread);
146 }
147 self.set_bits_in_bytes += (remaining_words * 64) & value_spread as usize;
148
149 self.buf = ((value as u64) << (remaining_bits % 64)) - (value as u64);
151 self.bit_len += length;
152 }
153 }
154
155 pub unsafe fn push_word_with_len_unchecked(&mut self, word: u64, length: usize) {
160 debug_assert!(self.bit_len + length <= self.bit_cap);
161 debug_assert!(length <= 64);
162 debug_assert!(length == 64 || (word >> length) == 0);
163 let bits_in_buf = self.bit_len % 64;
164 self.buf |= word << bits_in_buf;
165 if bits_in_buf + length >= 64 {
166 self.flush_word_unchecked(self.buf);
167 self.set_bits_in_bytes += self.buf.count_ones() as usize;
168 self.buf = if bits_in_buf > 0 {
169 word >> (64 - bits_in_buf)
170 } else {
171 0
172 };
173 }
174 self.bit_len += length;
175 }
176
177 unsafe fn extend_from_slice_unchecked(
181 &mut self,
182 mut slice: &[u8],
183 mut offset: usize,
184 mut length: usize,
185 ) {
186 if length == 0 {
187 return;
188 }
189
190 let slice_bit_offset = offset % 8;
192 if slice_bit_offset > 0 {
193 let bits_in_first_byte = (8 - slice_bit_offset).min(length);
194 let first_byte = *slice.get_unchecked(offset / 8) >> slice_bit_offset;
195 self.push_word_with_len_unchecked(
196 first_byte as u64 & ((1 << bits_in_first_byte) - 1),
197 bits_in_first_byte,
198 );
199 length -= bits_in_first_byte;
200 offset += bits_in_first_byte;
201 }
202 slice = slice.get_unchecked(offset / 8..);
203
204 let bits_in_buf = self.bit_len % 64;
206 if bits_in_buf > 0 {
207 while length >= 64 {
208 let word = u64::from_le_bytes(slice.get_unchecked(0..8).try_into().unwrap());
209 self.buf |= word << bits_in_buf;
210 self.flush_word_unchecked(self.buf);
211 self.set_bits_in_bytes += self.buf.count_ones() as usize;
212 self.buf = word >> (64 - bits_in_buf);
213 self.bit_len += 64;
214 length -= 64;
215 slice = slice.get_unchecked(8..);
216 }
217 } else {
218 while length >= 64 {
219 let word = u64::from_le_bytes(slice.get_unchecked(0..8).try_into().unwrap());
220 self.flush_word_unchecked(word);
221 self.set_bits_in_bytes += word.count_ones() as usize;
222 self.bit_len += 64;
223 length -= 64;
224 slice = slice.get_unchecked(8..);
225 }
226 }
227
228 if length > 0 {
230 let word = load_padded_le_u64(slice);
231 self.push_word_with_len_unchecked(word & ((1 << length) - 1), length);
232 }
233 }
234
235 unsafe fn extend_each_repeated_from_slice_unchecked(
239 &mut self,
240 slice: &[u8],
241 offset: usize,
242 length: usize,
243 repeats: usize,
244 ) {
245 debug_assert!(8 * slice.len() >= offset + length);
246 if repeats == 0 {
247 return;
248 }
249 if repeats == 1 {
250 return self.extend_from_slice_unchecked(slice, offset, length);
251 }
252 for bit_idx in offset..(offset + length) {
253 let bit = (*slice.get_unchecked(bit_idx / 8) >> (bit_idx % 8)) & 1 != 0;
254 self.extend_constant(repeats, bit);
255 }
256 }
257
258 pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) {
259 assert!(8 * slice.len() >= offset + length);
260 self.reserve(length);
261 unsafe {
262 self.extend_from_slice_unchecked(slice, offset, length);
263 }
264 }
265
266 pub fn extend_each_repeated_from_slice(
267 &mut self,
268 slice: &[u8],
269 offset: usize,
270 length: usize,
271 repeats: usize,
272 ) {
273 assert!(8 * slice.len() >= offset + length);
274 self.reserve(length * repeats);
275 unsafe {
276 self.extend_each_repeated_from_slice_unchecked(slice, offset, length, repeats);
277 }
278 }
279
280 pub fn extend_from_bitmap(&mut self, bitmap: &Bitmap) {
281 let (slice, offset, length) = bitmap.as_slice();
284 self.extend_from_slice(slice, offset, length);
285 }
286
287 pub fn extend_from_bitmask(&mut self, bitmap: BitMask<'_>) {
288 let (slice, offset, length) = bitmap.inner();
289 self.extend_from_slice(slice, offset, length);
290 }
291
292 pub fn subslice_extend_from_bitmap(&mut self, bitmap: &Bitmap, start: usize, length: usize) {
294 let (slice, bm_offset, bm_length) = bitmap.as_slice();
295 assert!(start + length <= bm_length);
296 self.extend_from_slice(slice, bm_offset + start, length);
297 }
298
299 pub fn subslice_extend_each_repeated_from_bitmap(
301 &mut self,
302 bitmap: &Bitmap,
303 start: usize,
304 length: usize,
305 repeats: usize,
306 ) {
307 let (slice, bm_offset, bm_length) = bitmap.as_slice();
308 assert!(start + length <= bm_length);
309 self.extend_each_repeated_from_slice(slice, bm_offset + start, length, repeats);
310 }
311
312 pub fn subslice_extend_from_opt_validity(
313 &mut self,
314 bitmap: Option<&Bitmap>,
315 start: usize,
316 length: usize,
317 ) {
318 match bitmap {
319 Some(bm) => self.subslice_extend_from_bitmap(bm, start, length),
320 None => self.extend_constant(length, true),
321 }
322 }
323
324 pub fn subslice_extend_each_repeated_from_opt_validity(
325 &mut self,
326 bitmap: Option<&Bitmap>,
327 start: usize,
328 length: usize,
329 repeats: usize,
330 ) {
331 match bitmap {
332 Some(bm) => self.subslice_extend_each_repeated_from_bitmap(bm, start, length, repeats),
333 None => self.extend_constant(length * repeats, true),
334 }
335 }
336
337 pub unsafe fn gather_extend_from_slice(
340 &mut self,
341 slice: &[u8],
342 offset: usize,
343 length: usize,
344 idxs: &[IdxSize],
345 ) {
346 assert!(8 * slice.len() >= offset + length);
347
348 self.reserve(idxs.len());
349 unsafe {
350 for idx in idxs {
351 debug_assert!((*idx as usize) < length);
352 let idx_in_slice = offset + *idx as usize;
353 let bit = (*slice.get_unchecked(idx_in_slice / 8) >> (idx_in_slice % 8)) & 1;
354 self.push_unchecked(bit != 0);
355 }
356 }
357 }
358
359 pub fn opt_gather_extend_from_slice(
360 &mut self,
361 slice: &[u8],
362 offset: usize,
363 length: usize,
364 idxs: &[IdxSize],
365 ) {
366 assert!(8 * slice.len() >= offset + length);
367
368 self.reserve(idxs.len());
369 unsafe {
370 for idx in idxs {
371 if (*idx as usize) < length {
372 let idx_in_slice = offset + *idx as usize;
373 let bit = (*slice.get_unchecked(idx_in_slice / 8) >> (idx_in_slice % 8)) & 1;
374 self.push_unchecked(bit != 0);
375 } else {
376 self.push_unchecked(false);
377 }
378 }
379 }
380 }
381
382 pub unsafe fn gather_extend_from_bitmap(&mut self, bitmap: &Bitmap, idxs: &[IdxSize]) {
385 let (slice, offset, length) = bitmap.as_slice();
386 self.gather_extend_from_slice(slice, offset, length, idxs);
387 }
388
389 pub fn opt_gather_extend_from_bitmap(&mut self, bitmap: &Bitmap, idxs: &[IdxSize]) {
390 let (slice, offset, length) = bitmap.as_slice();
391 self.opt_gather_extend_from_slice(slice, offset, length, idxs);
392 }
393
394 pub unsafe fn gather_extend_from_opt_validity(
397 &mut self,
398 bitmap: Option<&Bitmap>,
399 idxs: &[IdxSize],
400 length: usize,
401 ) {
402 if let Some(bm) = bitmap {
403 let (slice, offset, sl_length) = bm.as_slice();
404 debug_assert_eq!(sl_length, length);
405 self.gather_extend_from_slice(slice, offset, length, idxs);
406 } else {
407 self.extend_constant(length, true);
408 }
409 }
410
411 pub fn opt_gather_extend_from_opt_validity(
412 &mut self,
413 bitmap: Option<&Bitmap>,
414 idxs: &[IdxSize],
415 length: usize,
416 ) {
417 if let Some(bm) = bitmap {
418 let (slice, offset, sl_length) = bm.as_slice();
419 debug_assert_eq!(sl_length, length);
420 self.opt_gather_extend_from_slice(slice, offset, sl_length, idxs);
421 } else {
422 unsafe {
423 self.reserve(idxs.len());
424 for idx in idxs {
425 self.push_unchecked((*idx as usize) < length);
426 }
427 }
428 }
429 }
430
431 unsafe fn finish(&mut self) {
434 if !self.bit_len.is_multiple_of(64) {
435 self.bytes.extend_from_slice(&self.buf.to_le_bytes());
436 self.set_bits_in_bytes += self.buf.count_ones() as usize;
437 self.buf = 0;
438 }
439 }
440
441 pub fn into_mut(mut self) -> MutableBitmap {
443 unsafe {
444 self.finish();
445 MutableBitmap::from_vec(self.bytes, self.bit_len)
446 }
447 }
448
449 pub fn into_opt_mut_validity(mut self) -> Option<MutableBitmap> {
451 unsafe {
452 self.finish();
453 if self.set_bits_in_bytes == self.bit_len {
454 return None;
455 }
456 Some(MutableBitmap::from_vec(self.bytes, self.bit_len))
457 }
458 }
459
460 pub fn freeze(mut self) -> Bitmap {
462 unsafe {
463 self.finish();
464 let storage = SharedStorage::from_vec(self.bytes);
465 Bitmap::from_inner_unchecked(
466 storage,
467 0,
468 self.bit_len,
469 Some(self.bit_len - self.set_bits_in_bytes),
470 )
471 }
472 }
473
474 pub fn into_opt_validity(mut self) -> Option<Bitmap> {
476 unsafe {
477 self.finish();
478 if self.set_bits_in_bytes == self.bit_len {
479 return None;
480 }
481 let storage = SharedStorage::from_vec(self.bytes);
482 let bitmap = Bitmap::from_inner_unchecked(
483 storage,
484 0,
485 self.bit_len,
486 Some(self.bit_len - self.set_bits_in_bytes),
487 );
488 Some(bitmap)
489 }
490 }
491
492 pub fn extend_trusted_len_iter<I>(&mut self, iterator: I)
493 where
494 I: Iterator<Item = bool> + TrustedLen,
495 {
496 self.reserve(iterator.size_hint().1.unwrap());
497 for b in iterator {
498 unsafe {
500 self.push_unchecked(b);
501 }
502 }
503 }
504
505 #[inline]
506 pub fn from_trusted_len_iter<I>(iterator: I) -> Self
507 where
508 I: Iterator<Item = bool> + TrustedLen,
509 {
510 let mut builder = Self::new();
511 builder.extend_trusted_len_iter(iterator);
512 builder
513 }
514}
515
516pub enum OptBitmapBuilder {
520 AllTrue { bit_len: usize, bit_cap: usize },
521 MayHaveFalse(BitmapBuilder),
522}
523
524impl Default for OptBitmapBuilder {
525 fn default() -> Self {
526 Self::AllTrue {
527 bit_len: 0,
528 bit_cap: 0,
529 }
530 }
531}
532
533impl OptBitmapBuilder {
534 pub fn reserve(&mut self, additional: usize) {
535 match self {
536 Self::AllTrue { bit_len, bit_cap } => {
537 *bit_cap = usize::max(*bit_cap, *bit_len + additional);
538 },
539 Self::MayHaveFalse(inner) => inner.reserve(additional),
540 }
541 }
542
543 pub fn extend_constant(&mut self, length: usize, value: bool) {
544 match self {
545 Self::AllTrue { bit_len, bit_cap } => {
546 if value {
547 *bit_cap = usize::max(*bit_cap, *bit_len + length);
548 *bit_len += length;
549 } else {
550 self.get_builder().extend_constant(length, value);
551 }
552 },
553 Self::MayHaveFalse(inner) => inner.extend_constant(length, value),
554 }
555 }
556
557 pub fn into_opt_validity(self) -> Option<Bitmap> {
558 match self {
559 Self::AllTrue { .. } => None,
560 Self::MayHaveFalse(inner) => inner.into_opt_validity(),
561 }
562 }
563
564 pub fn subslice_extend_from_opt_validity(
565 &mut self,
566 bitmap: Option<&Bitmap>,
567 start: usize,
568 length: usize,
569 ) {
570 match bitmap {
571 Some(bm) => {
572 self.get_builder()
573 .subslice_extend_from_bitmap(bm, start, length);
574 },
575 None => {
576 self.extend_constant(length, true);
577 },
578 }
579 }
580
581 pub fn subslice_extend_each_repeated_from_opt_validity(
582 &mut self,
583 bitmap: Option<&Bitmap>,
584 start: usize,
585 length: usize,
586 repeats: usize,
587 ) {
588 match bitmap {
589 Some(bm) => {
590 self.get_builder()
591 .subslice_extend_each_repeated_from_bitmap(bm, start, length, repeats);
592 },
593 None => {
594 self.extend_constant(length * repeats, true);
595 },
596 }
597 }
598
599 pub unsafe fn gather_extend_from_opt_validity(
602 &mut self,
603 bitmap: Option<&Bitmap>,
604 idxs: &[IdxSize],
605 ) {
606 match bitmap {
607 Some(bm) => {
608 self.get_builder().gather_extend_from_bitmap(bm, idxs);
609 },
610 None => {
611 self.extend_constant(idxs.len(), true);
612 },
613 }
614 }
615
616 pub fn opt_gather_extend_from_opt_validity(
617 &mut self,
618 bitmap: Option<&Bitmap>,
619 idxs: &[IdxSize],
620 length: usize,
621 ) {
622 match bitmap {
623 Some(bm) => {
624 self.get_builder().opt_gather_extend_from_bitmap(bm, idxs);
625 },
626 None => {
627 if let Some(first_oob) = idxs.iter().position(|idx| *idx as usize >= length) {
628 let builder = self.get_builder();
629 builder.extend_constant(first_oob, true);
630 for idx in idxs.iter().skip(first_oob) {
631 builder.push((*idx as usize) < length);
632 }
633 } else {
634 self.extend_constant(idxs.len(), true);
635 }
636 },
637 }
638 }
639
640 fn get_builder(&mut self) -> &mut BitmapBuilder {
641 match self {
642 Self::AllTrue { bit_len, bit_cap } => {
643 let mut builder = BitmapBuilder::with_capacity(*bit_cap);
644 builder.extend_constant(*bit_len, true);
645 *self = Self::MayHaveFalse(builder);
646 let Self::MayHaveFalse(inner) = self else {
647 unreachable!()
648 };
649 inner
650 },
651 Self::MayHaveFalse(inner) => inner,
652 }
653 }
654}