1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors
use vortex_buffer::BitBuffer;
use vortex_buffer::BitBufferMut;
use vortex_error::VortexExpect;
use vortex_error::vortex_panic;
use vortex_mask::Mask;
use crate::dtype::Nullability;
use crate::dtype::Nullability::NonNullable;
use crate::dtype::Nullability::Nullable;
use crate::validity::Validity;
/// A validity (null) bit buffer builder that defers allocating its bit buffer until it is needed.
///
/// This is borrowed from arrow's null buffer builder, however we expose an `append_buffer`
/// method to append a boolean buffer directly.
///
/// While only non-null values have been appended, the builder merely counts them in `len`; the
/// bit buffer is created on the first operation that needs real bits (appending a null, appending
/// a buffer, or setting an individual bit).
pub struct LazyBitBufferBuilder {
// Materialized validity bits (`true` = non-null). `None` while the builder is still lazy.
inner: Option<BitBufferMut>,
// Number of non-null values counted while `inner` is `None`. Not updated once `inner` is
// materialized; `len()` then reports the buffer's own length instead.
len: usize,
// Capacity hint in bits, used to size the buffer when it is materialized.
capacity: usize,
}
impl LazyBitBufferBuilder {
/// Builds an empty builder that has not yet materialized its bit buffer.
///
/// `capacity` is the number of bits in the null buffer; it is only a hint that is consulted if
/// and when the buffer has to be materialized.
pub fn new(capacity: usize) -> Self {
    Self {
        capacity,
        len: 0,
        inner: None,
    }
}
/// Builds a builder whose initial contents are the given validity mask, consuming the mask so
/// its buffer can be moved in rather than copied where possible.
///
/// Use this instead of [`append_validity_mask`](Self::append_validity_mask) when the mask should
/// *become* the builder's contents rather than be appended to existing contents: owning the mask
/// lets us reuse its buffer.
pub fn from_validity_mask(validity_mask: Mask) -> Self {
    let materialized = match validity_mask {
        // A lazy builder already stands for `len` non-null values, so an all-valid mask needs
        // no bit buffer at all.
        Mask::AllTrue(len) => {
            return Self {
                capacity: len,
                len,
                inner: None,
            };
        }
        Mask::AllFalse(len) => BitBufferMut::new_unset(len),
        // Try to take the mask's buffer as-is; fall back to copying only when the conversion
        // to a mutable buffer hands the (still shared) buffer back to us.
        values @ Mask::Values(_) => match values.into_bit_buffer().try_into_mut() {
            Ok(owned) => owned,
            Err(shared) => BitBufferMut::copy_from(&shared),
        },
    };
    Self::from_buffer(materialized)
}
/// Wraps an existing, already-materialized bit buffer in a builder.
///
/// The lazy bookkeeping fields start at zero; the builder's length is read from the buffer
/// itself while it is materialized.
fn from_buffer(inner: BitBufferMut) -> Self {
    Self {
        capacity: 0,
        len: 0,
        inner: Some(inner),
    }
}
/// Appends `n` non-null values to the builder.
///
/// If the bit buffer has not been materialized this only bumps the running count.
#[inline]
pub fn append_n_non_nulls(&mut self, n: usize) {
    match self.inner.as_mut() {
        Some(bits) => bits.append_n(true, n),
        // Still lazy: counting is enough, no bits need to be written.
        None => self.len += n,
    }
}
/// Appends one non-null value to the builder.
///
/// If the bit buffer has not been materialized this only bumps the running count.
#[inline]
pub fn append_non_null(&mut self) {
    match self.inner.as_mut() {
        Some(bits) => bits.append(true),
        // Still lazy: counting is enough, no bit needs to be written.
        None => self.len += 1,
    }
}
/// Appends `n` null values to the builder, materializing the bit buffer first if necessary.
#[inline]
pub fn append_n_nulls(&mut self, n: usize) {
    self.materialize_if_needed();
    let bits = self
        .inner
        .as_mut()
        .vortex_expect("cannot append null to non-nullable builder");
    bits.append_n(false, n);
}
/// Appends one null value to the builder, materializing the bit buffer first if necessary.
#[inline]
pub fn append_null(&mut self) {
    self.materialize_if_needed();
    let bits = self
        .inner
        .as_mut()
        .vortex_expect("cannot append null to non-nullable builder");
    bits.append(false);
}
/// Appends every bit of `bool_buffer` to the builder; a `true` bit marks a non-null value.
///
/// Always materializes the bit buffer, since the incoming bits have to be stored.
#[inline]
pub fn append_buffer(&mut self, bool_buffer: &BitBuffer) {
    self.materialize_if_needed();
    let bits = self
        .inner
        .as_mut()
        .vortex_expect("buffer just materialized");
    bits.append_buffer(bool_buffer);
}
/// Appends the contents of a validity mask to the builder.
///
/// The mask is borrowed rather than consumed: in the `Mask::Values` case the bits are copied
/// into the running null buffer anyway, so owning the mask would not save anything.
pub fn append_validity_mask(&mut self, validity_mask: &Mask) {
    match validity_mask {
        Mask::Values(values) => self.append_buffer(values.bit_buffer()),
        Mask::AllFalse(count) => self.append_n_nulls(*count),
        Mask::AllTrue(count) => self.append_n_non_nulls(*count),
    }
}
/// Overwrites the validity bit at `index` with `v`.
///
/// Materializes the bit buffer if necessary, then forwards to the buffer's `set_to`.
// NOTE(review): handling of an out-of-range `index` is delegated to `BitBufferMut::set_to` —
// confirm its behavior before relying on it.
pub fn set_bit(&mut self, index: usize, v: bool) {
    self.materialize_if_needed();
    let bits = self
        .inner
        .as_mut()
        .vortex_expect("buffer just materialized");
    bits.set_to(index, v);
}
/// Returns how many values have been appended to the builder so far.
pub fn len(&self) -> usize {
    match &self.inner {
        // Once materialized, the buffer itself is the source of truth.
        Some(bits) => bits.len(),
        // Otherwise the running count of non-null values is the length.
        None => self.len,
    }
}
/// Resets the builder and hands back the frozen bit buffer, or `None` if it was never
/// materialized.
fn finish(&mut self) -> Option<BitBuffer> {
    let materialized = self.inner.take();
    self.len = 0;
    materialized.map(|bits| bits.freeze())
}
/// Finishes the builder and converts its contents into a `Validity` for the given nullability.
///
/// An unmaterialized builder yields `Validity::NonNullable` or `Validity::AllValid` depending on
/// `nullability`; a materialized one is converted from its bit buffer.
///
/// # Panics
///
/// Panics if `nullability` is `NonNullable` but the builder had materialized a bit buffer.
pub fn finish_with_nullability(&mut self, nullability: Nullability) -> Validity {
    match (self.finish(), nullability) {
        (None, NonNullable) => Validity::NonNullable,
        (None, Nullable) => Validity::AllValid,
        (Some(bits), Nullable) => Validity::from(bits),
        _ => vortex_panic!("Invalid nullability/nulls combination"),
    }
}
/// Ensures the builder can hold `additional` extra values.
///
/// While the bit buffer is unmaterialized this only grows the capacity hint used when the
/// buffer is eventually allocated; otherwise the request is forwarded to the buffer.
// NOTE(review): despite the method's name, the materialized path calls `reserve`, not an exact
// reservation — confirm that is intended.
pub fn reserve_exact(&mut self, additional: usize) {
    // Borrow the buffer once instead of testing `is_none()` and then unwrapping again.
    if let Some(bits) = self.inner.as_mut() {
        bits.reserve(additional);
    } else {
        self.capacity += additional;
    }
}
/// Makes sure the bit buffer exists, taking the cold `materialize` path only when it does not.
fn materialize_if_needed(&mut self) {
    if self.inner.is_some() {
        return;
    }
    self.materialize();
}
// Cold path: allocates the bit buffer and back-fills it with one set bit for every non-null
// value counted so far. Runs at most once between calls to `finish`.
#[cold]
#[inline(never)]
fn materialize(&mut self) {
    if self.inner.is_some() {
        return;
    }
    let valid_so_far = self.len;
    let mut bits = BitBufferMut::with_capacity(valid_so_far.max(self.capacity));
    bits.append_n(true, valid_so_far);
    self.inner = Some(bits);
}
}