arrow_buffer/builder/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Buffer builders
19
20mod boolean;
21mod null;
22mod offset;
23
24pub use boolean::*;
25pub use null::*;
26pub use offset::*;
27
28use crate::{ArrowNativeType, Buffer, MutableBuffer};
29use std::marker::PhantomData;
30
/// Builder for creating Arrow [`Buffer`] objects
///
/// A [`Buffer`] is the underlying data structure of Arrow's Arrays.
///
/// For all supported types, there are type definitions for the
/// generic version of `BufferBuilder<T>`, e.g. `BufferBuilder`.
///
/// **Note it is typically faster to create buffers directly from `Vec`**.
/// See example on [`Buffer`].
///
/// # See Also
/// * [`BooleanBufferBuilder`]: for packing bits in [`BooleanBuffer`]s
/// * [`NullBufferBuilder`]: for creating [`NullBuffer`]s of null values
///
/// [`BooleanBuffer`]: crate::BooleanBuffer
/// [`NullBuffer`]: crate::NullBuffer
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
/// let mut builder = BufferBuilder::<u8>::new(100);
/// builder.append_slice(&[42, 43, 44]);
/// builder.append(45);
/// let buffer = builder.finish();
/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
/// ```
#[derive(Debug)]
pub struct BufferBuilder<T: ArrowNativeType> {
    /// Underlying byte storage. Element counts reported by the builder are
    /// derived from its byte length divided by `size_of::<T>()`.
    buffer: MutableBuffer,
    /// Records the element type `T` without storing a value of that type.
    _marker: PhantomData<T>,
}
63
64impl<T: ArrowNativeType> BufferBuilder<T> {
65 /// Creates a new builder with initial capacity for _at least_ `capacity`
66 /// elements of type `T`.
67 ///
68 /// The capacity can later be manually adjusted with the
69 /// [`reserve()`](BufferBuilder::reserve) method.
70 /// Also the
71 /// [`append()`](BufferBuilder::append),
72 /// [`append_slice()`](BufferBuilder::append_slice) and
73 /// [`advance()`](BufferBuilder::advance)
74 /// methods automatically increase the capacity if needed.
75 ///
76 /// # Example:
77 ///
78 /// ```
79 /// # use arrow_buffer::builder::BufferBuilder;
80 /// let mut builder = BufferBuilder::<u8>::new(10);
81 ///
82 /// assert!(builder.capacity() >= 10);
83 /// ```
84 #[inline]
85 pub fn new(capacity: usize) -> Self {
86 let buffer = MutableBuffer::new(capacity * std::mem::size_of::<T>());
87
88 Self {
89 buffer,
90 _marker: PhantomData,
91 }
92 }
93
94 /// Creates a new builder from a [`MutableBuffer`]
95 ///
96 /// # Safety
97 ///
98 /// - `buffer` bytes must be aligned to type `T`
99 pub unsafe fn new_from_buffer(buffer: MutableBuffer) -> Self {
100 Self {
101 buffer,
102 _marker: PhantomData,
103 }
104 }
105
106 /// Returns the current number of array elements in the internal buffer.
107 ///
108 /// # Example:
109 ///
110 /// ```
111 /// # use arrow_buffer::builder::BufferBuilder;
112 /// let mut builder = BufferBuilder::<u8>::new(10);
113 /// builder.append(42);
114 ///
115 /// assert_eq!(builder.len(), 1);
116 /// ```
117 pub fn len(&self) -> usize {
118 self.buffer.len() / std::mem::size_of::<T>()
119 }
120
121 /// Returns whether the internal buffer is empty.
122 ///
123 /// # Example:
124 ///
125 /// ```
126 /// # use arrow_buffer::builder::BufferBuilder;
127 /// let mut builder = BufferBuilder::<u8>::new(10);
128 /// builder.append(42);
129 ///
130 /// assert_eq!(builder.is_empty(), false);
131 /// ```
132 pub fn is_empty(&self) -> bool {
133 self.buffer.is_empty()
134 }
135
136 /// Returns the actual capacity (number of elements) of the internal buffer.
137 ///
138 /// Note: the internal capacity returned by this method might be larger than
139 /// what you'd expect after setting the capacity in the `new()` or `reserve()`
140 /// functions.
141 pub fn capacity(&self) -> usize {
142 let byte_capacity = self.buffer.capacity();
143 byte_capacity / std::mem::size_of::<T>()
144 }
145
146 /// Increases the number of elements in the internal buffer by `n`
147 /// and resizes the buffer as needed.
148 ///
149 /// The values of the newly added elements are 0.
150 /// This method is usually used when appending `NULL` values to the buffer
151 /// as they still require physical memory space.
152 ///
153 /// # Example:
154 ///
155 /// ```
156 /// # use arrow_buffer::builder::BufferBuilder;
157 /// let mut builder = BufferBuilder::<u8>::new(10);
158 /// builder.advance(2);
159 ///
160 /// assert_eq!(builder.len(), 2);
161 /// ```
162 #[inline]
163 pub fn advance(&mut self, i: usize) {
164 self.buffer.extend_zeros(i * std::mem::size_of::<T>());
165 }
166
167 /// Reserves memory for _at least_ `n` more elements of type `T`.
168 ///
169 /// # Example:
170 ///
171 /// ```
172 /// # use arrow_buffer::builder::BufferBuilder;
173 /// let mut builder = BufferBuilder::<u8>::new(10);
174 /// builder.reserve(10);
175 ///
176 /// assert!(builder.capacity() >= 20);
177 /// ```
178 #[inline]
179 pub fn reserve(&mut self, n: usize) {
180 self.buffer.reserve(n * std::mem::size_of::<T>());
181 }
182
183 /// Appends a value of type `T` into the builder,
184 /// growing the internal buffer as needed.
185 ///
186 /// # Example:
187 ///
188 /// ```
189 /// # use arrow_buffer::builder::BufferBuilder;
190 /// let mut builder = BufferBuilder::<u8>::new(10);
191 /// builder.append(42);
192 ///
193 /// assert_eq!(builder.len(), 1);
194 /// ```
195 #[inline]
196 pub fn append(&mut self, v: T) {
197 self.reserve(1);
198 self.buffer.push(v);
199 }
200
201 /// Appends a value of type `T` into the builder N times,
202 /// growing the internal buffer as needed.
203 ///
204 /// # Example:
205 ///
206 /// ```
207 /// # use arrow_buffer::builder::BufferBuilder;
208 /// let mut builder = BufferBuilder::<u8>::new(10);
209 /// builder.append_n(10, 42);
210 ///
211 /// assert_eq!(builder.len(), 10);
212 /// ```
213 #[inline]
214 pub fn append_n(&mut self, n: usize, v: T) {
215 self.reserve(n);
216 self.extend(std::iter::repeat_n(v, n))
217 }
218
219 /// Appends `n`, zero-initialized values
220 ///
221 /// # Example:
222 ///
223 /// ```
224 /// # use arrow_buffer::builder::BufferBuilder;
225 /// let mut builder = BufferBuilder::<u32>::new(10);
226 /// builder.append_n_zeroed(3);
227 ///
228 /// assert_eq!(builder.len(), 3);
229 /// assert_eq!(builder.as_slice(), &[0, 0, 0])
230 /// ```
231 #[inline]
232 pub fn append_n_zeroed(&mut self, n: usize) {
233 self.buffer.extend_zeros(n * std::mem::size_of::<T>());
234 }
235
236 /// Appends a slice of type `T`, growing the internal buffer as needed.
237 ///
238 /// # Example:
239 ///
240 /// ```
241 /// # use arrow_buffer::builder::BufferBuilder;
242 /// let mut builder = BufferBuilder::<u8>::new(10);
243 /// builder.append_slice(&[42, 44, 46]);
244 ///
245 /// assert_eq!(builder.len(), 3);
246 /// ```
247 #[inline]
248 pub fn append_slice(&mut self, slice: &[T]) {
249 self.buffer.extend_from_slice(slice);
250 }
251
252 /// View the contents of this buffer as a slice
253 ///
254 /// ```
255 /// # use arrow_buffer::builder::BufferBuilder;
256 /// let mut builder = BufferBuilder::<f64>::new(10);
257 /// builder.append(1.3);
258 /// builder.append_n(2, 2.3);
259 ///
260 /// assert_eq!(builder.as_slice(), &[1.3, 2.3, 2.3]);
261 /// ```
262 #[inline]
263 pub fn as_slice(&self) -> &[T] {
264 // SAFETY
265 //
266 // - MutableBuffer is aligned and initialized for len elements of T
267 // - MutableBuffer corresponds to a single allocation
268 // - MutableBuffer does not support modification whilst active immutable borrows
269 unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len()) }
270 }
271
272 /// View the contents of this buffer as a mutable slice
273 ///
274 /// # Example:
275 ///
276 /// ```
277 /// # use arrow_buffer::builder::BufferBuilder;
278 /// let mut builder = BufferBuilder::<f32>::new(10);
279 ///
280 /// builder.append_slice(&[1., 2., 3.4]);
281 /// assert_eq!(builder.as_slice(), &[1., 2., 3.4]);
282 ///
283 /// builder.as_slice_mut()[1] = 4.2;
284 /// assert_eq!(builder.as_slice(), &[1., 4.2, 3.4]);
285 /// ```
286 #[inline]
287 pub fn as_slice_mut(&mut self) -> &mut [T] {
288 // SAFETY
289 //
290 // - MutableBuffer is aligned and initialized for len elements of T
291 // - MutableBuffer corresponds to a single allocation
292 // - MutableBuffer does not support modification whilst active immutable borrows
293 unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len()) }
294 }
295
296 /// Shorten this BufferBuilder to `len` items
297 ///
298 /// If `len` is greater than the builder's current length, this has no effect
299 ///
300 /// # Example:
301 ///
302 /// ```
303 /// # use arrow_buffer::builder::BufferBuilder;
304 /// let mut builder = BufferBuilder::<u16>::new(10);
305 ///
306 /// builder.append_slice(&[42, 44, 46]);
307 /// assert_eq!(builder.as_slice(), &[42, 44, 46]);
308 ///
309 /// builder.truncate(2);
310 /// assert_eq!(builder.as_slice(), &[42, 44]);
311 ///
312 /// builder.append(12);
313 /// assert_eq!(builder.as_slice(), &[42, 44, 12]);
314 /// ```
315 #[inline]
316 pub fn truncate(&mut self, len: usize) {
317 self.buffer.truncate(len * std::mem::size_of::<T>());
318 }
319
320 /// # Safety
321 /// This requires the iterator be a trusted length. This could instead require
322 /// the iterator implement `TrustedLen` once that is stabilized.
323 #[inline]
324 pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T>) {
325 let iter = iter.into_iter();
326 let len = iter
327 .size_hint()
328 .1
329 .expect("append_trusted_len_iter expects upper bound");
330 self.reserve(len);
331 self.extend(iter);
332 }
333
334 /// Resets this builder and returns an immutable [Buffer].
335 ///
336 /// Use [`Self::build`] when you don't need to reuse this builder.
337 ///
338 /// # Example:
339 ///
340 /// ```
341 /// # use arrow_buffer::builder::BufferBuilder;
342 /// let mut builder = BufferBuilder::<u8>::new(10);
343 /// builder.append_slice(&[42, 44, 46]);
344 /// let buffer = builder.finish();
345 /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
346 /// ```
347 #[inline]
348 pub fn finish(&mut self) -> Buffer {
349 let buf = std::mem::take(&mut self.buffer);
350 buf.into()
351 }
352
353 /// Builds an immutable [Buffer] without resetting the builder.
354 ///
355 /// This consumes the builder. Use [`Self::finish`] to reuse it.
356 ///
357 /// # Example:
358 ///
359 /// ```
360 /// # use arrow_buffer::builder::BufferBuilder;
361 /// let mut builder = BufferBuilder::<u8>::new(10);
362 /// builder.append_slice(&[42, 44, 46]);
363 /// let buffer = builder.build();
364 /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
365 /// ```
366 #[inline]
367 pub fn build(self) -> Buffer {
368 self.buffer.into()
369 }
370}
371
372impl<T: ArrowNativeType> Default for BufferBuilder<T> {
373 fn default() -> Self {
374 Self::new(0)
375 }
376}
377
378impl<T: ArrowNativeType> Extend<T> for BufferBuilder<T> {
379 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
380 self.buffer.extend(iter)
381 }
382}
383
384impl<T: ArrowNativeType> From<Vec<T>> for BufferBuilder<T> {
385 fn from(value: Vec<T>) -> Self {
386 let buffer = MutableBuffer::from(value);
387 // SAFETY
388 // - buffer is aligned to T
389 unsafe { Self::new_from_buffer(buffer) }
390 }
391}
392
393impl<T: ArrowNativeType> FromIterator<T> for BufferBuilder<T> {
394 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
395 let mut builder = Self::default();
396 builder.extend(iter);
397 builder
398 }
399}
400
#[cfg(test)]
mod tests {
    use super::*;
    use std::mem;

    /// A default builder holds no elements and reports zero byte capacity.
    #[test]
    fn default() {
        let empty = BufferBuilder::<u32>::default();
        assert!(empty.is_empty());
        assert!(empty.buffer.is_empty());
        assert_eq!(empty.buffer.capacity(), 0);
    }

    /// Collecting an iterator yields one element per item.
    #[test]
    fn from_iter() {
        let values = [1u16, 2, 3, 4];
        let collected: BufferBuilder<_> = values.into_iter().collect();
        assert_eq!(collected.len(), 4);
        assert_eq!(collected.buffer.len(), 4 * mem::size_of::<u16>());
    }

    /// Extending an existing builder appends after the current contents.
    #[test]
    fn extend() {
        let initial = [1, 2];
        let mut grown: BufferBuilder<_> = initial.into_iter().collect();
        assert_eq!(grown.len(), 2);
        grown.extend([3, 4]);
        assert_eq!(grown.len(), 4);
    }

    /// Truncating past the end leaves the length unchanged, and a later
    /// `advance` grows the mutable slice view accordingly.
    #[test]
    fn truncate_safety() {
        let mut subject = BufferBuilder::from(vec![40, -63, 90]);
        assert_eq!(subject.len(), 3);
        subject.truncate(151);
        assert_eq!(subject.len(), 3);
        subject.advance(219);
        assert_eq!(subject.len(), 222);
        let view = subject.as_slice_mut();
        assert_eq!(view.len(), 222);
    }

    /// Reserving `usize::MAX` more bytes on a non-empty builder panics.
    #[test]
    #[should_panic(expected = "buffer length overflow")]
    fn reserve_length_overflow() {
        let mut subject = BufferBuilder::<u8>::new(1);
        subject.append(0);
        subject.reserve(usize::MAX);
    }

    /// Zero-extending past the maximum byte length panics.
    #[test]
    #[should_panic(expected = "buffer length overflow")]
    fn append_n_zeroed_length_overflow() {
        let mut subject = BufferBuilder::<u64>::new(1);
        subject.append_n_zeroed(1);
        subject.append_n_zeroed(usize::MAX / mem::size_of::<u64>());
    }

    /// Advancing past the maximum byte length panics.
    #[test]
    #[should_panic(expected = "buffer length overflow")]
    fn advance_length_overflow() {
        let mut subject = BufferBuilder::<u64>::new(1);
        subject.advance(1);
        subject.advance(usize::MAX / mem::size_of::<u64>());
    }
}
466}