Skip to main content

arrow_buffer/builder/
mod.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
//! Buffer builders
19
20mod boolean;
21mod null;
22mod offset;
23
24pub use boolean::*;
25pub use null::*;
26pub use offset::*;
27
28use crate::{ArrowNativeType, Buffer, MutableBuffer};
29use std::marker::PhantomData;
30
31/// Builder for creating Arrow [`Buffer`] objects
32///
33/// A [`Buffer`] is the underlying data structure of Arrow's Arrays.
34///
35/// For all supported types, there are type definitions for the
36/// generic version of `BufferBuilder<T>`, e.g. `BufferBuilder`.
37///
38/// **Note it is typically faster to create buffers directly from `Vec`**.
39/// See example on [`Buffer`].
40///
41/// # See Also
42/// * [`BooleanBufferBuilder`]: for packing bits in [`BooleanBuffer`]s
43/// * [`NullBufferBuilder`]: for creating [`NullBuffer`]s of null values
44///
45/// [`BooleanBuffer`]: crate::BooleanBuffer
46/// [`NullBuffer`]: crate::NullBuffer
47///
48/// # Example:
49///
50/// ```
51/// # use arrow_buffer::builder::BufferBuilder;
52/// let mut builder = BufferBuilder::<u8>::new(100);
53/// builder.append_slice(&[42, 43, 44]);
54/// builder.append(45);
55/// let buffer = builder.finish();
56/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
57/// ```
58#[derive(Debug)]
59pub struct BufferBuilder<T: ArrowNativeType> {
60    buffer: MutableBuffer,
61    _marker: PhantomData<T>,
62}
63
64impl<T: ArrowNativeType> BufferBuilder<T> {
65    /// Creates a new builder with initial capacity for _at least_ `capacity`
66    /// elements of type `T`.
67    ///
68    /// The capacity can later be manually adjusted with the
69    /// [`reserve()`](BufferBuilder::reserve) method.
70    /// Also the
71    /// [`append()`](BufferBuilder::append),
72    /// [`append_slice()`](BufferBuilder::append_slice) and
73    /// [`advance()`](BufferBuilder::advance)
74    /// methods automatically increase the capacity if needed.
75    ///
76    /// # Example:
77    ///
78    /// ```
79    /// # use arrow_buffer::builder::BufferBuilder;
80    /// let mut builder = BufferBuilder::<u8>::new(10);
81    ///
82    /// assert!(builder.capacity() >= 10);
83    /// ```
84    #[inline]
85    pub fn new(capacity: usize) -> Self {
86        let buffer = MutableBuffer::new(capacity * std::mem::size_of::<T>());
87
88        Self {
89            buffer,
90            _marker: PhantomData,
91        }
92    }
93
94    /// Creates a new builder from a [`MutableBuffer`]
95    ///
96    /// # Safety
97    ///
98    /// - `buffer` bytes must be aligned to type `T`
99    pub unsafe fn new_from_buffer(buffer: MutableBuffer) -> Self {
100        Self {
101            buffer,
102            _marker: PhantomData,
103        }
104    }
105
106    /// Returns the current number of array elements in the internal buffer.
107    ///
108    /// # Example:
109    ///
110    /// ```
111    /// # use arrow_buffer::builder::BufferBuilder;
112    /// let mut builder = BufferBuilder::<u8>::new(10);
113    /// builder.append(42);
114    ///
115    /// assert_eq!(builder.len(), 1);
116    /// ```
117    pub fn len(&self) -> usize {
118        self.buffer.len() / std::mem::size_of::<T>()
119    }
120
121    /// Returns whether the internal buffer is empty.
122    ///
123    /// # Example:
124    ///
125    /// ```
126    /// # use arrow_buffer::builder::BufferBuilder;
127    /// let mut builder = BufferBuilder::<u8>::new(10);
128    /// builder.append(42);
129    ///
130    /// assert_eq!(builder.is_empty(), false);
131    /// ```
132    pub fn is_empty(&self) -> bool {
133        self.buffer.is_empty()
134    }
135
136    /// Returns the actual capacity (number of elements) of the internal buffer.
137    ///
138    /// Note: the internal capacity returned by this method might be larger than
139    /// what you'd expect after setting the capacity in the `new()` or `reserve()`
140    /// functions.
141    pub fn capacity(&self) -> usize {
142        let byte_capacity = self.buffer.capacity();
143        byte_capacity / std::mem::size_of::<T>()
144    }
145
146    /// Increases the number of elements in the internal buffer by `n`
147    /// and resizes the buffer as needed.
148    ///
149    /// The values of the newly added elements are 0.
150    /// This method is usually used when appending `NULL` values to the buffer
151    /// as they still require physical memory space.
152    ///
153    /// # Example:
154    ///
155    /// ```
156    /// # use arrow_buffer::builder::BufferBuilder;
157    /// let mut builder = BufferBuilder::<u8>::new(10);
158    /// builder.advance(2);
159    ///
160    /// assert_eq!(builder.len(), 2);
161    /// ```
162    #[inline]
163    pub fn advance(&mut self, i: usize) {
164        self.buffer.extend_zeros(i * std::mem::size_of::<T>());
165    }
166
167    /// Reserves memory for _at least_ `n` more elements of type `T`.
168    ///
169    /// # Example:
170    ///
171    /// ```
172    /// # use arrow_buffer::builder::BufferBuilder;
173    /// let mut builder = BufferBuilder::<u8>::new(10);
174    /// builder.reserve(10);
175    ///
176    /// assert!(builder.capacity() >= 20);
177    /// ```
178    #[inline]
179    pub fn reserve(&mut self, n: usize) {
180        self.buffer.reserve(n * std::mem::size_of::<T>());
181    }
182
183    /// Appends a value of type `T` into the builder,
184    /// growing the internal buffer as needed.
185    ///
186    /// # Example:
187    ///
188    /// ```
189    /// # use arrow_buffer::builder::BufferBuilder;
190    /// let mut builder = BufferBuilder::<u8>::new(10);
191    /// builder.append(42);
192    ///
193    /// assert_eq!(builder.len(), 1);
194    /// ```
195    #[inline]
196    pub fn append(&mut self, v: T) {
197        self.reserve(1);
198        self.buffer.push(v);
199    }
200
201    /// Appends a value of type `T` into the builder N times,
202    /// growing the internal buffer as needed.
203    ///
204    /// # Example:
205    ///
206    /// ```
207    /// # use arrow_buffer::builder::BufferBuilder;
208    /// let mut builder = BufferBuilder::<u8>::new(10);
209    /// builder.append_n(10, 42);
210    ///
211    /// assert_eq!(builder.len(), 10);
212    /// ```
213    #[inline]
214    pub fn append_n(&mut self, n: usize, v: T) {
215        self.reserve(n);
216        self.extend(std::iter::repeat_n(v, n))
217    }
218
219    /// Appends `n`, zero-initialized values
220    ///
221    /// # Example:
222    ///
223    /// ```
224    /// # use arrow_buffer::builder::BufferBuilder;
225    /// let mut builder = BufferBuilder::<u32>::new(10);
226    /// builder.append_n_zeroed(3);
227    ///
228    /// assert_eq!(builder.len(), 3);
229    /// assert_eq!(builder.as_slice(), &[0, 0, 0])
230    /// ```
231    #[inline]
232    pub fn append_n_zeroed(&mut self, n: usize) {
233        self.buffer.extend_zeros(n * std::mem::size_of::<T>());
234    }
235
236    /// Appends a slice of type `T`, growing the internal buffer as needed.
237    ///
238    /// # Example:
239    ///
240    /// ```
241    /// # use arrow_buffer::builder::BufferBuilder;
242    /// let mut builder = BufferBuilder::<u8>::new(10);
243    /// builder.append_slice(&[42, 44, 46]);
244    ///
245    /// assert_eq!(builder.len(), 3);
246    /// ```
247    #[inline]
248    pub fn append_slice(&mut self, slice: &[T]) {
249        self.buffer.extend_from_slice(slice);
250    }
251
252    /// View the contents of this buffer as a slice
253    ///
254    /// ```
255    /// # use arrow_buffer::builder::BufferBuilder;
256    /// let mut builder = BufferBuilder::<f64>::new(10);
257    /// builder.append(1.3);
258    /// builder.append_n(2, 2.3);
259    ///
260    /// assert_eq!(builder.as_slice(), &[1.3, 2.3, 2.3]);
261    /// ```
262    #[inline]
263    pub fn as_slice(&self) -> &[T] {
264        // SAFETY
265        //
266        // - MutableBuffer is aligned and initialized for len elements of T
267        // - MutableBuffer corresponds to a single allocation
268        // - MutableBuffer does not support modification whilst active immutable borrows
269        unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len()) }
270    }
271
272    /// View the contents of this buffer as a mutable slice
273    ///
274    /// # Example:
275    ///
276    /// ```
277    /// # use arrow_buffer::builder::BufferBuilder;
278    /// let mut builder = BufferBuilder::<f32>::new(10);
279    ///
280    /// builder.append_slice(&[1., 2., 3.4]);
281    /// assert_eq!(builder.as_slice(), &[1., 2., 3.4]);
282    ///
283    /// builder.as_slice_mut()[1] = 4.2;
284    /// assert_eq!(builder.as_slice(), &[1., 4.2, 3.4]);
285    /// ```
286    #[inline]
287    pub fn as_slice_mut(&mut self) -> &mut [T] {
288        // SAFETY
289        //
290        // - MutableBuffer is aligned and initialized for len elements of T
291        // - MutableBuffer corresponds to a single allocation
292        // - MutableBuffer does not support modification whilst active immutable borrows
293        unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len()) }
294    }
295
296    /// Shorten this BufferBuilder to `len` items
297    ///
298    /// If `len` is greater than the builder's current length, this has no effect
299    ///
300    /// # Example:
301    ///
302    /// ```
303    /// # use arrow_buffer::builder::BufferBuilder;
304    /// let mut builder = BufferBuilder::<u16>::new(10);
305    ///
306    /// builder.append_slice(&[42, 44, 46]);
307    /// assert_eq!(builder.as_slice(), &[42, 44, 46]);
308    ///
309    /// builder.truncate(2);
310    /// assert_eq!(builder.as_slice(), &[42, 44]);
311    ///
312    /// builder.append(12);
313    /// assert_eq!(builder.as_slice(), &[42, 44, 12]);
314    /// ```
315    #[inline]
316    pub fn truncate(&mut self, len: usize) {
317        self.buffer.truncate(len * std::mem::size_of::<T>());
318    }
319
320    /// # Safety
321    /// This requires the iterator be a trusted length. This could instead require
322    /// the iterator implement `TrustedLen` once that is stabilized.
323    #[inline]
324    pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T>) {
325        let iter = iter.into_iter();
326        let len = iter
327            .size_hint()
328            .1
329            .expect("append_trusted_len_iter expects upper bound");
330        self.reserve(len);
331        self.extend(iter);
332    }
333
334    /// Resets this builder and returns an immutable [Buffer].
335    ///
336    /// Use [`Self::build`] when you don't need to reuse this builder.
337    ///
338    /// # Example:
339    ///
340    /// ```
341    /// # use arrow_buffer::builder::BufferBuilder;
342    /// let mut builder = BufferBuilder::<u8>::new(10);
343    /// builder.append_slice(&[42, 44, 46]);
344    /// let buffer = builder.finish();
345    /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
346    /// ```
347    #[inline]
348    pub fn finish(&mut self) -> Buffer {
349        let buf = std::mem::take(&mut self.buffer);
350        buf.into()
351    }
352
353    /// Builds an immutable [Buffer] without resetting the builder.
354    ///
355    /// This consumes the builder. Use [`Self::finish`] to reuse it.
356    ///
357    /// # Example:
358    ///
359    /// ```
360    /// # use arrow_buffer::builder::BufferBuilder;
361    /// let mut builder = BufferBuilder::<u8>::new(10);
362    /// builder.append_slice(&[42, 44, 46]);
363    /// let buffer = builder.build();
364    /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
365    /// ```
366    #[inline]
367    pub fn build(self) -> Buffer {
368        self.buffer.into()
369    }
370}
371
372impl<T: ArrowNativeType> Default for BufferBuilder<T> {
373    fn default() -> Self {
374        Self::new(0)
375    }
376}
377
378impl<T: ArrowNativeType> Extend<T> for BufferBuilder<T> {
379    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
380        self.buffer.extend(iter)
381    }
382}
383
384impl<T: ArrowNativeType> From<Vec<T>> for BufferBuilder<T> {
385    fn from(value: Vec<T>) -> Self {
386        let buffer = MutableBuffer::from(value);
387        // SAFETY
388        // - buffer is aligned to T
389        unsafe { Self::new_from_buffer(buffer) }
390    }
391}
392
393impl<T: ArrowNativeType> FromIterator<T> for BufferBuilder<T> {
394    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
395        let mut builder = Self::default();
396        builder.extend(iter);
397        builder
398    }
399}
400
#[cfg(test)]
mod tests {
    use super::*;
    use std::mem;

    /// A default builder holds no elements and has zero byte capacity.
    #[test]
    fn default() {
        let empty = BufferBuilder::<u32>::default();
        assert!(empty.is_empty());
        assert!(empty.buffer.is_empty());
        assert_eq!(empty.buffer.capacity(), 0);
    }

    /// Collecting an iterator yields one element per item.
    #[test]
    fn from_iter() {
        let collected: BufferBuilder<u16> = [1u16, 2, 3, 4].into_iter().collect();
        assert_eq!(collected.len(), 4);
        assert_eq!(collected.buffer.len(), 4 * mem::size_of::<u16>());
    }

    /// Extending an already populated builder appends to its end.
    #[test]
    fn extend() {
        let head = [1, 2];
        let tail = [3, 4];
        let mut builder = head.into_iter().collect::<BufferBuilder<_>>();
        assert_eq!(builder.len(), 2);
        builder.extend(tail);
        assert_eq!(builder.len(), 4);
    }

    /// Truncating past the end is a no-op and later growth stays consistent.
    #[test]
    fn truncate_safety() {
        let mut builder = BufferBuilder::from(vec![40, -63, 90]);
        assert_eq!(builder.len(), 3);

        builder.truncate(151);
        assert_eq!(builder.len(), 3);

        builder.advance(219);
        assert_eq!(builder.len(), 222);

        let view = builder.as_slice_mut();
        assert_eq!(view.len(), 222);
    }

    /// Reserving more bytes than `usize` can address on top of the current length panics.
    #[test]
    #[should_panic(expected = "buffer length overflow")]
    fn reserve_length_overflow() {
        let mut builder = BufferBuilder::<u8>::new(1);
        builder.append(0);
        builder.reserve(usize::MAX);
    }

    /// Zero-extending past the addressable length panics.
    #[test]
    #[should_panic(expected = "buffer length overflow")]
    fn append_n_zeroed_length_overflow() {
        let max_elements = usize::MAX / mem::size_of::<u64>();
        let mut builder = BufferBuilder::<u64>::new(1);
        builder.append_n_zeroed(1);
        builder.append_n_zeroed(max_elements);
    }

    /// Advancing past the addressable length panics.
    #[test]
    #[should_panic(expected = "buffer length overflow")]
    fn advance_length_overflow() {
        let max_elements = usize::MAX / mem::size_of::<u64>();
        let mut builder = BufferBuilder::<u64>::new(1);
        builder.advance(1);
        builder.advance(max_elements);
    }
}