Skip to main content

arrow_buffer/builder/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Buffer builders
19
20mod boolean;
21mod null;
22mod offset;
23
24pub use boolean::*;
25pub use null::*;
26pub use offset::*;
27
28use crate::{ArrowNativeType, Buffer, MutableBuffer};
29use std::marker::PhantomData;
30
31/// Builder for creating a [Buffer] object.
32///
33/// A [Buffer] is the underlying data structure of Arrow's Arrays.
34///
35/// For all supported types, there are type definitions for the
36/// generic version of `BufferBuilder<T>`, e.g. `BufferBuilder`.
37///
38/// # Example:
39///
40/// ```
41/// # use arrow_buffer::builder::BufferBuilder;
42///
43/// let mut builder = BufferBuilder::<u8>::new(100);
44/// builder.append_slice(&[42, 43, 44]);
45/// builder.append(45);
46/// let buffer = builder.finish();
47///
48/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
49/// ```
50#[derive(Debug)]
51pub struct BufferBuilder<T: ArrowNativeType> {
52    buffer: MutableBuffer,
53    len: usize,
54    _marker: PhantomData<T>,
55}
56
57impl<T: ArrowNativeType> BufferBuilder<T> {
58    /// Creates a new builder with initial capacity for _at least_ `capacity`
59    /// elements of type `T`.
60    ///
61    /// The capacity can later be manually adjusted with the
62    /// [`reserve()`](BufferBuilder::reserve) method.
63    /// Also the
64    /// [`append()`](BufferBuilder::append),
65    /// [`append_slice()`](BufferBuilder::append_slice) and
66    /// [`advance()`](BufferBuilder::advance)
67    /// methods automatically increase the capacity if needed.
68    ///
69    /// # Example:
70    ///
71    /// ```
72    /// # use arrow_buffer::builder::BufferBuilder;
73    /// let mut builder = BufferBuilder::<u8>::new(10);
74    ///
75    /// assert!(builder.capacity() >= 10);
76    /// ```
77    #[inline]
78    pub fn new(capacity: usize) -> Self {
79        let buffer = MutableBuffer::new(capacity * std::mem::size_of::<T>());
80
81        Self {
82            buffer,
83            len: 0,
84            _marker: PhantomData,
85        }
86    }
87
88    /// Creates a new builder from a [`MutableBuffer`]
89    ///
90    /// # Safety
91    ///
92    /// - `buffer` bytes must be aligned to type `T`
93    pub unsafe fn new_from_buffer(buffer: MutableBuffer) -> Self {
94        let buffer_len = buffer.len();
95        Self {
96            buffer,
97            len: buffer_len / std::mem::size_of::<T>(),
98            _marker: PhantomData,
99        }
100    }
101
102    /// Returns the current number of array elements in the internal buffer.
103    ///
104    /// # Example:
105    ///
106    /// ```
107    /// # use arrow_buffer::builder::BufferBuilder;
108    /// let mut builder = BufferBuilder::<u8>::new(10);
109    /// builder.append(42);
110    ///
111    /// assert_eq!(builder.len(), 1);
112    /// ```
113    pub fn len(&self) -> usize {
114        self.len
115    }
116
117    /// Returns whether the internal buffer is empty.
118    ///
119    /// # Example:
120    ///
121    /// ```
122    /// # use arrow_buffer::builder::BufferBuilder;
123    /// let mut builder = BufferBuilder::<u8>::new(10);
124    /// builder.append(42);
125    ///
126    /// assert_eq!(builder.is_empty(), false);
127    /// ```
128    pub fn is_empty(&self) -> bool {
129        self.len == 0
130    }
131
132    /// Returns the actual capacity (number of elements) of the internal buffer.
133    ///
134    /// Note: the internal capacity returned by this method might be larger than
135    /// what you'd expect after setting the capacity in the `new()` or `reserve()`
136    /// functions.
137    pub fn capacity(&self) -> usize {
138        let byte_capacity = self.buffer.capacity();
139        byte_capacity / std::mem::size_of::<T>()
140    }
141
142    /// Increases the number of elements in the internal buffer by `n`
143    /// and resizes the buffer as needed.
144    ///
145    /// The values of the newly added elements are 0.
146    /// This method is usually used when appending `NULL` values to the buffer
147    /// as they still require physical memory space.
148    ///
149    /// # Example:
150    ///
151    /// ```
152    /// # use arrow_buffer::builder::BufferBuilder;
153    /// let mut builder = BufferBuilder::<u8>::new(10);
154    /// builder.advance(2);
155    ///
156    /// assert_eq!(builder.len(), 2);
157    /// ```
158    #[inline]
159    pub fn advance(&mut self, i: usize) {
160        self.buffer.extend_zeros(i * std::mem::size_of::<T>());
161        self.len += i;
162    }
163
164    /// Reserves memory for _at least_ `n` more elements of type `T`.
165    ///
166    /// # Example:
167    ///
168    /// ```
169    /// # use arrow_buffer::builder::BufferBuilder;
170    /// let mut builder = BufferBuilder::<u8>::new(10);
171    /// builder.reserve(10);
172    ///
173    /// assert!(builder.capacity() >= 20);
174    /// ```
175    #[inline]
176    pub fn reserve(&mut self, n: usize) {
177        self.buffer.reserve(n * std::mem::size_of::<T>());
178    }
179
180    /// Appends a value of type `T` into the builder,
181    /// growing the internal buffer as needed.
182    ///
183    /// # Example:
184    ///
185    /// ```
186    /// # use arrow_buffer::builder::BufferBuilder;
187    /// let mut builder = BufferBuilder::<u8>::new(10);
188    /// builder.append(42);
189    ///
190    /// assert_eq!(builder.len(), 1);
191    /// ```
192    #[inline]
193    pub fn append(&mut self, v: T) {
194        self.reserve(1);
195        self.buffer.push(v);
196        self.len += 1;
197    }
198
199    /// Appends a value of type `T` into the builder N times,
200    /// growing the internal buffer as needed.
201    ///
202    /// # Example:
203    ///
204    /// ```
205    /// # use arrow_buffer::builder::BufferBuilder;
206    /// let mut builder = BufferBuilder::<u8>::new(10);
207    /// builder.append_n(10, 42);
208    ///
209    /// assert_eq!(builder.len(), 10);
210    /// ```
211    #[inline]
212    pub fn append_n(&mut self, n: usize, v: T) {
213        self.reserve(n);
214        self.extend(std::iter::repeat_n(v, n))
215    }
216
217    /// Appends `n`, zero-initialized values
218    ///
219    /// # Example:
220    ///
221    /// ```
222    /// # use arrow_buffer::builder::BufferBuilder;
223    /// let mut builder = BufferBuilder::<u32>::new(10);
224    /// builder.append_n_zeroed(3);
225    ///
226    /// assert_eq!(builder.len(), 3);
227    /// assert_eq!(builder.as_slice(), &[0, 0, 0])
228    /// ```
229    #[inline]
230    pub fn append_n_zeroed(&mut self, n: usize) {
231        self.buffer.extend_zeros(n * std::mem::size_of::<T>());
232        self.len += n;
233    }
234
235    /// Appends a slice of type `T`, growing the internal buffer as needed.
236    ///
237    /// # Example:
238    ///
239    /// ```
240    /// # use arrow_buffer::builder::BufferBuilder;
241    /// let mut builder = BufferBuilder::<u8>::new(10);
242    /// builder.append_slice(&[42, 44, 46]);
243    ///
244    /// assert_eq!(builder.len(), 3);
245    /// ```
246    #[inline]
247    pub fn append_slice(&mut self, slice: &[T]) {
248        self.buffer.extend_from_slice(slice);
249        self.len += slice.len();
250    }
251
252    /// View the contents of this buffer as a slice
253    ///
254    /// ```
255    /// # use arrow_buffer::builder::BufferBuilder;
256    /// let mut builder = BufferBuilder::<f64>::new(10);
257    /// builder.append(1.3);
258    /// builder.append_n(2, 2.3);
259    ///
260    /// assert_eq!(builder.as_slice(), &[1.3, 2.3, 2.3]);
261    /// ```
262    #[inline]
263    pub fn as_slice(&self) -> &[T] {
264        // SAFETY
265        //
266        // - MutableBuffer is aligned and initialized for len elements of T
267        // - MutableBuffer corresponds to a single allocation
268        // - MutableBuffer does not support modification whilst active immutable borrows
269        unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len) }
270    }
271
272    /// View the contents of this buffer as a mutable slice
273    ///
274    /// # Example:
275    ///
276    /// ```
277    /// # use arrow_buffer::builder::BufferBuilder;
278    /// let mut builder = BufferBuilder::<f32>::new(10);
279    ///
280    /// builder.append_slice(&[1., 2., 3.4]);
281    /// assert_eq!(builder.as_slice(), &[1., 2., 3.4]);
282    ///
283    /// builder.as_slice_mut()[1] = 4.2;
284    /// assert_eq!(builder.as_slice(), &[1., 4.2, 3.4]);
285    /// ```
286    #[inline]
287    pub fn as_slice_mut(&mut self) -> &mut [T] {
288        // SAFETY
289        //
290        // - MutableBuffer is aligned and initialized for len elements of T
291        // - MutableBuffer corresponds to a single allocation
292        // - MutableBuffer does not support modification whilst active immutable borrows
293        unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len) }
294    }
295
296    /// Shorten this BufferBuilder to `len` items
297    ///
298    /// If `len` is greater than the builder's current length, this has no effect
299    ///
300    /// # Example:
301    ///
302    /// ```
303    /// # use arrow_buffer::builder::BufferBuilder;
304    /// let mut builder = BufferBuilder::<u16>::new(10);
305    ///
306    /// builder.append_slice(&[42, 44, 46]);
307    /// assert_eq!(builder.as_slice(), &[42, 44, 46]);
308    ///
309    /// builder.truncate(2);
310    /// assert_eq!(builder.as_slice(), &[42, 44]);
311    ///
312    /// builder.append(12);
313    /// assert_eq!(builder.as_slice(), &[42, 44, 12]);
314    /// ```
315    #[inline]
316    pub fn truncate(&mut self, len: usize) {
317        self.buffer.truncate(len * std::mem::size_of::<T>());
318        self.len = self.len.min(len);
319    }
320
321    /// # Safety
322    /// This requires the iterator be a trusted length. This could instead require
323    /// the iterator implement `TrustedLen` once that is stabilized.
324    #[inline]
325    pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T>) {
326        let iter = iter.into_iter();
327        let len = iter
328            .size_hint()
329            .1
330            .expect("append_trusted_len_iter expects upper bound");
331        self.reserve(len);
332        self.extend(iter);
333    }
334
335    /// Resets this builder and returns an immutable [Buffer].
336    ///
337    /// # Example:
338    ///
339    /// ```
340    /// # use arrow_buffer::builder::BufferBuilder;
341    ///
342    /// let mut builder = BufferBuilder::<u8>::new(10);
343    /// builder.append_slice(&[42, 44, 46]);
344    ///
345    /// let buffer = builder.finish();
346    ///
347    /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
348    /// ```
349    #[inline]
350    pub fn finish(&mut self) -> Buffer {
351        let buf = std::mem::take(&mut self.buffer);
352        self.len = 0;
353        buf.into()
354    }
355}
356
357impl<T: ArrowNativeType> Default for BufferBuilder<T> {
358    fn default() -> Self {
359        Self::new(0)
360    }
361}
362
363impl<T: ArrowNativeType> Extend<T> for BufferBuilder<T> {
364    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
365        self.buffer.extend(iter.into_iter().inspect(|_| {
366            self.len += 1;
367        }))
368    }
369}
370
371impl<T: ArrowNativeType> From<Vec<T>> for BufferBuilder<T> {
372    fn from(value: Vec<T>) -> Self {
373        let buffer = MutableBuffer::from(value);
374        // SAFETY
375        // - buffer is aligned to T
376        unsafe { Self::new_from_buffer(buffer) }
377    }
378}
379
380impl<T: ArrowNativeType> FromIterator<T> for BufferBuilder<T> {
381    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
382        let mut builder = Self::default();
383        builder.extend(iter);
384        builder
385    }
386}
387
#[cfg(test)]
mod tests {
    use super::*;
    use std::mem;

    /// A default builder holds no elements and owns no allocation.
    #[test]
    fn default() {
        let builder: BufferBuilder<u32> = Default::default();
        assert!(builder.is_empty());
        assert!(builder.buffer.is_empty());
        assert_eq!(builder.buffer.capacity(), 0);
    }

    /// Collecting an iterator tracks both the element count and byte length.
    #[test]
    fn from_iter() {
        let builder: BufferBuilder<u16> = [1u16, 2, 3, 4].into_iter().collect();
        assert_eq!(builder.len(), 4);
        assert_eq!(builder.buffer.len(), 4 * mem::size_of::<u16>());
    }

    /// Extending an existing builder adds to its element count.
    #[test]
    fn extend() {
        let mut builder: BufferBuilder<_> = [1, 2].into_iter().collect();
        assert_eq!(builder.len(), 2);
        builder.extend([3, 4]);
        assert_eq!(builder.len(), 4);
    }

    /// Truncating beyond the current length must leave the length untouched,
    /// so later growth and slice views stay consistent.
    #[test]
    fn truncate_safety() {
        let mut builder = BufferBuilder::from(vec![40, -63, 90]);
        assert_eq!(builder.len(), 3);

        // Larger than the current length: expected to be a no-op.
        builder.truncate(151);
        assert_eq!(builder.len(), 3);

        builder.advance(219);
        assert_eq!(builder.len(), 222);

        let view = builder.as_slice_mut();
        assert_eq!(view.len(), 222);
    }
}
429}