arrow_buffer/builder/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Buffer builders
19
20mod boolean;
21mod null;
22mod offset;
23
24pub use boolean::*;
25pub use null::*;
26pub use offset::*;
27
28use crate::{ArrowNativeType, Buffer, MutableBuffer};
29use std::marker::PhantomData;
30
31/// Builder for creating a [Buffer] object.
32///
33/// A [Buffer] is the underlying data structure of Arrow's Arrays.
34///
35/// For all supported types, there are type definitions for the
36/// generic version of `BufferBuilder<T>`, e.g. `BufferBuilder`.
37///
38/// # Example:
39///
40/// ```
41/// # use arrow_buffer::builder::BufferBuilder;
42///
43/// let mut builder = BufferBuilder::<u8>::new(100);
44/// builder.append_slice(&[42, 43, 44]);
45/// builder.append(45);
46/// let buffer = builder.finish();
47///
48/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
49/// ```
50#[derive(Debug)]
51pub struct BufferBuilder<T: ArrowNativeType> {
52 buffer: MutableBuffer,
53 len: usize,
54 _marker: PhantomData<T>,
55}
56
57impl<T: ArrowNativeType> BufferBuilder<T> {
58 /// Creates a new builder with initial capacity for _at least_ `capacity`
59 /// elements of type `T`.
60 ///
61 /// The capacity can later be manually adjusted with the
62 /// [`reserve()`](BufferBuilder::reserve) method.
63 /// Also the
64 /// [`append()`](BufferBuilder::append),
65 /// [`append_slice()`](BufferBuilder::append_slice) and
66 /// [`advance()`](BufferBuilder::advance)
67 /// methods automatically increase the capacity if needed.
68 ///
69 /// # Example:
70 ///
71 /// ```
72 /// # use arrow_buffer::builder::BufferBuilder;
73 /// let mut builder = BufferBuilder::<u8>::new(10);
74 ///
75 /// assert!(builder.capacity() >= 10);
76 /// ```
77 #[inline]
78 pub fn new(capacity: usize) -> Self {
79 let buffer = MutableBuffer::new(capacity * std::mem::size_of::<T>());
80
81 Self {
82 buffer,
83 len: 0,
84 _marker: PhantomData,
85 }
86 }
87
88 /// Creates a new builder from a [`MutableBuffer`]
89 ///
90 /// # Safety
91 ///
92 /// - `buffer` bytes must be aligned to type `T`
93 pub unsafe fn new_from_buffer(buffer: MutableBuffer) -> Self {
94 let buffer_len = buffer.len();
95 Self {
96 buffer,
97 len: buffer_len / std::mem::size_of::<T>(),
98 _marker: PhantomData,
99 }
100 }
101
102 /// Returns the current number of array elements in the internal buffer.
103 ///
104 /// # Example:
105 ///
106 /// ```
107 /// # use arrow_buffer::builder::BufferBuilder;
108 /// let mut builder = BufferBuilder::<u8>::new(10);
109 /// builder.append(42);
110 ///
111 /// assert_eq!(builder.len(), 1);
112 /// ```
113 pub fn len(&self) -> usize {
114 self.len
115 }
116
117 /// Returns whether the internal buffer is empty.
118 ///
119 /// # Example:
120 ///
121 /// ```
122 /// # use arrow_buffer::builder::BufferBuilder;
123 /// let mut builder = BufferBuilder::<u8>::new(10);
124 /// builder.append(42);
125 ///
126 /// assert_eq!(builder.is_empty(), false);
127 /// ```
128 pub fn is_empty(&self) -> bool {
129 self.len == 0
130 }
131
132 /// Returns the actual capacity (number of elements) of the internal buffer.
133 ///
134 /// Note: the internal capacity returned by this method might be larger than
135 /// what you'd expect after setting the capacity in the `new()` or `reserve()`
136 /// functions.
137 pub fn capacity(&self) -> usize {
138 let byte_capacity = self.buffer.capacity();
139 byte_capacity / std::mem::size_of::<T>()
140 }
141
142 /// Increases the number of elements in the internal buffer by `n`
143 /// and resizes the buffer as needed.
144 ///
145 /// The values of the newly added elements are 0.
146 /// This method is usually used when appending `NULL` values to the buffer
147 /// as they still require physical memory space.
148 ///
149 /// # Example:
150 ///
151 /// ```
152 /// # use arrow_buffer::builder::BufferBuilder;
153 /// let mut builder = BufferBuilder::<u8>::new(10);
154 /// builder.advance(2);
155 ///
156 /// assert_eq!(builder.len(), 2);
157 /// ```
158 #[inline]
159 pub fn advance(&mut self, i: usize) {
160 self.buffer.extend_zeros(i * std::mem::size_of::<T>());
161 self.len += i;
162 }
163
164 /// Reserves memory for _at least_ `n` more elements of type `T`.
165 ///
166 /// # Example:
167 ///
168 /// ```
169 /// # use arrow_buffer::builder::BufferBuilder;
170 /// let mut builder = BufferBuilder::<u8>::new(10);
171 /// builder.reserve(10);
172 ///
173 /// assert!(builder.capacity() >= 20);
174 /// ```
175 #[inline]
176 pub fn reserve(&mut self, n: usize) {
177 self.buffer.reserve(n * std::mem::size_of::<T>());
178 }
179
180 /// Appends a value of type `T` into the builder,
181 /// growing the internal buffer as needed.
182 ///
183 /// # Example:
184 ///
185 /// ```
186 /// # use arrow_buffer::builder::BufferBuilder;
187 /// let mut builder = BufferBuilder::<u8>::new(10);
188 /// builder.append(42);
189 ///
190 /// assert_eq!(builder.len(), 1);
191 /// ```
192 #[inline]
193 pub fn append(&mut self, v: T) {
194 self.reserve(1);
195 self.buffer.push(v);
196 self.len += 1;
197 }
198
199 /// Appends a value of type `T` into the builder N times,
200 /// growing the internal buffer as needed.
201 ///
202 /// # Example:
203 ///
204 /// ```
205 /// # use arrow_buffer::builder::BufferBuilder;
206 /// let mut builder = BufferBuilder::<u8>::new(10);
207 /// builder.append_n(10, 42);
208 ///
209 /// assert_eq!(builder.len(), 10);
210 /// ```
211 #[inline]
212 pub fn append_n(&mut self, n: usize, v: T) {
213 self.reserve(n);
214 self.extend(std::iter::repeat_n(v, n))
215 }
216
217 /// Appends `n`, zero-initialized values
218 ///
219 /// # Example:
220 ///
221 /// ```
222 /// # use arrow_buffer::builder::BufferBuilder;
223 /// let mut builder = BufferBuilder::<u32>::new(10);
224 /// builder.append_n_zeroed(3);
225 ///
226 /// assert_eq!(builder.len(), 3);
227 /// assert_eq!(builder.as_slice(), &[0, 0, 0])
228 /// ```
229 #[inline]
230 pub fn append_n_zeroed(&mut self, n: usize) {
231 self.buffer.extend_zeros(n * std::mem::size_of::<T>());
232 self.len += n;
233 }
234
235 /// Appends a slice of type `T`, growing the internal buffer as needed.
236 ///
237 /// # Example:
238 ///
239 /// ```
240 /// # use arrow_buffer::builder::BufferBuilder;
241 /// let mut builder = BufferBuilder::<u8>::new(10);
242 /// builder.append_slice(&[42, 44, 46]);
243 ///
244 /// assert_eq!(builder.len(), 3);
245 /// ```
246 #[inline]
247 pub fn append_slice(&mut self, slice: &[T]) {
248 self.buffer.extend_from_slice(slice);
249 self.len += slice.len();
250 }
251
252 /// View the contents of this buffer as a slice
253 ///
254 /// ```
255 /// # use arrow_buffer::builder::BufferBuilder;
256 /// let mut builder = BufferBuilder::<f64>::new(10);
257 /// builder.append(1.3);
258 /// builder.append_n(2, 2.3);
259 ///
260 /// assert_eq!(builder.as_slice(), &[1.3, 2.3, 2.3]);
261 /// ```
262 #[inline]
263 pub fn as_slice(&self) -> &[T] {
264 // SAFETY
265 //
266 // - MutableBuffer is aligned and initialized for len elements of T
267 // - MutableBuffer corresponds to a single allocation
268 // - MutableBuffer does not support modification whilst active immutable borrows
269 unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len) }
270 }
271
272 /// View the contents of this buffer as a mutable slice
273 ///
274 /// # Example:
275 ///
276 /// ```
277 /// # use arrow_buffer::builder::BufferBuilder;
278 /// let mut builder = BufferBuilder::<f32>::new(10);
279 ///
280 /// builder.append_slice(&[1., 2., 3.4]);
281 /// assert_eq!(builder.as_slice(), &[1., 2., 3.4]);
282 ///
283 /// builder.as_slice_mut()[1] = 4.2;
284 /// assert_eq!(builder.as_slice(), &[1., 4.2, 3.4]);
285 /// ```
286 #[inline]
287 pub fn as_slice_mut(&mut self) -> &mut [T] {
288 // SAFETY
289 //
290 // - MutableBuffer is aligned and initialized for len elements of T
291 // - MutableBuffer corresponds to a single allocation
292 // - MutableBuffer does not support modification whilst active immutable borrows
293 unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len) }
294 }
295
296 /// Shorten this BufferBuilder to `len` items
297 ///
298 /// If `len` is greater than the builder's current length, this has no effect
299 ///
300 /// # Example:
301 ///
302 /// ```
303 /// # use arrow_buffer::builder::BufferBuilder;
304 /// let mut builder = BufferBuilder::<u16>::new(10);
305 ///
306 /// builder.append_slice(&[42, 44, 46]);
307 /// assert_eq!(builder.as_slice(), &[42, 44, 46]);
308 ///
309 /// builder.truncate(2);
310 /// assert_eq!(builder.as_slice(), &[42, 44]);
311 ///
312 /// builder.append(12);
313 /// assert_eq!(builder.as_slice(), &[42, 44, 12]);
314 /// ```
315 #[inline]
316 pub fn truncate(&mut self, len: usize) {
317 self.buffer.truncate(len * std::mem::size_of::<T>());
318 self.len = self.len.min(len);
319 }
320
321 /// # Safety
322 /// This requires the iterator be a trusted length. This could instead require
323 /// the iterator implement `TrustedLen` once that is stabilized.
324 #[inline]
325 pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T>) {
326 let iter = iter.into_iter();
327 let len = iter
328 .size_hint()
329 .1
330 .expect("append_trusted_len_iter expects upper bound");
331 self.reserve(len);
332 self.extend(iter);
333 }
334
335 /// Resets this builder and returns an immutable [Buffer].
336 ///
337 /// # Example:
338 ///
339 /// ```
340 /// # use arrow_buffer::builder::BufferBuilder;
341 ///
342 /// let mut builder = BufferBuilder::<u8>::new(10);
343 /// builder.append_slice(&[42, 44, 46]);
344 ///
345 /// let buffer = builder.finish();
346 ///
347 /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
348 /// ```
349 #[inline]
350 pub fn finish(&mut self) -> Buffer {
351 let buf = std::mem::take(&mut self.buffer);
352 self.len = 0;
353 buf.into()
354 }
355}
356
357impl<T: ArrowNativeType> Default for BufferBuilder<T> {
358 fn default() -> Self {
359 Self::new(0)
360 }
361}
362
363impl<T: ArrowNativeType> Extend<T> for BufferBuilder<T> {
364 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
365 self.buffer.extend(iter.into_iter().inspect(|_| {
366 self.len += 1;
367 }))
368 }
369}
370
371impl<T: ArrowNativeType> From<Vec<T>> for BufferBuilder<T> {
372 fn from(value: Vec<T>) -> Self {
373 let buffer = MutableBuffer::from(value);
374 // SAFETY
375 // - buffer is aligned to T
376 unsafe { Self::new_from_buffer(buffer) }
377 }
378}
379
380impl<T: ArrowNativeType> FromIterator<T> for BufferBuilder<T> {
381 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
382 let mut builder = Self::default();
383 builder.extend(iter);
384 builder
385 }
386}
387
#[cfg(test)]
mod tests {
    use super::*;

    /// A default builder holds no elements and reports zero byte capacity.
    #[test]
    fn default() {
        let empty = BufferBuilder::<u32>::default();
        assert!(empty.is_empty());
        assert!(empty.buffer.is_empty());
        assert_eq!(empty.buffer.capacity(), 0);
    }

    /// Collecting an iterator tracks both the element count and the byte length.
    #[test]
    fn from_iter() {
        let values = [1u16, 2, 3, 4];
        let collected: BufferBuilder<_> = values.into_iter().collect();
        assert_eq!(collected.len(), 4);
        assert_eq!(collected.buffer.len(), 4 * std::mem::size_of::<u16>());
    }

    /// Extending an existing builder adds to its element count.
    #[test]
    fn extend() {
        let seed = [1, 2];
        let mut grown: BufferBuilder<_> = seed.into_iter().collect();
        assert_eq!(grown.len(), 2);
        grown.extend([3, 4]);
        assert_eq!(grown.len(), 4);
    }

    /// Truncating to more than the current length must not change the length,
    /// and the mutable slice must match the length after a later `advance`.
    #[test]
    fn truncate_safety() {
        let mut builder = BufferBuilder::from(vec![40, -63, 90]);
        assert_eq!(builder.len(), 3);

        builder.truncate(151);
        assert_eq!(builder.len(), 3);

        builder.advance(219);
        assert_eq!(builder.len(), 222);

        let view = builder.as_slice_mut();
        assert_eq!(view.len(), 222);
    }
}