arrow_array/builder/
generic_bytes_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::builder::ArrayBuilder;
19use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
20use crate::{Array, ArrayRef, GenericByteArray, OffsetSizeTrait};
21use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, NullBufferBuilder, ScalarBuffer};
22use arrow_data::ArrayDataBuilder;
23use std::any::Any;
24use std::sync::Arc;
25
/// Builder for [`GenericByteArray`]
///
/// For building strings, see docs on [`GenericStringBuilder`].
/// For building binary, see docs on [`GenericBinaryBuilder`].
///
/// The bytes of all values are stored back-to-back in a single buffer, a
/// parallel list of offsets records where each slot ends, and validity is
/// tracked separately.
pub struct GenericByteBuilder<T: ByteArrayType> {
    /// Concatenated bytes of every value written so far.
    value_builder: Vec<u8>,
    /// One entry per slot plus a leading entry; builders created through
    /// `with_capacity` (and reset by `finish`) start with a single zero offset.
    offsets_builder: Vec<T::Offset>,
    /// Validity (null / non-null) of each slot; its length is the builder's length.
    null_buffer_builder: NullBufferBuilder,
}
35
36impl<T: ByteArrayType> GenericByteBuilder<T> {
37    /// Creates a new [`GenericByteBuilder`].
38    pub fn new() -> Self {
39        Self::with_capacity(1024, 1024)
40    }
41
42    /// Creates a new [`GenericByteBuilder`].
43    ///
44    /// - `item_capacity` is the number of items to pre-allocate.
45    ///   The size of the preallocated buffer of offsets is the number of items plus one.
46    /// - `data_capacity` is the total number of bytes of data to pre-allocate
47    ///   (for all items, not per item).
48    pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
49        let mut offsets_builder = Vec::with_capacity(item_capacity + 1);
50        offsets_builder.push(T::Offset::from_usize(0).unwrap());
51        Self {
52            value_builder: Vec::with_capacity(data_capacity),
53            offsets_builder,
54            null_buffer_builder: NullBufferBuilder::new(item_capacity),
55        }
56    }
57
58    /// Creates a new  [`GenericByteBuilder`] from buffers.
59    ///
60    /// # Safety
61    ///
62    /// This doesn't verify buffer contents as it assumes the buffers are from
63    /// existing and valid [`GenericByteArray`].
64    pub unsafe fn new_from_buffer(
65        offsets_buffer: MutableBuffer,
66        value_buffer: MutableBuffer,
67        null_buffer: Option<MutableBuffer>,
68    ) -> Self {
69        let offsets_builder: Vec<T::Offset> =
70            ScalarBuffer::<T::Offset>::from(offsets_buffer).into();
71        let value_builder: Vec<u8> = ScalarBuffer::<u8>::from(value_buffer).into();
72
73        let null_buffer_builder = null_buffer
74            .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1))
75            .unwrap_or_else(|| NullBufferBuilder::new_with_len(offsets_builder.len() - 1));
76
77        Self {
78            offsets_builder,
79            value_builder,
80            null_buffer_builder,
81        }
82    }
83
84    #[inline]
85    fn next_offset(&self) -> T::Offset {
86        T::Offset::from_usize(self.value_builder.len()).expect("byte array offset overflow")
87    }
88
89    /// Appends a value into the builder.
90    ///
91    /// See the [GenericStringBuilder] documentation for examples of
92    /// incrementally building string values with multiple `write!` calls.
93    ///
94    /// # Panics
95    ///
96    /// Panics if the resulting length of [`Self::values_slice`] would exceed
97    /// `T::Offset::MAX` bytes.
98    ///
99    /// For example, this can happen with [`StringArray`] or [`BinaryArray`]
100    /// where the total length of all values exceeds 2GB
101    ///
102    /// [`StringArray`]: crate::StringArray
103    /// [`BinaryArray`]: crate::BinaryArray
104    #[inline]
105    pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
106        self.value_builder
107            .extend_from_slice(value.as_ref().as_ref());
108        self.null_buffer_builder.append(true);
109        self.offsets_builder.push(self.next_offset());
110    }
111
112    /// Append an `Option` value into the builder.
113    ///
114    /// - A `None` value will append a null value.
115    /// - A `Some` value will append the value.
116    ///
117    /// See [`Self::append_value`] for more panic information.
118    #[inline]
119    pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
120        match value {
121            None => self.append_null(),
122            Some(v) => self.append_value(v),
123        };
124    }
125
126    /// Append a null value into the builder.
127    #[inline]
128    pub fn append_null(&mut self) {
129        self.null_buffer_builder.append(false);
130        self.offsets_builder.push(self.next_offset());
131    }
132
133    /// Appends `n` `null`s into the builder.
134    #[inline]
135    pub fn append_nulls(&mut self, n: usize) {
136        self.null_buffer_builder.append_n_nulls(n);
137        let next_offset = self.next_offset();
138        self.offsets_builder
139            .extend(std::iter::repeat_n(next_offset, n));
140    }
141
142    /// Appends array values and null to this builder as is
143    /// (this means that underlying null values are copied as is).
144    #[inline]
145    pub fn append_array(&mut self, array: &GenericByteArray<T>) {
146        if array.len() == 0 {
147            return;
148        }
149
150        let offsets = array.offsets();
151
152        // If the offsets are contiguous, we can append them directly avoiding the need to align
153        // for example, when the first appended array is not sliced (starts at offset 0)
154        if self.next_offset() == offsets[0] {
155            self.offsets_builder.extend_from_slice(&offsets[1..]);
156        } else {
157            // Shifting all the offsets
158            let shift: T::Offset = self.next_offset() - offsets[0];
159
160            // Creating intermediate offsets instead of pushing each offset is faster
161            // (even if we make MutableBuffer to avoid updating length on each push
162            //  and reserve the necessary capacity, it's still slower)
163            let mut intermediate = Vec::with_capacity(offsets.len() - 1);
164
165            for &offset in &offsets[1..] {
166                intermediate.push(offset + shift)
167            }
168
169            self.offsets_builder.extend_from_slice(&intermediate);
170        }
171
172        // Append underlying values, starting from the first offset and ending at the last offset
173        self.value_builder.extend_from_slice(
174            &array.values().as_slice()[offsets[0].as_usize()..offsets[array.len()].as_usize()],
175        );
176
177        if let Some(null_buffer) = array.nulls() {
178            self.null_buffer_builder.append_buffer(null_buffer);
179        } else {
180            self.null_buffer_builder.append_n_non_nulls(array.len());
181        }
182    }
183
184    /// Builds the [`GenericByteArray`] and reset this builder.
185    pub fn finish(&mut self) -> GenericByteArray<T> {
186        let array_type = T::DATA_TYPE;
187        let array_builder = ArrayDataBuilder::new(array_type)
188            .len(self.len())
189            .add_buffer(std::mem::take(&mut self.offsets_builder).into())
190            .add_buffer(std::mem::take(&mut self.value_builder).into())
191            .nulls(self.null_buffer_builder.finish());
192
193        self.offsets_builder.push(self.next_offset());
194        let array_data = unsafe { array_builder.build_unchecked() };
195        GenericByteArray::from(array_data)
196    }
197
198    /// Builds the [`GenericByteArray`] without resetting the builder.
199    pub fn finish_cloned(&self) -> GenericByteArray<T> {
200        let array_type = T::DATA_TYPE;
201        let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
202        let value_buffer = Buffer::from_slice_ref(self.value_builder.as_slice());
203        let array_builder = ArrayDataBuilder::new(array_type)
204            .len(self.len())
205            .add_buffer(offset_buffer)
206            .add_buffer(value_buffer)
207            .nulls(self.null_buffer_builder.finish_cloned());
208
209        let array_data = unsafe { array_builder.build_unchecked() };
210        GenericByteArray::from(array_data)
211    }
212
213    /// Returns the current values buffer as a slice
214    pub fn values_slice(&self) -> &[u8] {
215        self.value_builder.as_slice()
216    }
217
218    /// Returns the current offsets buffer as a slice
219    pub fn offsets_slice(&self) -> &[T::Offset] {
220        self.offsets_builder.as_slice()
221    }
222
223    /// Returns the current null buffer as a slice
224    pub fn validity_slice(&self) -> Option<&[u8]> {
225        self.null_buffer_builder.as_slice()
226    }
227
228    /// Returns the current null buffer as a mutable slice
229    pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
230        self.null_buffer_builder.as_slice_mut()
231    }
232}
233
234impl<T: ByteArrayType> std::fmt::Debug for GenericByteBuilder<T> {
235    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
236        write!(f, "{}{}Builder", T::Offset::PREFIX, T::PREFIX)?;
237        f.debug_struct("")
238            .field("value_builder", &self.value_builder)
239            .field("offsets_builder", &self.offsets_builder)
240            .field("null_buffer_builder", &self.null_buffer_builder)
241            .finish()
242    }
243}
244
245impl<T: ByteArrayType> Default for GenericByteBuilder<T> {
246    fn default() -> Self {
247        Self::new()
248    }
249}
250
251impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
252    /// Returns the number of binary slots in the builder
253    fn len(&self) -> usize {
254        self.null_buffer_builder.len()
255    }
256
257    /// Builds the array and reset this builder.
258    fn finish(&mut self) -> ArrayRef {
259        Arc::new(self.finish())
260    }
261
262    /// Builds the array without resetting the builder.
263    fn finish_cloned(&self) -> ArrayRef {
264        Arc::new(self.finish_cloned())
265    }
266
267    /// Returns the builder as a non-mutable `Any` reference.
268    fn as_any(&self) -> &dyn Any {
269        self
270    }
271
272    /// Returns the builder as a mutable `Any` reference.
273    fn as_any_mut(&mut self) -> &mut dyn Any {
274        self
275    }
276
277    /// Returns the boxed builder as a box of `Any`.
278    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
279        self
280    }
281}
282
283impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBuilder<T> {
284    #[inline]
285    fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
286        for v in iter {
287            self.append_option(v)
288        }
289    }
290}
291
/// Array builder for [`GenericStringArray`][crate::GenericStringArray]
///
/// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
/// [`GenericByteBuilder::append_null`].
///
/// This builder also implements [`std::fmt::Write`] with any written data
/// included in the next appended value. This allows using [`std::fmt::Display`]
/// with standard Rust idioms like `write!` and `writeln!` to write data
/// directly to the builder without intermediate allocations.
///
/// Written data only becomes part of a value once a following append call
/// records its end offset, so always finish a series of writes with an
/// append (for example `append_value("")`).
///
/// # Example writing strings with `append_value`
/// ```
/// # use arrow_array::builder::GenericStringBuilder;
/// let mut builder = GenericStringBuilder::<i32>::new();
///
/// // Write one string value
/// builder.append_value("foobarbaz");
///
/// // Write a second string
/// builder.append_value("v2");
///
/// let array = builder.finish();
/// assert_eq!(array.value(0), "foobarbaz");
/// assert_eq!(array.value(1), "v2");
/// ```
///
/// # Example incrementally writing strings with `std::fmt::Write`
///
/// ```
/// # use std::fmt::Write;
/// # use arrow_array::builder::GenericStringBuilder;
/// let mut builder = GenericStringBuilder::<i32>::new();
///
/// // Write data in multiple `write!` calls
/// write!(builder, "foo").unwrap();
/// write!(builder, "bar").unwrap();
/// // The next call to append_value finishes the current string
/// // including all previously written strings.
/// builder.append_value("baz");
///
/// // Write second value with a single write call
/// write!(builder, "v2").unwrap();
/// // finish the value by calling append_value with an empty string
/// builder.append_value("");
///
/// let array = builder.finish();
/// assert_eq!(array.value(0), "foobarbaz");
/// assert_eq!(array.value(1), "v2");
/// ```
pub type GenericStringBuilder<O> = GenericByteBuilder<GenericStringType<O>>;
342
343impl<O: OffsetSizeTrait> std::fmt::Write for GenericStringBuilder<O> {
344    fn write_str(&mut self, s: &str) -> std::fmt::Result {
345        self.value_builder.extend_from_slice(s.as_bytes());
346        Ok(())
347    }
348}
349
/// Array builder for [`GenericBinaryArray`][crate::GenericBinaryArray]
///
/// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
/// [`GenericByteBuilder::append_null`].
///
/// This builder also implements [`std::io::Write`]; written bytes are
/// included in the next appended value.
///
/// # Example
/// ```
/// # use arrow_array::builder::GenericBinaryBuilder;
/// let mut builder = GenericBinaryBuilder::<i32>::new();
///
/// // Write data
/// builder.append_value("foo");
///
/// // Write second value
/// builder.append_value(&[0,1,2]);
///
/// let array = builder.finish();
/// // binary values
/// assert_eq!(array.value(0), b"foo");
/// assert_eq!(array.value(1), b"\x00\x01\x02");
/// ```
///
/// # Example incrementally writing bytes with `std::io::Write`
///
/// ```
/// # use std::io::Write;
/// # use arrow_array::builder::GenericBinaryBuilder;
/// let mut builder = GenericBinaryBuilder::<i32>::new();
///
/// // Write data in multiple `write!` calls
/// write!(builder, "foo").unwrap();
/// write!(builder, "bar").unwrap();
/// // The next call to append_value finishes the current value
/// // including all previously written bytes.
/// builder.append_value("baz");
///
/// // Write second value with a single write call
/// write!(builder, "v2").unwrap();
/// // finish the value by calling append_value with an empty value
/// builder.append_value("");
///
/// let array = builder.finish();
/// assert_eq!(array.value(0), "foobarbaz".as_bytes());
/// assert_eq!(array.value(1), "v2".as_bytes());
/// ```
pub type GenericBinaryBuilder<O> = GenericByteBuilder<GenericBinaryType<O>>;
396
397impl<O: OffsetSizeTrait> std::io::Write for GenericBinaryBuilder<O> {
398    fn write(&mut self, bs: &[u8]) -> std::io::Result<usize> {
399        self.value_builder.extend_from_slice(bs);
400        Ok(bs.len())
401    }
402
403    fn flush(&mut self) -> std::io::Result<()> {
404        Ok(())
405    }
406}
407
408#[cfg(test)]
409mod tests {
410    use super::*;
411    use crate::array::Array;
412    use crate::GenericStringArray;
413    use arrow_buffer::NullBuffer;
414    use std::fmt::Write as _;
415    use std::io::Write as _;
416
417    fn _test_generic_binary_builder<O: OffsetSizeTrait>() {
418        let mut builder = GenericBinaryBuilder::<O>::new();
419
420        builder.append_value(b"hello");
421        builder.append_value(b"");
422        builder.append_null();
423        builder.append_value(b"rust");
424
425        let array = builder.finish();
426
427        assert_eq!(4, array.len());
428        assert_eq!(1, array.null_count());
429        assert_eq!(b"hello", array.value(0));
430        assert_eq!([] as [u8; 0], array.value(1));
431        assert!(array.is_null(2));
432        assert_eq!(b"rust", array.value(3));
433        assert_eq!(O::from_usize(5).unwrap(), array.value_offsets()[2]);
434        assert_eq!(O::from_usize(4).unwrap(), array.value_length(3));
435    }
436
437    #[test]
438    fn test_binary_builder() {
439        _test_generic_binary_builder::<i32>()
440    }
441
442    #[test]
443    fn test_large_binary_builder() {
444        _test_generic_binary_builder::<i64>()
445    }
446
447    fn _test_generic_binary_builder_all_nulls<O: OffsetSizeTrait>() {
448        let mut builder = GenericBinaryBuilder::<O>::new();
449        builder.append_null();
450        builder.append_null();
451        builder.append_null();
452        builder.append_nulls(2);
453        assert_eq!(5, builder.len());
454        assert!(!builder.is_empty());
455
456        let array = builder.finish();
457        assert_eq!(5, array.null_count());
458        assert_eq!(5, array.len());
459        assert!(array.is_null(0));
460        assert!(array.is_null(1));
461        assert!(array.is_null(2));
462        assert!(array.is_null(3));
463        assert!(array.is_null(4));
464    }
465
466    #[test]
467    fn test_binary_builder_all_nulls() {
468        _test_generic_binary_builder_all_nulls::<i32>()
469    }
470
471    #[test]
472    fn test_large_binary_builder_all_nulls() {
473        _test_generic_binary_builder_all_nulls::<i64>()
474    }
475
476    fn _test_generic_binary_builder_reset<O: OffsetSizeTrait>() {
477        let mut builder = GenericBinaryBuilder::<O>::new();
478
479        builder.append_value(b"hello");
480        builder.append_value(b"");
481        builder.append_null();
482        builder.append_value(b"rust");
483        builder.finish();
484
485        assert!(builder.is_empty());
486
487        builder.append_value(b"parquet");
488        builder.append_null();
489        builder.append_value(b"arrow");
490        builder.append_value(b"");
491        builder.append_nulls(2);
492        builder.append_value(b"hi");
493        let array = builder.finish();
494
495        assert_eq!(7, array.len());
496        assert_eq!(3, array.null_count());
497        assert_eq!(b"parquet", array.value(0));
498        assert!(array.is_null(1));
499        assert!(array.is_null(4));
500        assert!(array.is_null(5));
501        assert_eq!(b"arrow", array.value(2));
502        assert_eq!(b"", array.value(1));
503        assert_eq!(b"hi", array.value(6));
504
505        assert_eq!(O::zero(), array.value_offsets()[0]);
506        assert_eq!(O::from_usize(7).unwrap(), array.value_offsets()[2]);
507        assert_eq!(O::from_usize(14).unwrap(), array.value_offsets()[7]);
508        assert_eq!(O::from_usize(5).unwrap(), array.value_length(2));
509    }
510
511    #[test]
512    fn test_binary_builder_reset() {
513        _test_generic_binary_builder_reset::<i32>()
514    }
515
516    #[test]
517    fn test_large_binary_builder_reset() {
518        _test_generic_binary_builder_reset::<i64>()
519    }
520
521    fn _test_generic_string_array_builder<O: OffsetSizeTrait>() {
522        let mut builder = GenericStringBuilder::<O>::new();
523        let owned = "arrow".to_owned();
524
525        builder.append_value("hello");
526        builder.append_value("");
527        builder.append_value(&owned);
528        builder.append_null();
529        builder.append_option(Some("rust"));
530        builder.append_option(None::<&str>);
531        builder.append_option(None::<String>);
532        builder.append_nulls(2);
533        builder.append_value("parquet");
534        assert_eq!(10, builder.len());
535
536        assert_eq!(
537            GenericStringArray::<O>::from(vec![
538                Some("hello"),
539                Some(""),
540                Some("arrow"),
541                None,
542                Some("rust"),
543                None,
544                None,
545                None,
546                None,
547                Some("parquet")
548            ]),
549            builder.finish()
550        );
551    }
552
553    #[test]
554    fn test_string_array_builder() {
555        _test_generic_string_array_builder::<i32>()
556    }
557
558    #[test]
559    fn test_large_string_array_builder() {
560        _test_generic_string_array_builder::<i64>()
561    }
562
563    fn _test_generic_string_array_builder_finish<O: OffsetSizeTrait>() {
564        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);
565
566        builder.append_value("hello");
567        builder.append_value("rust");
568        builder.append_null();
569
570        builder.finish();
571        assert!(builder.is_empty());
572        assert_eq!(&[O::zero()], builder.offsets_slice());
573
574        builder.append_value("arrow");
575        builder.append_value("parquet");
576        let arr = builder.finish();
577        // array should not have null buffer because there is not `null` value.
578        assert!(arr.nulls().is_none());
579        assert_eq!(GenericStringArray::<O>::from(vec!["arrow", "parquet"]), arr,)
580    }
581
582    #[test]
583    fn test_string_array_builder_finish() {
584        _test_generic_string_array_builder_finish::<i32>()
585    }
586
587    #[test]
588    fn test_large_string_array_builder_finish() {
589        _test_generic_string_array_builder_finish::<i64>()
590    }
591
592    fn _test_generic_string_array_builder_finish_cloned<O: OffsetSizeTrait>() {
593        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);
594
595        builder.append_value("hello");
596        builder.append_value("rust");
597        builder.append_null();
598
599        let mut arr = builder.finish_cloned();
600        assert!(!builder.is_empty());
601        assert_eq!(3, arr.len());
602
603        builder.append_value("arrow");
604        builder.append_value("parquet");
605        arr = builder.finish();
606
607        assert!(arr.nulls().is_some());
608        assert_eq!(&[O::zero()], builder.offsets_slice());
609        assert_eq!(5, arr.len());
610    }
611
612    #[test]
613    fn test_string_array_builder_finish_cloned() {
614        _test_generic_string_array_builder_finish_cloned::<i32>()
615    }
616
617    #[test]
618    fn test_large_string_array_builder_finish_cloned() {
619        _test_generic_string_array_builder_finish_cloned::<i64>()
620    }
621
622    #[test]
623    fn test_extend() {
624        let mut builder = GenericStringBuilder::<i32>::new();
625        builder.extend(["a", "b", "c", "", "a", "b", "c"].into_iter().map(Some));
626        builder.extend(["d", "cupcakes", "hello"].into_iter().map(Some));
627        let array = builder.finish();
628        assert_eq!(array.value_offsets(), &[0, 1, 2, 3, 3, 4, 5, 6, 7, 15, 20]);
629        assert_eq!(array.value_data(), b"abcabcdcupcakeshello");
630    }
631
632    #[test]
633    fn test_write_str() {
634        let mut builder = GenericStringBuilder::<i32>::new();
635        write!(builder, "foo").unwrap();
636        builder.append_value("");
637        writeln!(builder, "bar").unwrap();
638        builder.append_value("");
639        write!(builder, "fiz").unwrap();
640        write!(builder, "buz").unwrap();
641        builder.append_value("");
642        let a = builder.finish();
643        let r: Vec<_> = a.iter().flatten().collect();
644        assert_eq!(r, &["foo", "bar\n", "fizbuz"])
645    }
646
647    #[test]
648    fn test_write_bytes() {
649        let mut builder = GenericBinaryBuilder::<i32>::new();
650        write!(builder, "foo").unwrap();
651        builder.append_value("");
652        writeln!(builder, "bar").unwrap();
653        builder.append_value("");
654        write!(builder, "fiz").unwrap();
655        write!(builder, "buz").unwrap();
656        builder.append_value("");
657        let a = builder.finish();
658        let r: Vec<_> = a.iter().flatten().collect();
659        assert_eq!(
660            r,
661            &["foo".as_bytes(), "bar\n".as_bytes(), "fizbuz".as_bytes()]
662        )
663    }
664
665    #[test]
666    fn test_append_array_without_nulls() {
667        let input = vec![
668            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
669            "thank", "you", "for", "asking",
670        ];
671        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
672        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
673        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
674
675        let mut builder = GenericStringBuilder::<i32>::new();
676        builder.append_array(&arr1);
677        builder.append_array(&arr2);
678        builder.append_array(&arr3);
679
680        let actual = builder.finish();
681        let expected = GenericStringArray::<i32>::from(input);
682
683        assert_eq!(actual, expected);
684    }
685
686    #[test]
687    fn test_append_array_with_nulls() {
688        let input = vec![
689            Some("hello"),
690            None,
691            Some("how"),
692            None,
693            None,
694            None,
695            None,
696            Some("I"),
697            Some("am"),
698            Some("doing"),
699            Some("well"),
700        ];
701        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
702        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
703        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
704
705        let mut builder = GenericStringBuilder::<i32>::new();
706        builder.append_array(&arr1);
707        builder.append_array(&arr2);
708        builder.append_array(&arr3);
709
710        let actual = builder.finish();
711        let expected = GenericStringArray::<i32>::from(input);
712
713        assert_eq!(actual, expected);
714    }
715
716    #[test]
717    fn test_append_empty_array() {
718        let arr = GenericStringArray::<i32>::from(Vec::<&str>::new());
719        let mut builder = GenericStringBuilder::<i32>::new();
720        builder.append_array(&arr);
721        let result = builder.finish();
722        assert_eq!(result.len(), 0);
723    }
724
725    #[test]
726    fn test_append_array_with_offset_not_starting_at_0() {
727        let input = vec![
728            Some("hello"),
729            None,
730            Some("how"),
731            None,
732            None,
733            None,
734            None,
735            Some("I"),
736            Some("am"),
737            Some("doing"),
738            Some("well"),
739        ];
740        let full_array = GenericStringArray::<i32>::from(input);
741        let sliced = full_array.slice(1, 4);
742
743        assert_ne!(sliced.offsets()[0].as_usize(), 0);
744        assert_ne!(sliced.offsets().last(), full_array.offsets().last());
745
746        let mut builder = GenericStringBuilder::<i32>::new();
747        builder.append_array(&sliced);
748        let actual = builder.finish();
749
750        let expected = GenericStringArray::<i32>::from(vec![None, Some("how"), None, None]);
751
752        assert_eq!(actual, expected);
753    }
754
755    #[test]
756    fn test_append_underlying_null_values_added_as_is() {
757        let input_1_array_with_nulls = {
758            let input = vec![
759                "hello", "world", "how", "are", "you", "doing", "today", "I", "am",
760            ];
761            let (offsets, buffer, _) = GenericStringArray::<i32>::from(input).into_parts();
762
763            GenericStringArray::<i32>::new(
764                offsets,
765                buffer,
766                Some(NullBuffer::from(&[
767                    true, false, true, false, false, true, true, true, false,
768                ])),
769            )
770        };
771        let input_2_array_with_nulls = {
772            let input = vec!["doing", "well", "thank", "you", "for", "asking"];
773            let (offsets, buffer, _) = GenericStringArray::<i32>::from(input).into_parts();
774
775            GenericStringArray::<i32>::new(
776                offsets,
777                buffer,
778                Some(NullBuffer::from(&[false, false, true, false, true, true])),
779            )
780        };
781
782        let mut builder = GenericStringBuilder::<i32>::new();
783        builder.append_array(&input_1_array_with_nulls);
784        builder.append_array(&input_2_array_with_nulls);
785
786        let actual = builder.finish();
787        let expected = GenericStringArray::<i32>::from(vec![
788            Some("hello"),
789            None, // world
790            Some("how"),
791            None, // are
792            None, // you
793            Some("doing"),
794            Some("today"),
795            Some("I"),
796            None, // am
797            None, // doing
798            None, // well
799            Some("thank"),
800            None, // "you",
801            Some("for"),
802            Some("asking"),
803        ]);
804
805        assert_eq!(actual, expected);
806
807        let expected_underlying_buffer = Buffer::from(
808            [
809                "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing",
810                "well", "thank", "you", "for", "asking",
811            ]
812            .join("")
813            .as_bytes(),
814        );
815        assert_eq!(actual.values(), &expected_underlying_buffer);
816    }
817
818    #[test]
819    fn append_array_with_continues_indices() {
820        let input = vec![
821            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
822            "thank", "you", "for", "asking",
823        ];
824        let full_array = GenericStringArray::<i32>::from(input);
825        let slice1 = full_array.slice(0, 3);
826        let slice2 = full_array.slice(3, 4);
827        let slice3 = full_array.slice(7, full_array.len() - 7);
828
829        let mut builder = GenericStringBuilder::<i32>::new();
830        builder.append_array(&slice1);
831        builder.append_array(&slice2);
832        builder.append_array(&slice3);
833
834        let actual = builder.finish();
835
836        assert_eq!(actual, full_array);
837    }
838}