arrow_array/builder/
generic_bytes_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::builder::{ArrayBuilder, BufferBuilder, UInt8BufferBuilder};
19use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
20use crate::{Array, ArrayRef, GenericByteArray, OffsetSizeTrait};
21use arrow_buffer::NullBufferBuilder;
22use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
23use arrow_data::ArrayDataBuilder;
24use std::any::Any;
25use std::sync::Arc;
26
/// Builder for [`GenericByteArray`]
///
/// For building strings, see docs on [`GenericStringBuilder`].
/// For building binary, see docs on [`GenericBinaryBuilder`].
pub struct GenericByteBuilder<T: ByteArrayType> {
    /// Bytes of all appended values, stored back to back.
    value_builder: UInt8BufferBuilder,
    /// End offset of every slot, taken from the length of `value_builder` when the
    /// slot is appended; `with_capacity` seeds it with a leading `0`.
    offsets_builder: BufferBuilder<T::Offset>,
    /// Validity of every slot; its length is what `len()` reports.
    null_buffer_builder: NullBufferBuilder,
}
36
37impl<T: ByteArrayType> GenericByteBuilder<T> {
38    /// Creates a new [`GenericByteBuilder`].
39    pub fn new() -> Self {
40        Self::with_capacity(1024, 1024)
41    }
42
43    /// Creates a new [`GenericByteBuilder`].
44    ///
45    /// - `item_capacity` is the number of items to pre-allocate.
46    ///   The size of the preallocated buffer of offsets is the number of items plus one.
47    /// - `data_capacity` is the total number of bytes of data to pre-allocate
48    ///   (for all items, not per item).
49    pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
50        let mut offsets_builder = BufferBuilder::<T::Offset>::new(item_capacity + 1);
51        offsets_builder.append(T::Offset::from_usize(0).unwrap());
52        Self {
53            value_builder: UInt8BufferBuilder::new(data_capacity),
54            offsets_builder,
55            null_buffer_builder: NullBufferBuilder::new(item_capacity),
56        }
57    }
58
59    /// Creates a new  [`GenericByteBuilder`] from buffers.
60    ///
61    /// # Safety
62    ///
63    /// This doesn't verify buffer contents as it assumes the buffers are from
64    /// existing and valid [`GenericByteArray`].
65    pub unsafe fn new_from_buffer(
66        offsets_buffer: MutableBuffer,
67        value_buffer: MutableBuffer,
68        null_buffer: Option<MutableBuffer>,
69    ) -> Self {
70        let offsets_builder = BufferBuilder::<T::Offset>::new_from_buffer(offsets_buffer);
71        let value_builder = BufferBuilder::<u8>::new_from_buffer(value_buffer);
72
73        let null_buffer_builder = null_buffer
74            .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1))
75            .unwrap_or_else(|| NullBufferBuilder::new_with_len(offsets_builder.len() - 1));
76
77        Self {
78            offsets_builder,
79            value_builder,
80            null_buffer_builder,
81        }
82    }
83
84    #[inline]
85    fn next_offset(&self) -> T::Offset {
86        T::Offset::from_usize(self.value_builder.len()).expect("byte array offset overflow")
87    }
88
89    /// Appends a value into the builder.
90    ///
91    /// See the [GenericStringBuilder] documentation for examples of
92    /// incrementally building string values with multiple `write!` calls.
93    ///
94    /// # Panics
95    ///
96    /// Panics if the resulting length of [`Self::values_slice`] would exceed
97    /// `T::Offset::MAX` bytes.
98    ///
99    /// For example, this can happen with [`StringArray`] or [`BinaryArray`]
100    /// where the total length of all values exceeds 2GB
101    ///
102    /// [`StringArray`]: crate::StringArray
103    /// [`BinaryArray`]: crate::BinaryArray
104    #[inline]
105    pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
106        self.value_builder.append_slice(value.as_ref().as_ref());
107        self.null_buffer_builder.append(true);
108        self.offsets_builder.append(self.next_offset());
109    }
110
111    /// Append an `Option` value into the builder.
112    ///
113    /// - A `None` value will append a null value.
114    /// - A `Some` value will append the value.
115    ///
116    /// See [`Self::append_value`] for more panic information.
117    #[inline]
118    pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
119        match value {
120            None => self.append_null(),
121            Some(v) => self.append_value(v),
122        };
123    }
124
125    /// Append a null value into the builder.
126    #[inline]
127    pub fn append_null(&mut self) {
128        self.null_buffer_builder.append(false);
129        self.offsets_builder.append(self.next_offset());
130    }
131
132    /// Appends `n` `null`s into the builder.
133    #[inline]
134    pub fn append_nulls(&mut self, n: usize) {
135        self.null_buffer_builder.append_n_nulls(n);
136        let next_offset = self.next_offset();
137        self.offsets_builder.append_n(n, next_offset);
138    }
139
140    /// Appends array values and null to this builder as is
141    /// (this means that underlying null values are copied as is).
142    #[inline]
143    pub fn append_array(&mut self, array: &GenericByteArray<T>) {
144        if array.len() == 0 {
145            return;
146        }
147
148        let offsets = array.offsets();
149
150        // If the offsets are contiguous, we can append them directly avoiding the need to align
151        // for example, when the first appended array is not sliced (starts at offset 0)
152        if self.next_offset() == offsets[0] {
153            self.offsets_builder.append_slice(&offsets[1..]);
154        } else {
155            // Shifting all the offsets
156            let shift: T::Offset = self.next_offset() - offsets[0];
157
158            // Creating intermediate offsets instead of pushing each offset is faster
159            // (even if we make MutableBuffer to avoid updating length on each push
160            //  and reserve the necessary capacity, it's still slower)
161            let mut intermediate = Vec::with_capacity(offsets.len() - 1);
162
163            for &offset in &offsets[1..] {
164                intermediate.push(offset + shift)
165            }
166
167            self.offsets_builder.append_slice(&intermediate);
168        }
169
170        // Append underlying values, starting from the first offset and ending at the last offset
171        self.value_builder.append_slice(
172            &array.values().as_slice()[offsets[0].as_usize()..offsets[array.len()].as_usize()],
173        );
174
175        if let Some(null_buffer) = array.nulls() {
176            self.null_buffer_builder.append_buffer(null_buffer);
177        } else {
178            self.null_buffer_builder.append_n_non_nulls(array.len());
179        }
180    }
181
182    /// Builds the [`GenericByteArray`] and reset this builder.
183    pub fn finish(&mut self) -> GenericByteArray<T> {
184        let array_type = T::DATA_TYPE;
185        let array_builder = ArrayDataBuilder::new(array_type)
186            .len(self.len())
187            .add_buffer(self.offsets_builder.finish())
188            .add_buffer(self.value_builder.finish())
189            .nulls(self.null_buffer_builder.finish());
190
191        self.offsets_builder.append(self.next_offset());
192        let array_data = unsafe { array_builder.build_unchecked() };
193        GenericByteArray::from(array_data)
194    }
195
196    /// Builds the [`GenericByteArray`] without resetting the builder.
197    pub fn finish_cloned(&self) -> GenericByteArray<T> {
198        let array_type = T::DATA_TYPE;
199        let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
200        let value_buffer = Buffer::from_slice_ref(self.value_builder.as_slice());
201        let array_builder = ArrayDataBuilder::new(array_type)
202            .len(self.len())
203            .add_buffer(offset_buffer)
204            .add_buffer(value_buffer)
205            .nulls(self.null_buffer_builder.finish_cloned());
206
207        let array_data = unsafe { array_builder.build_unchecked() };
208        GenericByteArray::from(array_data)
209    }
210
211    /// Returns the current values buffer as a slice
212    pub fn values_slice(&self) -> &[u8] {
213        self.value_builder.as_slice()
214    }
215
216    /// Returns the current offsets buffer as a slice
217    pub fn offsets_slice(&self) -> &[T::Offset] {
218        self.offsets_builder.as_slice()
219    }
220
221    /// Returns the current null buffer as a slice
222    pub fn validity_slice(&self) -> Option<&[u8]> {
223        self.null_buffer_builder.as_slice()
224    }
225
226    /// Returns the current null buffer as a mutable slice
227    pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
228        self.null_buffer_builder.as_slice_mut()
229    }
230}
231
232impl<T: ByteArrayType> std::fmt::Debug for GenericByteBuilder<T> {
233    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
234        write!(f, "{}{}Builder", T::Offset::PREFIX, T::PREFIX)?;
235        f.debug_struct("")
236            .field("value_builder", &self.value_builder)
237            .field("offsets_builder", &self.offsets_builder)
238            .field("null_buffer_builder", &self.null_buffer_builder)
239            .finish()
240    }
241}
242
243impl<T: ByteArrayType> Default for GenericByteBuilder<T> {
244    fn default() -> Self {
245        Self::new()
246    }
247}
248
249impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
250    /// Returns the number of binary slots in the builder
251    fn len(&self) -> usize {
252        self.null_buffer_builder.len()
253    }
254
255    /// Builds the array and reset this builder.
256    fn finish(&mut self) -> ArrayRef {
257        Arc::new(self.finish())
258    }
259
260    /// Builds the array without resetting the builder.
261    fn finish_cloned(&self) -> ArrayRef {
262        Arc::new(self.finish_cloned())
263    }
264
265    /// Returns the builder as a non-mutable `Any` reference.
266    fn as_any(&self) -> &dyn Any {
267        self
268    }
269
270    /// Returns the builder as a mutable `Any` reference.
271    fn as_any_mut(&mut self) -> &mut dyn Any {
272        self
273    }
274
275    /// Returns the boxed builder as a box of `Any`.
276    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
277        self
278    }
279}
280
281impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBuilder<T> {
282    #[inline]
283    fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
284        for v in iter {
285            self.append_option(v)
286        }
287    }
288}
289
/// Array builder for [`GenericStringArray`][crate::GenericStringArray]
///
/// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
/// [`GenericByteBuilder::append_null`].
///
/// This builder also implements [`std::fmt::Write`] with any written data
/// included in the next appended value. This allows using [`std::fmt::Display`]
/// with standard Rust idioms like `write!` and `writeln!` to write data
/// directly to the builder without intermediate allocations.
///
/// Written data is not a value on its own: it stays pending in the values
/// buffer until the next slot is appended.
///
/// # Example writing strings with `append_value`
/// ```
/// # use arrow_array::builder::GenericStringBuilder;
/// let mut builder = GenericStringBuilder::<i32>::new();
///
/// // Write one string value
/// builder.append_value("foobarbaz");
///
/// // Write a second string
/// builder.append_value("v2");
///
/// let array = builder.finish();
/// assert_eq!(array.value(0), "foobarbaz");
/// assert_eq!(array.value(1), "v2");
/// ```
///
/// # Example incrementally writing strings with `std::fmt::Write`
///
/// ```
/// # use std::fmt::Write;
/// # use arrow_array::builder::GenericStringBuilder;
/// let mut builder = GenericStringBuilder::<i32>::new();
///
/// // Write data in multiple `write!` calls
/// write!(builder, "foo").unwrap();
/// write!(builder, "bar").unwrap();
/// // The next call to append_value finishes the current string
/// // including all previously written strings.
/// builder.append_value("baz");
///
/// // Write second value with a single write call
/// write!(builder, "v2").unwrap();
/// // finish the value by calling append_value with an empty string
/// builder.append_value("");
///
/// let array = builder.finish();
/// assert_eq!(array.value(0), "foobarbaz");
/// assert_eq!(array.value(1), "v2");
/// ```
pub type GenericStringBuilder<O> = GenericByteBuilder<GenericStringType<O>>;
340
341impl<O: OffsetSizeTrait> std::fmt::Write for GenericStringBuilder<O> {
342    fn write_str(&mut self, s: &str) -> std::fmt::Result {
343        self.value_builder.append_slice(s.as_bytes());
344        Ok(())
345    }
346}
347
/// Array builder for [`GenericBinaryArray`][crate::GenericBinaryArray]
///
/// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
/// [`GenericByteBuilder::append_null`].
///
/// This builder also implements [`std::io::Write`] with any written data
/// included in the next appended value.
///
/// # Example
/// ```
/// # use arrow_array::builder::GenericBinaryBuilder;
/// let mut builder = GenericBinaryBuilder::<i32>::new();
///
/// // Write data
/// builder.append_value("foo");
///
/// // Write second value
/// builder.append_value(&[0,1,2]);
///
/// let array = builder.finish();
/// // binary values
/// assert_eq!(array.value(0), b"foo");
/// assert_eq!(array.value(1), b"\x00\x01\x02");
/// ```
///
/// # Example incrementally writing bytes with `std::io::Write`
///
/// ```
/// # use std::io::Write;
/// # use arrow_array::builder::GenericBinaryBuilder;
/// let mut builder = GenericBinaryBuilder::<i32>::new();
///
/// // Write data in multiple `write!` calls
/// write!(builder, "foo").unwrap();
/// write!(builder, "bar").unwrap();
/// // The next call to append_value finishes the current value
/// // including all previously written bytes.
/// builder.append_value("baz");
///
/// // Write second value with a single write call
/// write!(builder, "v2").unwrap();
/// // finish the value by calling append_value with an empty value
/// builder.append_value("");
///
/// let array = builder.finish();
/// assert_eq!(array.value(0), "foobarbaz".as_bytes());
/// assert_eq!(array.value(1), "v2".as_bytes());
/// ```
pub type GenericBinaryBuilder<O> = GenericByteBuilder<GenericBinaryType<O>>;
394
395impl<O: OffsetSizeTrait> std::io::Write for GenericBinaryBuilder<O> {
396    fn write(&mut self, bs: &[u8]) -> std::io::Result<usize> {
397        self.value_builder.append_slice(bs);
398        Ok(bs.len())
399    }
400
401    fn flush(&mut self) -> std::io::Result<()> {
402        Ok(())
403    }
404}
405
406#[cfg(test)]
407mod tests {
408    use super::*;
409    use crate::array::Array;
410    use crate::GenericStringArray;
411    use arrow_buffer::NullBuffer;
412    use std::fmt::Write as _;
413    use std::io::Write as _;
414
415    fn _test_generic_binary_builder<O: OffsetSizeTrait>() {
416        let mut builder = GenericBinaryBuilder::<O>::new();
417
418        builder.append_value(b"hello");
419        builder.append_value(b"");
420        builder.append_null();
421        builder.append_value(b"rust");
422
423        let array = builder.finish();
424
425        assert_eq!(4, array.len());
426        assert_eq!(1, array.null_count());
427        assert_eq!(b"hello", array.value(0));
428        assert_eq!([] as [u8; 0], array.value(1));
429        assert!(array.is_null(2));
430        assert_eq!(b"rust", array.value(3));
431        assert_eq!(O::from_usize(5).unwrap(), array.value_offsets()[2]);
432        assert_eq!(O::from_usize(4).unwrap(), array.value_length(3));
433    }
434
435    #[test]
436    fn test_binary_builder() {
437        _test_generic_binary_builder::<i32>()
438    }
439
440    #[test]
441    fn test_large_binary_builder() {
442        _test_generic_binary_builder::<i64>()
443    }
444
445    fn _test_generic_binary_builder_all_nulls<O: OffsetSizeTrait>() {
446        let mut builder = GenericBinaryBuilder::<O>::new();
447        builder.append_null();
448        builder.append_null();
449        builder.append_null();
450        builder.append_nulls(2);
451        assert_eq!(5, builder.len());
452        assert!(!builder.is_empty());
453
454        let array = builder.finish();
455        assert_eq!(5, array.null_count());
456        assert_eq!(5, array.len());
457        assert!(array.is_null(0));
458        assert!(array.is_null(1));
459        assert!(array.is_null(2));
460        assert!(array.is_null(3));
461        assert!(array.is_null(4));
462    }
463
464    #[test]
465    fn test_binary_builder_all_nulls() {
466        _test_generic_binary_builder_all_nulls::<i32>()
467    }
468
469    #[test]
470    fn test_large_binary_builder_all_nulls() {
471        _test_generic_binary_builder_all_nulls::<i64>()
472    }
473
474    fn _test_generic_binary_builder_reset<O: OffsetSizeTrait>() {
475        let mut builder = GenericBinaryBuilder::<O>::new();
476
477        builder.append_value(b"hello");
478        builder.append_value(b"");
479        builder.append_null();
480        builder.append_value(b"rust");
481        builder.finish();
482
483        assert!(builder.is_empty());
484
485        builder.append_value(b"parquet");
486        builder.append_null();
487        builder.append_value(b"arrow");
488        builder.append_value(b"");
489        builder.append_nulls(2);
490        builder.append_value(b"hi");
491        let array = builder.finish();
492
493        assert_eq!(7, array.len());
494        assert_eq!(3, array.null_count());
495        assert_eq!(b"parquet", array.value(0));
496        assert!(array.is_null(1));
497        assert!(array.is_null(4));
498        assert!(array.is_null(5));
499        assert_eq!(b"arrow", array.value(2));
500        assert_eq!(b"", array.value(1));
501        assert_eq!(b"hi", array.value(6));
502
503        assert_eq!(O::zero(), array.value_offsets()[0]);
504        assert_eq!(O::from_usize(7).unwrap(), array.value_offsets()[2]);
505        assert_eq!(O::from_usize(14).unwrap(), array.value_offsets()[7]);
506        assert_eq!(O::from_usize(5).unwrap(), array.value_length(2));
507    }
508
509    #[test]
510    fn test_binary_builder_reset() {
511        _test_generic_binary_builder_reset::<i32>()
512    }
513
514    #[test]
515    fn test_large_binary_builder_reset() {
516        _test_generic_binary_builder_reset::<i64>()
517    }
518
519    fn _test_generic_string_array_builder<O: OffsetSizeTrait>() {
520        let mut builder = GenericStringBuilder::<O>::new();
521        let owned = "arrow".to_owned();
522
523        builder.append_value("hello");
524        builder.append_value("");
525        builder.append_value(&owned);
526        builder.append_null();
527        builder.append_option(Some("rust"));
528        builder.append_option(None::<&str>);
529        builder.append_option(None::<String>);
530        builder.append_nulls(2);
531        builder.append_value("parquet");
532        assert_eq!(10, builder.len());
533
534        assert_eq!(
535            GenericStringArray::<O>::from(vec![
536                Some("hello"),
537                Some(""),
538                Some("arrow"),
539                None,
540                Some("rust"),
541                None,
542                None,
543                None,
544                None,
545                Some("parquet")
546            ]),
547            builder.finish()
548        );
549    }
550
551    #[test]
552    fn test_string_array_builder() {
553        _test_generic_string_array_builder::<i32>()
554    }
555
556    #[test]
557    fn test_large_string_array_builder() {
558        _test_generic_string_array_builder::<i64>()
559    }
560
561    fn _test_generic_string_array_builder_finish<O: OffsetSizeTrait>() {
562        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);
563
564        builder.append_value("hello");
565        builder.append_value("rust");
566        builder.append_null();
567
568        builder.finish();
569        assert!(builder.is_empty());
570        assert_eq!(&[O::zero()], builder.offsets_slice());
571
572        builder.append_value("arrow");
573        builder.append_value("parquet");
574        let arr = builder.finish();
575        // array should not have null buffer because there is not `null` value.
576        assert!(arr.nulls().is_none());
577        assert_eq!(GenericStringArray::<O>::from(vec!["arrow", "parquet"]), arr,)
578    }
579
580    #[test]
581    fn test_string_array_builder_finish() {
582        _test_generic_string_array_builder_finish::<i32>()
583    }
584
585    #[test]
586    fn test_large_string_array_builder_finish() {
587        _test_generic_string_array_builder_finish::<i64>()
588    }
589
590    fn _test_generic_string_array_builder_finish_cloned<O: OffsetSizeTrait>() {
591        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);
592
593        builder.append_value("hello");
594        builder.append_value("rust");
595        builder.append_null();
596
597        let mut arr = builder.finish_cloned();
598        assert!(!builder.is_empty());
599        assert_eq!(3, arr.len());
600
601        builder.append_value("arrow");
602        builder.append_value("parquet");
603        arr = builder.finish();
604
605        assert!(arr.nulls().is_some());
606        assert_eq!(&[O::zero()], builder.offsets_slice());
607        assert_eq!(5, arr.len());
608    }
609
610    #[test]
611    fn test_string_array_builder_finish_cloned() {
612        _test_generic_string_array_builder_finish_cloned::<i32>()
613    }
614
615    #[test]
616    fn test_large_string_array_builder_finish_cloned() {
617        _test_generic_string_array_builder_finish_cloned::<i64>()
618    }
619
620    #[test]
621    fn test_extend() {
622        let mut builder = GenericStringBuilder::<i32>::new();
623        builder.extend(["a", "b", "c", "", "a", "b", "c"].into_iter().map(Some));
624        builder.extend(["d", "cupcakes", "hello"].into_iter().map(Some));
625        let array = builder.finish();
626        assert_eq!(array.value_offsets(), &[0, 1, 2, 3, 3, 4, 5, 6, 7, 15, 20]);
627        assert_eq!(array.value_data(), b"abcabcdcupcakeshello");
628    }
629
630    #[test]
631    fn test_write_str() {
632        let mut builder = GenericStringBuilder::<i32>::new();
633        write!(builder, "foo").unwrap();
634        builder.append_value("");
635        writeln!(builder, "bar").unwrap();
636        builder.append_value("");
637        write!(builder, "fiz").unwrap();
638        write!(builder, "buz").unwrap();
639        builder.append_value("");
640        let a = builder.finish();
641        let r: Vec<_> = a.iter().flatten().collect();
642        assert_eq!(r, &["foo", "bar\n", "fizbuz"])
643    }
644
645    #[test]
646    fn test_write_bytes() {
647        let mut builder = GenericBinaryBuilder::<i32>::new();
648        write!(builder, "foo").unwrap();
649        builder.append_value("");
650        writeln!(builder, "bar").unwrap();
651        builder.append_value("");
652        write!(builder, "fiz").unwrap();
653        write!(builder, "buz").unwrap();
654        builder.append_value("");
655        let a = builder.finish();
656        let r: Vec<_> = a.iter().flatten().collect();
657        assert_eq!(
658            r,
659            &["foo".as_bytes(), "bar\n".as_bytes(), "fizbuz".as_bytes()]
660        )
661    }
662
663    #[test]
664    fn test_append_array_without_nulls() {
665        let input = vec![
666            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
667            "thank", "you", "for", "asking",
668        ];
669        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
670        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
671        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
672
673        let mut builder = GenericStringBuilder::<i32>::new();
674        builder.append_array(&arr1);
675        builder.append_array(&arr2);
676        builder.append_array(&arr3);
677
678        let actual = builder.finish();
679        let expected = GenericStringArray::<i32>::from(input);
680
681        assert_eq!(actual, expected);
682    }
683
684    #[test]
685    fn test_append_array_with_nulls() {
686        let input = vec![
687            Some("hello"),
688            None,
689            Some("how"),
690            None,
691            None,
692            None,
693            None,
694            Some("I"),
695            Some("am"),
696            Some("doing"),
697            Some("well"),
698        ];
699        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
700        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
701        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
702
703        let mut builder = GenericStringBuilder::<i32>::new();
704        builder.append_array(&arr1);
705        builder.append_array(&arr2);
706        builder.append_array(&arr3);
707
708        let actual = builder.finish();
709        let expected = GenericStringArray::<i32>::from(input);
710
711        assert_eq!(actual, expected);
712    }
713
714    #[test]
715    fn test_append_empty_array() {
716        let arr = GenericStringArray::<i32>::from(Vec::<&str>::new());
717        let mut builder = GenericStringBuilder::<i32>::new();
718        builder.append_array(&arr);
719        let result = builder.finish();
720        assert_eq!(result.len(), 0);
721    }
722
723    #[test]
724    fn test_append_array_with_offset_not_starting_at_0() {
725        let input = vec![
726            Some("hello"),
727            None,
728            Some("how"),
729            None,
730            None,
731            None,
732            None,
733            Some("I"),
734            Some("am"),
735            Some("doing"),
736            Some("well"),
737        ];
738        let full_array = GenericStringArray::<i32>::from(input);
739        let sliced = full_array.slice(1, 4);
740
741        assert_ne!(sliced.offsets()[0].as_usize(), 0);
742        assert_ne!(sliced.offsets().last(), full_array.offsets().last());
743
744        let mut builder = GenericStringBuilder::<i32>::new();
745        builder.append_array(&sliced);
746        let actual = builder.finish();
747
748        let expected = GenericStringArray::<i32>::from(vec![None, Some("how"), None, None]);
749
750        assert_eq!(actual, expected);
751    }
752
753    #[test]
754    fn test_append_underlying_null_values_added_as_is() {
755        let input_1_array_with_nulls = {
756            let input = vec![
757                "hello", "world", "how", "are", "you", "doing", "today", "I", "am",
758            ];
759            let (offsets, buffer, _) = GenericStringArray::<i32>::from(input).into_parts();
760
761            GenericStringArray::<i32>::new(
762                offsets,
763                buffer,
764                Some(NullBuffer::from(&[
765                    true, false, true, false, false, true, true, true, false,
766                ])),
767            )
768        };
769        let input_2_array_with_nulls = {
770            let input = vec!["doing", "well", "thank", "you", "for", "asking"];
771            let (offsets, buffer, _) = GenericStringArray::<i32>::from(input).into_parts();
772
773            GenericStringArray::<i32>::new(
774                offsets,
775                buffer,
776                Some(NullBuffer::from(&[false, false, true, false, true, true])),
777            )
778        };
779
780        let mut builder = GenericStringBuilder::<i32>::new();
781        builder.append_array(&input_1_array_with_nulls);
782        builder.append_array(&input_2_array_with_nulls);
783
784        let actual = builder.finish();
785        let expected = GenericStringArray::<i32>::from(vec![
786            Some("hello"),
787            None, // world
788            Some("how"),
789            None, // are
790            None, // you
791            Some("doing"),
792            Some("today"),
793            Some("I"),
794            None, // am
795            None, // doing
796            None, // well
797            Some("thank"),
798            None, // "you",
799            Some("for"),
800            Some("asking"),
801        ]);
802
803        assert_eq!(actual, expected);
804
805        let expected_underlying_buffer = Buffer::from(
806            [
807                "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing",
808                "well", "thank", "you", "for", "asking",
809            ]
810            .join("")
811            .as_bytes(),
812        );
813        assert_eq!(actual.values(), &expected_underlying_buffer);
814    }
815
816    #[test]
817    fn append_array_with_continues_indices() {
818        let input = vec![
819            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
820            "thank", "you", "for", "asking",
821        ];
822        let full_array = GenericStringArray::<i32>::from(input);
823        let slice1 = full_array.slice(0, 3);
824        let slice2 = full_array.slice(3, 4);
825        let slice3 = full_array.slice(7, full_array.len() - 7);
826
827        let mut builder = GenericStringBuilder::<i32>::new();
828        builder.append_array(&slice1);
829        builder.append_array(&slice2);
830        builder.append_array(&slice3);
831
832        let actual = builder.finish();
833
834        assert_eq!(actual, full_array);
835    }
836}