1use crate::builder::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder};
19use crate::types::{ArrowDictionaryKeyType, ByteArrayType, GenericBinaryType, GenericStringType};
20use crate::{
21 Array, ArrayRef, DictionaryArray, GenericByteArray, PrimitiveArray, TypedDictionaryArray,
22};
23use arrow_buffer::ArrowNativeType;
24use arrow_schema::{ArrowError, DataType};
25use hashbrown::HashTable;
26use num_traits::NumCast;
27use std::any::Any;
28use std::sync::Arc;
29
30#[derive(Debug)]
36pub struct GenericByteDictionaryBuilder<K, T>
37where
38 K: ArrowDictionaryKeyType,
39 T: ByteArrayType,
40{
41 state: ahash::RandomState,
42 dedup: HashTable<usize>,
43
44 keys_builder: PrimitiveBuilder<K>,
45 values_builder: GenericByteBuilder<T>,
46}
47
48impl<K, T> Default for GenericByteDictionaryBuilder<K, T>
49where
50 K: ArrowDictionaryKeyType,
51 T: ByteArrayType,
52{
53 fn default() -> Self {
54 Self::new()
55 }
56}
57
58impl<K, T> GenericByteDictionaryBuilder<K, T>
59where
60 K: ArrowDictionaryKeyType,
61 T: ByteArrayType,
62{
63 pub fn new() -> Self {
65 let keys_builder = PrimitiveBuilder::new();
66 let values_builder = GenericByteBuilder::<T>::new();
67 Self {
68 state: Default::default(),
69 dedup: HashTable::with_capacity(keys_builder.capacity()),
70 keys_builder,
71 values_builder,
72 }
73 }
74
75 pub fn with_capacity(
81 keys_capacity: usize,
82 value_capacity: usize,
83 data_capacity: usize,
84 ) -> Self {
85 Self {
86 state: Default::default(),
87 dedup: Default::default(),
88 keys_builder: PrimitiveBuilder::with_capacity(keys_capacity),
89 values_builder: GenericByteBuilder::<T>::with_capacity(value_capacity, data_capacity),
90 }
91 }
92
93 pub fn new_with_dictionary(
117 keys_capacity: usize,
118 dictionary_values: &GenericByteArray<T>,
119 ) -> Result<Self, ArrowError> {
120 let state = ahash::RandomState::default();
121 let dict_len = dictionary_values.len();
122
123 let mut dedup = HashTable::with_capacity(dict_len);
124
125 let values_len = dictionary_values.value_data().len();
126 let mut values_builder = GenericByteBuilder::<T>::with_capacity(dict_len, values_len);
127
128 K::Native::from_usize(dictionary_values.len())
129 .ok_or(ArrowError::DictionaryKeyOverflowError)?;
130
131 for (idx, maybe_value) in dictionary_values.iter().enumerate() {
132 match maybe_value {
133 Some(value) => {
134 let value_bytes: &[u8] = value.as_ref();
135 let hash = state.hash_one(value_bytes);
136
137 dedup
138 .entry(
139 hash,
140 |idx: &usize| value_bytes == get_bytes(&values_builder, *idx),
141 |idx: &usize| state.hash_one(get_bytes(&values_builder, *idx)),
142 )
143 .or_insert(idx);
144
145 values_builder.append_value(value);
146 }
147 None => values_builder.append_null(),
148 }
149 }
150
151 Ok(Self {
152 state,
153 dedup,
154 keys_builder: PrimitiveBuilder::with_capacity(keys_capacity),
155 values_builder,
156 })
157 }
158
159 pub fn try_new_from_builder<K2>(
187 mut source: GenericByteDictionaryBuilder<K2, T>,
188 ) -> Result<Self, ArrowError>
189 where
190 K::Native: NumCast,
191 K2: ArrowDictionaryKeyType,
192 K2::Native: NumCast,
193 {
194 let state = source.state;
195 let dedup = source.dedup;
196 let values_builder = source.values_builder;
197
198 let source_keys = source.keys_builder.finish();
199 let new_keys: PrimitiveArray<K> = source_keys.try_unary(|value| {
200 num_traits::cast::cast::<K2::Native, K::Native>(value).ok_or_else(|| {
201 ArrowError::CastError(format!(
202 "Can't cast dictionary keys from source type {:?} to type {:?}",
203 K2::DATA_TYPE,
204 K::DATA_TYPE
205 ))
206 })
207 })?;
208
209 drop(source_keys);
213
214 Ok(Self {
215 state,
216 dedup,
217 keys_builder: new_keys
218 .into_builder()
219 .expect("underlying buffer has no references"),
220 values_builder,
221 })
222 }
223}
224
225impl<K, T> ArrayBuilder for GenericByteDictionaryBuilder<K, T>
226where
227 K: ArrowDictionaryKeyType,
228 T: ByteArrayType,
229{
230 fn as_any(&self) -> &dyn Any {
232 self
233 }
234
235 fn as_any_mut(&mut self) -> &mut dyn Any {
237 self
238 }
239
240 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
242 self
243 }
244
245 fn len(&self) -> usize {
247 self.keys_builder.len()
248 }
249
250 fn finish(&mut self) -> ArrayRef {
252 Arc::new(self.finish())
253 }
254
255 fn finish_cloned(&self) -> ArrayRef {
257 Arc::new(self.finish_cloned())
258 }
259
260 fn finish_preserve_values(&mut self) -> ArrayRef {
261 Arc::new(self.finish_preserve_values())
262 }
263}
264
265impl<K, T> GenericByteDictionaryBuilder<K, T>
266where
267 K: ArrowDictionaryKeyType,
268 T: ByteArrayType,
269{
270 fn get_or_insert_key(&mut self, value: impl AsRef<T::Native>) -> Result<K::Native, ArrowError> {
271 let value_native: &T::Native = value.as_ref();
272 let value_bytes: &[u8] = value_native.as_ref();
273
274 let state = &self.state;
275 let storage = &mut self.values_builder;
276 let hash = state.hash_one(value_bytes);
277
278 let idx = *self
279 .dedup
280 .entry(
281 hash,
282 |idx| value_bytes == get_bytes(storage, *idx),
283 |idx| state.hash_one(get_bytes(storage, *idx)),
284 )
285 .or_insert_with(|| {
286 let idx = storage.len();
287 storage.append_value(value);
288 idx
289 })
290 .get();
291
292 let key = K::Native::from_usize(idx).ok_or(ArrowError::DictionaryKeyOverflowError)?;
293
294 Ok(key)
295 }
296
297 pub fn append(&mut self, value: impl AsRef<T::Native>) -> Result<K::Native, ArrowError> {
303 let key = self.get_or_insert_key(value)?;
304 self.keys_builder.append_value(key);
305 Ok(key)
306 }
307
308 pub fn append_n(
313 &mut self,
314 value: impl AsRef<T::Native>,
315 count: usize,
316 ) -> Result<K::Native, ArrowError> {
317 let key = self.get_or_insert_key(value)?;
318 self.keys_builder.append_value_n(key, count);
319 Ok(key)
320 }
321
322 pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
328 self.append(value).expect("dictionary key overflow");
329 }
330
331 pub fn append_values(&mut self, value: impl AsRef<T::Native>, count: usize) {
338 self.append_n(value, count)
339 .expect("dictionary key overflow");
340 }
341
342 #[inline]
344 pub fn append_null(&mut self) {
345 self.keys_builder.append_null()
346 }
347
348 #[inline]
350 pub fn append_nulls(&mut self, n: usize) {
351 self.keys_builder.append_nulls(n)
352 }
353
354 #[inline]
360 pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
361 match value {
362 None => self.append_null(),
363 Some(v) => self.append_value(v),
364 };
365 }
366
367 pub fn append_options(&mut self, value: Option<impl AsRef<T::Native>>, count: usize) {
374 match value {
375 None => self.keys_builder.append_nulls(count),
376 Some(v) => self.append_values(v, count),
377 };
378 }
379
380 pub fn extend_dictionary(
388 &mut self,
389 dictionary: &TypedDictionaryArray<K, GenericByteArray<T>>,
390 ) -> Result<(), ArrowError> {
391 let values = dictionary.values();
392
393 let v_len = values.len();
394 let k_len = dictionary.keys().len();
395 if v_len == 0 && k_len == 0 {
396 return Ok(());
397 }
398
399 if v_len == 0 {
401 self.append_nulls(k_len);
402 return Ok(());
403 }
404
405 if k_len == 0 {
406 return Err(ArrowError::InvalidArgumentError(
407 "Dictionary keys should not be empty when values are not empty".to_string(),
408 ));
409 }
410
411 let mapped_values = values
413 .iter()
414 .map(|dict_value| {
416 dict_value
417 .map(|dict_value| self.get_or_insert_key(dict_value))
418 .transpose()
419 })
420 .collect::<Result<Vec<_>, _>>()?;
421
422 dictionary.keys().iter().for_each(|key| match key {
424 None => self.append_null(),
425 Some(original_dict_index) => {
426 let index = original_dict_index.as_usize().min(v_len - 1);
427 match mapped_values[index] {
428 None => self.append_null(),
429 Some(mapped_value) => self.keys_builder.append_value(mapped_value),
430 }
431 }
432 });
433
434 Ok(())
435 }
436
437 pub fn finish(&mut self) -> DictionaryArray<K> {
439 self.dedup.clear();
440 let values = self.values_builder.finish();
441 let keys = self.keys_builder.finish();
442
443 let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
444
445 let builder = keys
446 .into_data()
447 .into_builder()
448 .data_type(data_type)
449 .child_data(vec![values.into_data()]);
450
451 DictionaryArray::from(unsafe { builder.build_unchecked() })
452 }
453
454 pub fn finish_cloned(&self) -> DictionaryArray<K> {
456 let values = self.values_builder.finish_cloned();
457 let keys = self.keys_builder.finish_cloned();
458
459 let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
460
461 let builder = keys
462 .into_data()
463 .into_builder()
464 .data_type(data_type)
465 .child_data(vec![values.into_data()]);
466
467 DictionaryArray::from(unsafe { builder.build_unchecked() })
468 }
469
470 pub fn finish_preserve_values(&mut self) -> DictionaryArray<K> {
488 let values = self.values_builder.finish_cloned();
489 let keys = self.keys_builder.finish();
490
491 let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
492
493 let builder = keys
494 .into_data()
495 .into_builder()
496 .data_type(data_type)
497 .child_data(vec![values.into_data()]);
498
499 DictionaryArray::from(unsafe { builder.build_unchecked() })
500 }
501
502 pub fn validity_slice(&self) -> Option<&[u8]> {
504 self.keys_builder.validity_slice()
505 }
506}
507
508impl<K: ArrowDictionaryKeyType, T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>>
509 for GenericByteDictionaryBuilder<K, T>
510{
511 #[inline]
512 fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
513 for v in iter {
514 self.append_option(v)
515 }
516 }
517}
518
519fn get_bytes<T: ByteArrayType>(values: &GenericByteBuilder<T>, idx: usize) -> &[u8] {
520 let offsets = values.offsets_slice();
521 let values = values.values_slice();
522
523 let end_offset = offsets[idx + 1].as_usize();
524 let start_offset = offsets[idx].as_usize();
525
526 &values[start_offset..end_offset]
527}
528
529pub type StringDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericStringType<i32>>;
562
563pub type LargeStringDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericStringType<i64>>;
565
566pub type BinaryDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericBinaryType<i32>>;
600
601pub type LargeBinaryDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericBinaryType<i64>>;
603
604#[cfg(test)]
605mod tests {
606 use super::*;
607
608 use crate::array::Int8Array;
609 use crate::cast::AsArray;
610 use crate::types::{Int8Type, Int16Type, Int32Type, UInt8Type, UInt16Type, Utf8Type};
611 use crate::{ArrowPrimitiveType, BinaryArray, StringArray};
612
613 fn test_bytes_dictionary_builder<T>(values: Vec<&T::Native>)
614 where
615 T: ByteArrayType,
616 <T as ByteArrayType>::Native: PartialEq,
617 <T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
618 {
619 let mut builder = GenericByteDictionaryBuilder::<Int8Type, T>::new();
620 builder.append(values[0]).unwrap();
621 builder.append_null();
622 builder.append(values[1]).unwrap();
623 builder.append(values[1]).unwrap();
624 builder.append(values[0]).unwrap();
625 let array = builder.finish();
626
627 assert_eq!(
628 array.keys(),
629 &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)])
630 );
631
632 let av = array.values();
634 let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
635
636 assert_eq!(*ava.value(0), *values[0]);
637 assert_eq!(*ava.value(1), *values[1]);
638 }
639
640 #[test]
641 fn test_string_dictionary_builder() {
642 test_bytes_dictionary_builder::<GenericStringType<i32>>(vec!["abc", "def"]);
643 }
644
645 #[test]
646 fn test_binary_dictionary_builder() {
647 test_bytes_dictionary_builder::<GenericBinaryType<i32>>(vec![b"abc", b"def"]);
648 }
649
650 fn test_bytes_dictionary_builder_finish_cloned<T>(values: Vec<&T::Native>)
651 where
652 T: ByteArrayType,
653 <T as ByteArrayType>::Native: PartialEq,
654 <T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
655 {
656 let mut builder = GenericByteDictionaryBuilder::<Int8Type, T>::new();
657
658 builder.append(values[0]).unwrap();
659 builder.append_null();
660 builder.append(values[1]).unwrap();
661 builder.append(values[1]).unwrap();
662 builder.append(values[0]).unwrap();
663 let mut array = builder.finish_cloned();
664
665 assert_eq!(
666 array.keys(),
667 &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)])
668 );
669
670 let av = array.values();
672 let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
673
674 assert_eq!(ava.value(0), values[0]);
675 assert_eq!(ava.value(1), values[1]);
676
677 builder.append(values[0]).unwrap();
678 builder.append(values[2]).unwrap();
679 builder.append(values[1]).unwrap();
680
681 array = builder.finish();
682
683 assert_eq!(
684 array.keys(),
685 &Int8Array::from(vec![
686 Some(0),
687 None,
688 Some(1),
689 Some(1),
690 Some(0),
691 Some(0),
692 Some(2),
693 Some(1)
694 ])
695 );
696
697 let av2 = array.values();
699 let ava2: &GenericByteArray<T> =
700 av2.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
701
702 assert_eq!(ava2.value(0), values[0]);
703 assert_eq!(ava2.value(1), values[1]);
704 assert_eq!(ava2.value(2), values[2]);
705 }
706
707 #[test]
708 fn test_string_dictionary_builder_finish_cloned() {
709 test_bytes_dictionary_builder_finish_cloned::<GenericStringType<i32>>(vec![
710 "abc", "def", "ghi",
711 ]);
712 }
713
714 #[test]
715 fn test_binary_dictionary_builder_finish_cloned() {
716 test_bytes_dictionary_builder_finish_cloned::<GenericBinaryType<i32>>(vec![
717 b"abc", b"def", b"ghi",
718 ]);
719 }
720
721 fn _test_try_new_from_builder_generic_for_key_types<K1, K2, T>(values: Vec<&T::Native>)
722 where
723 K1: ArrowDictionaryKeyType,
724 K1::Native: NumCast,
725 K2: ArrowDictionaryKeyType,
726 K2::Native: NumCast + From<u8>,
727 T: ByteArrayType,
728 <T as ByteArrayType>::Native: PartialEq + AsRef<<T as ByteArrayType>::Native>,
729 {
730 let mut source = GenericByteDictionaryBuilder::<K1, T>::new();
731 source.append(values[0]).unwrap();
732 source.append(values[1]).unwrap();
733 source.append_null();
734 source.append(values[2]).unwrap();
735
736 let mut result =
737 GenericByteDictionaryBuilder::<K2, T>::try_new_from_builder(source).unwrap();
738 let array = result.finish();
739
740 let mut expected_keys_builder = PrimitiveBuilder::<K2>::new();
741 expected_keys_builder
742 .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(0u8));
743 expected_keys_builder
744 .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(1u8));
745 expected_keys_builder.append_null();
746 expected_keys_builder
747 .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(2u8));
748 let expected_keys = expected_keys_builder.finish();
749 assert_eq!(array.keys(), &expected_keys);
750
751 let av = array.values();
752 let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
753 assert_eq!(ava.value(0), values[0]);
754 assert_eq!(ava.value(1), values[1]);
755 assert_eq!(ava.value(2), values[2]);
756 }
757
758 fn test_try_new_from_builder<T>(values: Vec<&T::Native>)
759 where
760 T: ByteArrayType,
761 <T as ByteArrayType>::Native: PartialEq + AsRef<<T as ByteArrayType>::Native>,
762 {
763 _test_try_new_from_builder_generic_for_key_types::<UInt8Type, UInt16Type, T>(
765 values.clone(),
766 );
767 _test_try_new_from_builder_generic_for_key_types::<UInt16Type, UInt8Type, T>(
769 values.clone(),
770 );
771 _test_try_new_from_builder_generic_for_key_types::<Int8Type, Int16Type, T>(values.clone());
773 _test_try_new_from_builder_generic_for_key_types::<Int32Type, Int16Type, T>(values.clone());
775 _test_try_new_from_builder_generic_for_key_types::<UInt8Type, Int16Type, T>(values.clone());
777 _test_try_new_from_builder_generic_for_key_types::<Int8Type, UInt8Type, T>(values.clone());
778 _test_try_new_from_builder_generic_for_key_types::<Int8Type, UInt16Type, T>(values.clone());
779 _test_try_new_from_builder_generic_for_key_types::<Int32Type, Int16Type, T>(values.clone());
780 }
781
782 #[test]
783 fn test_string_dictionary_builder_try_new_from_builder() {
784 test_try_new_from_builder::<GenericStringType<i32>>(vec!["abc", "def", "ghi"]);
785 }
786
787 #[test]
788 fn test_binary_dictionary_builder_try_new_from_builder() {
789 test_try_new_from_builder::<GenericBinaryType<i32>>(vec![b"abc", b"def", b"ghi"]);
790 }
791
792 #[test]
793 fn test_try_new_from_builder_cast_fails() {
794 let mut source_builder = StringDictionaryBuilder::<UInt16Type>::new();
795 for i in 0..257 {
796 source_builder.append_value(format!("val{i}"));
797 }
798
799 let result = StringDictionaryBuilder::<UInt8Type>::try_new_from_builder(source_builder);
802 assert!(result.is_err());
803 if let Err(e) = result {
804 assert!(matches!(e, ArrowError::CastError(_)));
805 assert_eq!(
806 e.to_string(),
807 "Cast error: Can't cast dictionary keys from source type UInt16 to type UInt8"
808 );
809 }
810 }
811
812 fn test_bytes_dictionary_builder_with_existing_dictionary<T>(
813 dictionary: GenericByteArray<T>,
814 values: Vec<&T::Native>,
815 ) where
816 T: ByteArrayType,
817 <T as ByteArrayType>::Native: PartialEq,
818 <T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
819 {
820 let mut builder =
821 GenericByteDictionaryBuilder::<Int8Type, T>::new_with_dictionary(6, &dictionary)
822 .unwrap();
823 builder.append(values[0]).unwrap();
824 builder.append_null();
825 builder.append(values[1]).unwrap();
826 builder.append(values[1]).unwrap();
827 builder.append(values[0]).unwrap();
828 builder.append(values[2]).unwrap();
829 let array = builder.finish();
830
831 assert_eq!(
832 array.keys(),
833 &Int8Array::from(vec![Some(2), None, Some(1), Some(1), Some(2), Some(3)])
834 );
835
836 let av = array.values();
838 let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
839
840 assert!(!ava.is_valid(0));
841 assert_eq!(ava.value(1), values[1]);
842 assert_eq!(ava.value(2), values[0]);
843 assert_eq!(ava.value(3), values[2]);
844 }
845
846 #[test]
847 fn test_string_dictionary_builder_with_existing_dictionary() {
848 test_bytes_dictionary_builder_with_existing_dictionary::<GenericStringType<i32>>(
849 StringArray::from(vec![None, Some("def"), Some("abc")]),
850 vec!["abc", "def", "ghi"],
851 );
852 }
853
854 #[test]
855 fn test_binary_dictionary_builder_with_existing_dictionary() {
856 let values: Vec<Option<&[u8]>> = vec![None, Some(b"def"), Some(b"abc")];
857 test_bytes_dictionary_builder_with_existing_dictionary::<GenericBinaryType<i32>>(
858 BinaryArray::from(values),
859 vec![b"abc", b"def", b"ghi"],
860 );
861 }
862
863 fn test_bytes_dictionary_builder_with_reserved_null_value<T>(
864 dictionary: GenericByteArray<T>,
865 values: Vec<&T::Native>,
866 ) where
867 T: ByteArrayType,
868 <T as ByteArrayType>::Native: PartialEq,
869 <T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
870 {
871 let mut builder =
872 GenericByteDictionaryBuilder::<Int16Type, T>::new_with_dictionary(4, &dictionary)
873 .unwrap();
874 builder.append(values[0]).unwrap();
875 builder.append_null();
876 builder.append(values[1]).unwrap();
877 builder.append(values[0]).unwrap();
878 let array = builder.finish();
879
880 assert!(array.is_null(1));
881 assert!(!array.is_valid(1));
882
883 let keys = array.keys();
884
885 assert_eq!(keys.value(0), 1);
886 assert!(keys.is_null(1));
887 assert_eq!(keys.value(1), 0);
889 assert_eq!(keys.value(2), 2);
890 assert_eq!(keys.value(3), 1);
891 }
892
893 #[test]
894 fn test_string_dictionary_builder_with_reserved_null_value() {
895 let v: Vec<Option<&str>> = vec![None];
896 test_bytes_dictionary_builder_with_reserved_null_value::<GenericStringType<i32>>(
897 StringArray::from(v),
898 vec!["abc", "def"],
899 );
900 }
901
902 #[test]
903 fn test_binary_dictionary_builder_with_reserved_null_value() {
904 let values: Vec<Option<&[u8]>> = vec![None];
905 test_bytes_dictionary_builder_with_reserved_null_value::<GenericBinaryType<i32>>(
906 BinaryArray::from(values),
907 vec![b"abc", b"def"],
908 );
909 }
910
911 #[test]
912 fn test_extend() {
913 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
914 builder.extend(["a", "b", "c", "a", "b", "c"].into_iter().map(Some));
915 builder.extend(["c", "d", "a"].into_iter().map(Some));
916 let dict = builder.finish();
917 assert_eq!(dict.keys().values(), &[0, 1, 2, 0, 1, 2, 2, 3, 0]);
918 assert_eq!(dict.values().len(), 4);
919 }
920
921 #[test]
922 fn test_extend_dictionary() {
923 let some_dict = {
924 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
925 builder.extend(["a", "b", "c", "a", "b", "c"].into_iter().map(Some));
926 builder.extend([None::<&str>]);
927 builder.extend(["c", "d", "a"].into_iter().map(Some));
928 builder.append_null();
929 builder.finish()
930 };
931
932 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
933 builder.extend(["e", "e", "f", "e", "d"].into_iter().map(Some));
934 builder
935 .extend_dictionary(&some_dict.downcast_dict().unwrap())
936 .unwrap();
937 let dict = builder.finish();
938
939 assert_eq!(dict.values().len(), 6);
940
941 let values = dict
942 .downcast_dict::<GenericByteArray<Utf8Type>>()
943 .unwrap()
944 .into_iter()
945 .collect::<Vec<_>>();
946
947 assert_eq!(
948 values,
949 [
950 Some("e"),
951 Some("e"),
952 Some("f"),
953 Some("e"),
954 Some("d"),
955 Some("a"),
956 Some("b"),
957 Some("c"),
958 Some("a"),
959 Some("b"),
960 Some("c"),
961 None,
962 Some("c"),
963 Some("d"),
964 Some("a"),
965 None
966 ]
967 );
968 }
969 #[test]
970 fn test_extend_dictionary_with_null_in_mapped_value() {
971 let some_dict = {
972 let mut values_builder = GenericByteBuilder::<Utf8Type>::new();
973 let mut keys_builder = PrimitiveBuilder::<Int32Type>::new();
974
975 values_builder.append_null();
977 keys_builder.append_value(0);
978 values_builder.append_value("I like worm hugs");
979 keys_builder.append_value(1);
980
981 let values = values_builder.finish();
982 let keys = keys_builder.finish();
983
984 let data_type = DataType::Dictionary(
985 Box::new(Int32Type::DATA_TYPE),
986 Box::new(Utf8Type::DATA_TYPE),
987 );
988
989 let builder = keys
990 .into_data()
991 .into_builder()
992 .data_type(data_type)
993 .child_data(vec![values.into_data()]);
994
995 DictionaryArray::from(unsafe { builder.build_unchecked() })
996 };
997
998 let some_dict_values = some_dict.values().as_string::<i32>();
999 assert_eq!(
1000 some_dict_values.into_iter().collect::<Vec<_>>(),
1001 &[None, Some("I like worm hugs")]
1002 );
1003
1004 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
1005 builder
1006 .extend_dictionary(&some_dict.downcast_dict().unwrap())
1007 .unwrap();
1008 let dict = builder.finish();
1009
1010 assert_eq!(dict.values().len(), 1);
1011
1012 let values = dict
1013 .downcast_dict::<GenericByteArray<Utf8Type>>()
1014 .unwrap()
1015 .into_iter()
1016 .collect::<Vec<_>>();
1017
1018 assert_eq!(values, [None, Some("I like worm hugs")]);
1019 }
1020
1021 #[test]
1022 fn test_extend_all_null_dictionary() {
1023 let some_dict = {
1024 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
1025 builder.append_nulls(2);
1026 builder.finish()
1027 };
1028
1029 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
1030 builder
1031 .extend_dictionary(&some_dict.downcast_dict().unwrap())
1032 .unwrap();
1033 let dict = builder.finish();
1034
1035 assert_eq!(dict.values().len(), 0);
1036
1037 let values = dict
1038 .downcast_dict::<GenericByteArray<Utf8Type>>()
1039 .unwrap()
1040 .into_iter()
1041 .collect::<Vec<_>>();
1042
1043 assert_eq!(values, [None, None]);
1044 }
1045
1046 #[test]
1047 fn test_finish_preserve_values() {
1048 let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
1050 builder.append("a").unwrap();
1051 builder.append("b").unwrap();
1052 builder.append("c").unwrap();
1053 let dict = builder.finish_preserve_values();
1054 assert_eq!(dict.keys().values(), &[0, 1, 2]);
1055 assert_eq!(dict.values().len(), 3);
1056 let values = dict
1057 .downcast_dict::<GenericByteArray<Utf8Type>>()
1058 .unwrap()
1059 .into_iter()
1060 .collect::<Vec<_>>();
1061 assert_eq!(values, [Some("a"), Some("b"), Some("c")]);
1062
1063 builder.append("d").unwrap();
1065 builder.append("e").unwrap();
1066 let dict2 = builder.finish_preserve_values();
1067
1068 assert_eq!(dict2.keys().values(), &[3, 4]);
1071 let values = dict2
1072 .downcast_dict::<GenericByteArray<Utf8Type>>()
1073 .unwrap()
1074 .into_iter()
1075 .collect::<Vec<_>>();
1076 assert_eq!(values, [Some("d"), Some("e")]);
1077
1078 assert_eq!(dict2.values().len(), 5);
1080 let all_values = dict2
1081 .values()
1082 .as_any()
1083 .downcast_ref::<StringArray>()
1084 .unwrap()
1085 .into_iter()
1086 .collect::<Vec<_>>();
1087 assert_eq!(
1088 all_values,
1089 [Some("a"), Some("b"), Some("c"), Some("d"), Some("e"),]
1090 );
1091 }
1092}