1use crate::builder::{PrimitiveDictionaryBuilder, StringDictionaryBuilder};
19use crate::cast::AsArray;
20use crate::iterator::ArrayIter;
21use crate::types::*;
22use crate::{
23 Array, ArrayAccessor, ArrayRef, ArrowNativeTypeOp, PrimitiveArray, Scalar, StringArray,
24 make_array,
25};
26use arrow_buffer::bit_util::set_bit;
27use arrow_buffer::buffer::NullBuffer;
28use arrow_buffer::{ArrowNativeType, BooleanBuffer, BooleanBufferBuilder};
29use arrow_data::ArrayData;
30use arrow_schema::{ArrowError, DataType};
31use std::any::Any;
32use std::sync::Arc;
33
/// A [`DictionaryArray`] whose keys are `i8` values ([`Int8Type`]).
pub type Int8DictionaryArray = DictionaryArray<Int8Type>;

/// A [`DictionaryArray`] whose keys are `i16` values ([`Int16Type`]).
pub type Int16DictionaryArray = DictionaryArray<Int16Type>;

/// A [`DictionaryArray`] whose keys are `i32` values ([`Int32Type`]).
pub type Int32DictionaryArray = DictionaryArray<Int32Type>;

/// A [`DictionaryArray`] whose keys are `i64` values ([`Int64Type`]).
pub type Int64DictionaryArray = DictionaryArray<Int64Type>;

/// A [`DictionaryArray`] whose keys are `u8` values ([`UInt8Type`]).
pub type UInt8DictionaryArray = DictionaryArray<UInt8Type>;

/// A [`DictionaryArray`] whose keys are `u16` values ([`UInt16Type`]).
pub type UInt16DictionaryArray = DictionaryArray<UInt16Type>;

/// A [`DictionaryArray`] whose keys are `u32` values ([`UInt32Type`]).
pub type UInt32DictionaryArray = DictionaryArray<UInt32Type>;

/// A [`DictionaryArray`] whose keys are `u64` values ([`UInt64Type`]).
pub type UInt64DictionaryArray = DictionaryArray<UInt64Type>;
161
/// An array of dictionary-encoded values.
///
/// Each slot stores a key of type `K` in `keys`; a non-null key is an index
/// into `values`, which holds the distinct dictionary entries.
/// [`DictionaryArray::try_new`] rejects any non-null key outside
/// `0..values.len()`.
pub struct DictionaryArray<K: ArrowDictionaryKeyType> {
    /// The `DataType::Dictionary(key type, value type)` of this array.
    data_type: DataType,

    /// One key per logical slot. Keys are indices into `values`, not the
    /// values themselves; null slots may carry arbitrary key values.
    keys: PrimitiveArray<K>,

    /// The dictionary entries that the keys refer to.
    values: ArrayRef,

    /// Ordering flag; every constructor in this file initialises it to
    /// `false`, while `clone` and `slice` carry it over unchanged.
    is_ordered: bool,
}
258
259impl<K: ArrowDictionaryKeyType> Clone for DictionaryArray<K> {
260 fn clone(&self) -> Self {
261 Self {
262 data_type: self.data_type.clone(),
263 keys: self.keys.clone(),
264 values: self.values.clone(),
265 is_ordered: self.is_ordered,
266 }
267 }
268}
269
270impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
271 pub fn new(keys: PrimitiveArray<K>, values: ArrayRef) -> Self {
279 Self::try_new(keys, values).unwrap()
280 }
281
282 pub fn try_new(keys: PrimitiveArray<K>, values: ArrayRef) -> Result<Self, ArrowError> {
290 let data_type = DataType::Dictionary(
291 Box::new(keys.data_type().clone()),
292 Box::new(values.data_type().clone()),
293 );
294
295 let zero = K::Native::usize_as(0);
296 let values_len = values.len();
297
298 if let Some((idx, v)) =
299 keys.values().iter().enumerate().find(|(idx, v)| {
300 (v.is_lt(zero) || v.as_usize() >= values_len) && keys.is_valid(*idx)
301 })
302 {
303 return Err(ArrowError::InvalidArgumentError(format!(
304 "Invalid dictionary key {v:?} at index {idx}, expected 0 <= key < {values_len}",
305 )));
306 }
307
308 Ok(Self {
309 data_type,
310 keys,
311 values,
312 is_ordered: false,
313 })
314 }
315
316 pub fn new_scalar<T: Array + 'static>(value: Scalar<T>) -> Scalar<Self> {
318 Scalar::new(Self::new(
319 PrimitiveArray::new(vec![K::Native::usize_as(0)].into(), None),
320 Arc::new(value.into_inner()),
321 ))
322 }
323
324 pub unsafe fn new_unchecked(keys: PrimitiveArray<K>, values: ArrayRef) -> Self {
330 if cfg!(feature = "force_validate") {
331 return Self::new(keys, values);
332 }
333
334 let data_type = DataType::Dictionary(
335 Box::new(keys.data_type().clone()),
336 Box::new(values.data_type().clone()),
337 );
338
339 Self {
340 data_type,
341 keys,
342 values,
343 is_ordered: false,
344 }
345 }
346
347 pub fn into_parts(self) -> (PrimitiveArray<K>, ArrayRef) {
349 (self.keys, self.values)
350 }
351
352 pub fn keys(&self) -> &PrimitiveArray<K> {
354 &self.keys
355 }
356
357 pub fn lookup_key(&self, value: &str) -> Option<K::Native> {
363 let rd_buf: &StringArray = self.values.as_any().downcast_ref::<StringArray>().unwrap();
364
365 (0..rd_buf.len())
366 .position(|i| rd_buf.value(i) == value)
367 .and_then(K::Native::from_usize)
368 }
369
370 pub fn values(&self) -> &ArrayRef {
372 &self.values
373 }
374
375 pub fn value_type(&self) -> DataType {
377 self.values.data_type().clone()
378 }
379
380 pub fn len(&self) -> usize {
382 self.keys.len()
383 }
384
385 pub fn is_empty(&self) -> bool {
387 self.keys.is_empty()
388 }
389
390 pub fn is_ordered(&self) -> bool {
392 self.is_ordered
393 }
394
395 pub fn keys_iter(&self) -> impl Iterator<Item = Option<usize>> + '_ {
397 self.keys.iter().map(|key| key.map(|k| k.as_usize()))
398 }
399
400 pub fn key(&self, i: usize) -> Option<usize> {
403 self.keys.is_valid(i).then(|| self.keys.value(i).as_usize())
404 }
405
406 pub fn slice(&self, offset: usize, length: usize) -> Self {
408 Self {
409 data_type: self.data_type.clone(),
410 keys: self.keys.slice(offset, length),
411 values: self.values.clone(),
412 is_ordered: self.is_ordered,
413 }
414 }
415
416 pub fn downcast_dict<V: 'static>(&self) -> Option<TypedDictionaryArray<'_, K, V>> {
430 let values = self.values.as_any().downcast_ref()?;
431 Some(TypedDictionaryArray {
432 dictionary: self,
433 values,
434 })
435 }
436
437 pub fn with_values(&self, values: ArrayRef) -> Self {
475 assert!(values.len() >= self.values.len());
476 let data_type =
477 DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
478 Self {
479 data_type,
480 keys: self.keys.clone(),
481 values,
482 is_ordered: false,
483 }
484 }
485
486 #[allow(clippy::result_large_err)]
489 pub fn into_primitive_dict_builder<V>(self) -> Result<PrimitiveDictionaryBuilder<K, V>, Self>
490 where
491 V: ArrowPrimitiveType,
492 {
493 if !self.value_type().is_primitive() {
494 return Err(self);
495 }
496
497 let key_array = self.keys().clone();
498 let value_array = self.values().as_primitive::<V>().clone();
499
500 drop(self.keys);
501 drop(self.values);
502
503 let key_builder = key_array.into_builder();
504 let value_builder = value_array.into_builder();
505
506 match (key_builder, value_builder) {
507 (Ok(key_builder), Ok(value_builder)) => Ok(unsafe {
508 PrimitiveDictionaryBuilder::new_from_builders(key_builder, value_builder)
509 }),
510 (Err(key_array), Ok(mut value_builder)) => {
511 Err(Self::try_new(key_array, Arc::new(value_builder.finish())).unwrap())
512 }
513 (Ok(mut key_builder), Err(value_array)) => {
514 Err(Self::try_new(key_builder.finish(), Arc::new(value_array)).unwrap())
515 }
516 (Err(key_array), Err(value_array)) => {
517 Err(Self::try_new(key_array, Arc::new(value_array)).unwrap())
518 }
519 }
520 }
521
522 #[allow(clippy::result_large_err)]
546 pub fn unary_mut<F, V>(self, op: F) -> Result<DictionaryArray<K>, DictionaryArray<K>>
547 where
548 V: ArrowPrimitiveType,
549 F: Fn(V::Native) -> V::Native,
550 {
551 let mut builder: PrimitiveDictionaryBuilder<K, V> = self.into_primitive_dict_builder()?;
552 builder
553 .values_slice_mut()
554 .iter_mut()
555 .for_each(|v| *v = op(*v));
556 Ok(builder.finish())
557 }
558
559 pub fn occupancy(&self) -> BooleanBuffer {
564 let len = self.values.len();
565 let mut builder = BooleanBufferBuilder::new(len);
566 builder.resize(len);
567 let slice = builder.as_slice_mut();
568 match self.keys.nulls().filter(|n| n.null_count() > 0) {
569 Some(n) => {
570 let v = self.keys.values();
571 n.valid_indices()
572 .for_each(|idx| set_bit(slice, v[idx].as_usize()))
573 }
574 None => {
575 let v = self.keys.values();
576 v.iter().for_each(|v| set_bit(slice, v.as_usize()))
577 }
578 }
579 builder.finish()
580 }
581}
582
583impl<T: ArrowDictionaryKeyType> From<ArrayData> for DictionaryArray<T> {
585 fn from(data: ArrayData) -> Self {
586 assert_eq!(
587 data.buffers().len(),
588 1,
589 "DictionaryArray data should contain a single buffer only (keys)."
590 );
591 assert_eq!(
592 data.child_data().len(),
593 1,
594 "DictionaryArray should contain a single child array (values)."
595 );
596
597 if let DataType::Dictionary(key_data_type, _) = data.data_type() {
598 assert_eq!(
599 &T::DATA_TYPE,
600 key_data_type.as_ref(),
601 "DictionaryArray's data type must match, expected {} got {}",
602 T::DATA_TYPE,
603 key_data_type
604 );
605
606 let values = make_array(data.child_data()[0].clone());
607 let data_type = data.data_type().clone();
608
609 let keys = PrimitiveArray::<T>::from(unsafe {
614 data.into_builder()
615 .data_type(T::DATA_TYPE)
616 .child_data(vec![])
617 .build_unchecked()
618 });
619
620 Self {
621 data_type,
622 keys,
623 values,
624 is_ordered: false,
625 }
626 } else {
627 panic!("DictionaryArray must have Dictionary data type.")
628 }
629 }
630}
631
632impl<T: ArrowDictionaryKeyType> From<DictionaryArray<T>> for ArrayData {
633 fn from(array: DictionaryArray<T>) -> Self {
634 let builder = array
635 .keys
636 .into_data()
637 .into_builder()
638 .data_type(array.data_type)
639 .child_data(vec![array.values.to_data()]);
640
641 unsafe { builder.build_unchecked() }
642 }
643}
644
645impl<'a, T: ArrowDictionaryKeyType> FromIterator<Option<&'a str>> for DictionaryArray<T> {
662 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
663 let it = iter.into_iter();
664 let (lower, _) = it.size_hint();
665 let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024);
666 builder.extend(it);
667 builder.finish()
668 }
669}
670
671impl<'a, T: ArrowDictionaryKeyType> FromIterator<&'a str> for DictionaryArray<T> {
686 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
687 let it = iter.into_iter();
688 let (lower, _) = it.size_hint();
689 let mut builder = StringDictionaryBuilder::with_capacity(lower, 256, 1024);
690 it.for_each(|i| {
691 builder
692 .append(i)
693 .expect("Unable to append a value to a dictionary array.");
694 });
695
696 builder.finish()
697 }
698}
699
700unsafe impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
702 fn as_any(&self) -> &dyn Any {
703 self
704 }
705
706 fn to_data(&self) -> ArrayData {
707 self.clone().into()
708 }
709
710 fn into_data(self) -> ArrayData {
711 self.into()
712 }
713
714 fn data_type(&self) -> &DataType {
715 &self.data_type
716 }
717
718 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
719 Arc::new(self.slice(offset, length))
720 }
721
722 fn len(&self) -> usize {
723 self.keys.len()
724 }
725
726 fn is_empty(&self) -> bool {
727 self.keys.is_empty()
728 }
729
730 fn shrink_to_fit(&mut self) {
731 self.keys.shrink_to_fit();
732 self.values.shrink_to_fit();
733 }
734
735 fn offset(&self) -> usize {
736 self.keys.offset()
737 }
738
739 fn nulls(&self) -> Option<&NullBuffer> {
740 self.keys.nulls()
741 }
742
743 fn logical_nulls(&self) -> Option<NullBuffer> {
744 match self.values.logical_nulls() {
745 None => self.nulls().cloned(),
746 Some(value_nulls) => {
747 let mut builder = BooleanBufferBuilder::new(self.len());
748 match self.keys.nulls() {
749 Some(n) => builder.append_buffer(n.inner()),
750 None => builder.append_n(self.len(), true),
751 }
752 for (idx, k) in self.keys.values().iter().enumerate() {
753 let k = k.as_usize();
754 if k < value_nulls.len() && value_nulls.is_null(k) {
756 builder.set_bit(idx, false);
757 }
758 }
759 Some(builder.finish().into())
760 }
761 }
762 }
763
764 fn logical_null_count(&self) -> usize {
765 match (self.keys.nulls(), self.values.logical_nulls()) {
766 (None, None) => 0,
767 (Some(key_nulls), None) => key_nulls.null_count(),
768 (None, Some(value_nulls)) => self
769 .keys
770 .values()
771 .iter()
772 .filter(|k| value_nulls.is_null(k.as_usize()))
773 .count(),
774 (Some(key_nulls), Some(value_nulls)) => self
775 .keys
776 .values()
777 .iter()
778 .enumerate()
779 .filter(|(idx, k)| key_nulls.is_null(*idx) || value_nulls.is_null(k.as_usize()))
780 .count(),
781 }
782 }
783
784 fn is_nullable(&self) -> bool {
785 !self.is_empty() && (self.nulls().is_some() || self.values.is_nullable())
786 }
787
788 fn get_buffer_memory_size(&self) -> usize {
789 self.keys.get_buffer_memory_size() + self.values.get_buffer_memory_size()
790 }
791
792 fn get_array_memory_size(&self) -> usize {
793 std::mem::size_of::<Self>()
794 + self.keys.get_buffer_memory_size()
795 + self.values.get_array_memory_size()
796 }
797}
798
799impl<T: ArrowDictionaryKeyType> std::fmt::Debug for DictionaryArray<T> {
800 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
801 writeln!(
802 f,
803 "DictionaryArray {{keys: {:?} values: {:?}}}",
804 self.keys, self.values
805 )
806 }
807}
808
/// A borrowed view of a [`DictionaryArray`] together with its dictionary
/// values downcast to the concrete type `V`.
///
/// Created by [`DictionaryArray::downcast_dict`].
pub struct TypedDictionaryArray<'a, K: ArrowDictionaryKeyType, V> {
    /// The dictionary array this view borrows from.
    dictionary: &'a DictionaryArray<K>,
    /// The dictionary values of `dictionary`, downcast to `V`.
    values: &'a V,
}
832
833impl<K: ArrowDictionaryKeyType, V> Clone for TypedDictionaryArray<'_, K, V> {
835 fn clone(&self) -> Self {
836 *self
837 }
838}
839
/// The view only holds two shared references, so it can be `Copy` without
/// requiring `V: Copy`.
impl<K: ArrowDictionaryKeyType, V> Copy for TypedDictionaryArray<'_, K, V> {}
841
842impl<K: ArrowDictionaryKeyType, V> std::fmt::Debug for TypedDictionaryArray<'_, K, V> {
843 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
844 writeln!(f, "TypedDictionaryArray({:?})", self.dictionary)
845 }
846}
847
848impl<'a, K: ArrowDictionaryKeyType, V> TypedDictionaryArray<'a, K, V> {
849 pub fn keys(&self) -> &'a PrimitiveArray<K> {
851 self.dictionary.keys()
852 }
853
854 pub fn values(&self) -> &'a V {
856 self.values
857 }
858}
859
/// [`Array`] implementation for [`TypedDictionaryArray`]: every method
/// forwards to the wrapped [`DictionaryArray`].
// SAFETY (NOTE(review)): `Array` is an unsafe trait whose contract is defined
// outside this file; this impl only delegates to the wrapped dictionary.
unsafe impl<K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'_, K, V> {
    /// Exposes the wrapped [`DictionaryArray`], not the typed view, as `Any`.
    fn as_any(&self) -> &dyn Any {
        self.dictionary
    }

    fn to_data(&self) -> ArrayData {
        self.dictionary.to_data()
    }

    fn into_data(self) -> ArrayData {
        self.dictionary.into_data()
    }

    fn data_type(&self) -> &DataType {
        self.dictionary.data_type()
    }

    /// Slices the wrapped dictionary; the result is a plain dictionary array,
    /// not a typed view.
    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
        Arc::new(self.dictionary.slice(offset, length))
    }

    fn len(&self) -> usize {
        self.dictionary.len()
    }

    fn is_empty(&self) -> bool {
        self.dictionary.is_empty()
    }

    fn offset(&self) -> usize {
        self.dictionary.offset()
    }

    fn nulls(&self) -> Option<&NullBuffer> {
        self.dictionary.nulls()
    }

    fn logical_nulls(&self) -> Option<NullBuffer> {
        self.dictionary.logical_nulls()
    }

    fn logical_null_count(&self) -> usize {
        self.dictionary.logical_null_count()
    }

    fn is_nullable(&self) -> bool {
        self.dictionary.is_nullable()
    }

    fn get_buffer_memory_size(&self) -> usize {
        self.dictionary.get_buffer_memory_size()
    }

    fn get_array_memory_size(&self) -> usize {
        self.dictionary.get_array_memory_size()
    }
}
917
918impl<K, V> IntoIterator for TypedDictionaryArray<'_, K, V>
919where
920 K: ArrowDictionaryKeyType,
921 Self: ArrayAccessor,
922{
923 type Item = Option<<Self as ArrayAccessor>::Item>;
924 type IntoIter = ArrayIter<Self>;
925
926 fn into_iter(self) -> Self::IntoIter {
927 ArrayIter::new(self)
928 }
929}
930
931impl<'a, K, V> ArrayAccessor for TypedDictionaryArray<'a, K, V>
932where
933 K: ArrowDictionaryKeyType,
934 V: Sync + Send,
935 &'a V: ArrayAccessor,
936 <&'a V as ArrayAccessor>::Item: Default,
937{
938 type Item = <&'a V as ArrayAccessor>::Item;
939
940 fn value(&self, index: usize) -> Self::Item {
941 assert!(
942 index < self.len(),
943 "Trying to access an element at index {} from a TypedDictionaryArray of length {}",
944 index,
945 self.len()
946 );
947 unsafe { self.value_unchecked(index) }
948 }
949
950 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
951 let val = unsafe { self.dictionary.keys.value_unchecked(index) };
952 let value_idx = val.as_usize();
953
954 match value_idx < self.values.len() {
957 true => unsafe { self.values.value_unchecked(value_idx) },
958 false => Default::default(),
959 }
960 }
961}
962
/// A [`DictionaryArray`] with the key type erased, for code that must work
/// with dictionaries of any key type.
pub trait AnyDictionaryArray: Array {
    /// Returns the keys of this dictionary as a type-erased [`Array`].
    fn keys(&self) -> &dyn Array;

    /// Returns the dictionary values.
    fn values(&self) -> &ArrayRef;

    /// Returns the keys of this dictionary converted to `usize`.
    ///
    /// The implementation for [`DictionaryArray`] in this file clamps every
    /// key to `values().len() - 1` and panics if the dictionary has no values.
    fn normalized_keys(&self) -> Vec<usize>;

    /// Returns a new dictionary array that keeps the keys of this one but uses
    /// `values` as its dictionary.
    fn with_values(&self, values: ArrayRef) -> ArrayRef;
}
1029
1030impl<K: ArrowDictionaryKeyType> AnyDictionaryArray for DictionaryArray<K> {
1031 fn keys(&self) -> &dyn Array {
1032 &self.keys
1033 }
1034
1035 fn values(&self) -> &ArrayRef {
1036 self.values()
1037 }
1038
1039 fn normalized_keys(&self) -> Vec<usize> {
1040 let v_len = self.values().len();
1041 assert_ne!(v_len, 0);
1042 let iter = self.keys().values().iter();
1043 iter.map(|x| x.as_usize().min(v_len - 1)).collect()
1044 }
1045
1046 fn with_values(&self, values: ArrayRef) -> ArrayRef {
1047 Arc::new(self.with_values(values))
1048 }
1049}
1050
/// Unit tests for [`DictionaryArray`], its builders and [`TypedDictionaryArray`].
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cast::as_dictionary_array;
    use crate::{Int8Array, Int16Array, Int32Array, RunArray};
    use arrow_buffer::{Buffer, ToByteSlice};

    /// Builds dictionary arrays from raw `ArrayData`, with and without an offset.
    #[test]
    fn test_dictionary_array() {
        // The dictionary values: eight `i8` entries.
        let value_data = ArrayData::builder(DataType::Int8)
            .len(8)
            .add_buffer(Buffer::from(
                [10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice(),
            ))
            .build()
            .unwrap();

        // The key buffer: three `i16` keys pointing into the values.
        let keys = Buffer::from([2_i16, 3, 4].to_byte_slice());

        // Assemble the dictionary data from the keys and the values.
        let key_type = DataType::Int16;
        let value_type = DataType::Int8;
        let dict_data_type = DataType::Dictionary(Box::new(key_type), Box::new(value_type));
        let dict_data = ArrayData::builder(dict_data_type.clone())
            .len(3)
            .add_buffer(keys.clone())
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let dict_array = Int16DictionaryArray::from(dict_data);

        let values = dict_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int8, dict_array.value_type());
        assert_eq!(3, dict_array.len());

        // Neither the keys nor the values contain nulls.
        assert_eq!(0, dict_array.null_count());
        assert_eq!(0, dict_array.values().null_count());
        assert_eq!(dict_array.keys(), &Int16Array::from(vec![2_i16, 3, 4]));

        // Same data again, but starting at offset 1 with length 2.
        let dict_data = ArrayData::builder(dict_data_type)
            .len(2)
            .offset(1)
            .add_buffer(keys)
            .add_child_data(value_data.clone())
            .build()
            .unwrap();
        let dict_array = Int16DictionaryArray::from(dict_data);

        let values = dict_array.values();
        assert_eq!(value_data, values.to_data());
        assert_eq!(DataType::Int8, dict_array.value_type());
        assert_eq!(2, dict_array.len());
        assert_eq!(dict_array.keys(), &Int16Array::from(vec![3_i16, 4]));
    }

    /// `append_n` / `append_options` on a primitive dictionary builder.
    #[test]
    fn test_dictionary_builder_append_many() {
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::new();

        builder.append(1).unwrap();
        builder.append_n(2, 2).unwrap();
        builder.append_options(None, 2);
        builder.append_options(Some(3), 3);

        let array = builder.finish();

        // Each distinct value is stored once.
        let values = array
            .values()
            .as_primitive::<UInt32Type>()
            .iter()
            .map(Option::unwrap)
            .collect::<Vec<_>>();
        assert_eq!(values, &[1, 2, 3]);
        let keys = array.keys().iter().collect::<Vec<_>>();
        assert_eq!(
            keys,
            &[
                Some(0),
                Some(1),
                Some(1),
                None,
                None,
                Some(2),
                Some(2),
                Some(2)
            ]
        );
    }

    /// `append_n` / `append_options` on a string dictionary builder.
    #[test]
    fn test_string_dictionary_builder_append_many() {
        let mut builder = StringDictionaryBuilder::<Int8Type>::new();

        builder.append("a").unwrap();
        builder.append_n("b", 2).unwrap();
        builder.append_options(None::<&str>, 2);
        builder.append_options(Some("c"), 3);

        let array = builder.finish();

        // Each distinct value is stored once.
        let values = array
            .values()
            .as_string::<i32>()
            .iter()
            .map(Option::unwrap)
            .collect::<Vec<_>>();
        assert_eq!(values, &["a", "b", "c"]);
        let keys = array.keys().iter().collect::<Vec<_>>();
        assert_eq!(
            keys,
            &[
                Some(0),
                Some(1),
                Some(1),
                None,
                None,
                Some(2),
                Some(2),
                Some(2)
            ]
        );
    }

    /// Checks the exact `Debug` rendering of dictionary arrays.
    #[test]
    fn test_dictionary_array_fmt_debug() {
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(3, 2);
        builder.append(12345678).unwrap();
        builder.append_null();
        builder.append(22345678).unwrap();
        let array = builder.finish();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n 0,\n null,\n 1,\n] values: PrimitiveArray<UInt32>\n[\n 12345678,\n 22345678,\n]}\n",
            format!("{array:?}")
        );

        // Twenty identical values share a single dictionary entry.
        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(20, 2);
        for _ in 0..20 {
            builder.append(1).unwrap();
        }
        let array = builder.finish();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n] values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
            format!("{array:?}")
        );
    }

    /// Collecting from iterators of `Option<&str>` and of `&str`.
    #[test]
    fn test_dictionary_array_from_iter() {
        let test = vec!["a", "a", "b", "c"];
        // Replace "b" with a null to exercise the `Option<&str>` impl.
        let array: DictionaryArray<Int8Type> = test
            .iter()
            .map(|&x| if x == "b" { None } else { Some(x) })
            .collect();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n 0,\n 0,\n null,\n 1,\n] values: StringArray\n[\n \"a\",\n \"c\",\n]}\n",
            format!("{array:?}")
        );

        let array: DictionaryArray<Int8Type> = test.into_iter().collect();
        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n 0,\n 0,\n 1,\n 2,\n] values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
            format!("{array:?}")
        );
    }

    /// `lookup_key` maps a string value back to its dictionary key.
    #[test]
    fn test_dictionary_array_reverse_lookup_key() {
        let test = vec!["a", "a", "b", "c"];
        let array: DictionaryArray<Int8Type> = test.into_iter().collect();

        assert_eq!(array.lookup_key("c"), Some(2));

        // Keys follow first appearance: t3 -> 0, t2 -> 1, t1 -> 2, ...
        let test = vec!["t3", "t3", "t2", "t2", "t1", "t3", "t4", "t1", "t0"];
        let array: DictionaryArray<Int8Type> = test.into_iter().collect();

        assert_eq!(array.lookup_key("t1"), Some(2));
        assert_eq!(array.lookup_key("non-existent"), None);
    }

    /// Keys are exposed as a primitive array of the key type.
    #[test]
    fn test_dictionary_keys_as_primitive_array() {
        let test = vec!["a", "b", "c", "a"];
        let array: DictionaryArray<Int8Type> = test.into_iter().collect();

        let keys = array.keys();
        assert_eq!(&DataType::Int8, keys.data_type());
        assert_eq!(0, keys.null_count());
        assert_eq!(&[0, 1, 2, 0], keys.values());
    }

    /// Null inputs become null keys.
    #[test]
    fn test_dictionary_keys_as_primitive_array_with_null() {
        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
        let array: DictionaryArray<Int32Type> = test.into_iter().collect();

        let keys = array.keys();
        assert_eq!(&DataType::Int32, keys.data_type());
        assert_eq!(3, keys.null_count());

        assert!(keys.is_valid(0));
        assert!(!keys.is_valid(1));
        assert!(keys.is_valid(2));
        assert!(!keys.is_valid(3));
        assert!(!keys.is_valid(4));
        assert!(keys.is_valid(5));

        assert_eq!(0, keys.value(0));
        assert_eq!(1, keys.value(2));
        assert_eq!(0, keys.value(5));
    }

    /// An all-null dictionary still produces valid array data.
    #[test]
    fn test_dictionary_all_nulls() {
        let test = vec![None, None, None];
        let array: DictionaryArray<Int32Type> = test.into_iter().collect();
        array
            .into_data()
            .validate_full()
            .expect("All null array has valid array data");
    }

    /// `keys_iter` yields the keys, which can be used to take the values.
    #[test]
    fn test_dictionary_iter() {
        // Dictionary values and the keys pointing into them.
        let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
        let keys = Int16Array::from_iter_values([2_i16, 3, 4]);

        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let mut key_iter = dict_array.keys_iter();
        assert_eq!(2, key_iter.next().unwrap().unwrap());
        assert_eq!(3, key_iter.next().unwrap().unwrap());
        assert_eq!(4, key_iter.next().unwrap().unwrap());
        assert!(key_iter.next().is_none());

        // Resolve the keys against the values.
        let mut iter = dict_array
            .values()
            .as_any()
            .downcast_ref::<Int8Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter());

        assert_eq!(12, iter.next().unwrap().unwrap());
        assert_eq!(13, iter.next().unwrap().unwrap());
        assert_eq!(14, iter.next().unwrap().unwrap());
        assert!(iter.next().is_none());
    }

    /// Same as above, with null keys yielding `None` values.
    #[test]
    fn test_dictionary_iter_with_null() {
        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
        let array: DictionaryArray<Int32Type> = test.into_iter().collect();

        let mut iter = array
            .values()
            .as_any()
            .downcast_ref::<StringArray>()
            .unwrap()
            .take_iter(array.keys_iter());

        assert_eq!("a", iter.next().unwrap().unwrap());
        assert!(iter.next().unwrap().is_none());
        assert_eq!("b", iter.next().unwrap().unwrap());
        assert!(iter.next().unwrap().is_none());
        assert!(iter.next().unwrap().is_none());
        assert_eq!("a", iter.next().unwrap().unwrap());
        assert!(iter.next().is_none());
    }

    /// `key(i)` returns the key as `usize`, or `None` for a null slot.
    #[test]
    fn test_dictionary_key() {
        let keys = Int8Array::from(vec![Some(2), None, Some(1)]);
        let values = StringArray::from(vec!["foo", "bar", "baz", "blarg"]);

        let array = DictionaryArray::new(keys, Arc::new(values));
        assert_eq!(array.key(0), Some(2));
        assert_eq!(array.key(1), None);
        assert_eq!(array.key(2), Some(1));
    }

    /// Construction from valid keys and values.
    #[test]
    fn test_try_new() {
        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
            .into_iter()
            .collect();
        let keys: Int32Array = [Some(0), Some(2), None, Some(1)].into_iter().collect();

        let array = DictionaryArray::new(keys, Arc::new(values));
        assert_eq!(array.keys().data_type(), &DataType::Int32);
        assert_eq!(array.values().data_type(), &DataType::Utf8);

        assert_eq!(array.null_count(), 1);
        assert_eq!(array.logical_null_count(), 1);

        assert!(array.keys().is_valid(0));
        assert!(array.keys().is_valid(1));
        assert!(array.keys().is_null(2));
        assert!(array.keys().is_valid(3));

        assert_eq!(array.keys().value(0), 0);
        assert_eq!(array.keys().value(1), 2);
        assert_eq!(array.keys().value(3), 1);

        assert_eq!(
            "DictionaryArray {keys: PrimitiveArray<Int32>\n[\n 0,\n 2,\n null,\n 1,\n] values: StringArray\n[\n \"foo\",\n \"bar\",\n \"baz\",\n]}\n",
            format!("{array:?}")
        );
    }

    /// A key past the end of the values is rejected.
    #[test]
    #[should_panic(expected = "Invalid dictionary key 3 at index 1, expected 0 <= key < 2")]
    fn test_try_new_index_too_large() {
        let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect();
        let keys: Int32Array = [Some(0), Some(3)].into_iter().collect();
        DictionaryArray::new(keys, Arc::new(values));
    }

    /// A negative key is rejected.
    #[test]
    #[should_panic(expected = "Invalid dictionary key -100 at index 0, expected 0 <= key < 2")]
    fn test_try_new_index_too_small() {
        let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect();
        let keys: Int32Array = [Some(-100)].into_iter().collect();
        DictionaryArray::new(keys, Arc::new(values));
    }

    /// Converting `ArrayData` with a mismatched key type panics.
    #[test]
    #[should_panic(expected = "DictionaryArray's data type must match, expected Int64 got Int32")]
    fn test_from_array_data_validation() {
        let a = DictionaryArray::<Int32Type>::from_iter(["32"]);
        let _ = DictionaryArray::<Int64Type>::from(a.into_data());
    }

    /// A uniquely owned dictionary can be turned into a builder and mutated.
    #[test]
    fn test_into_primitive_dict_builder() {
        let values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let boxed: ArrayRef = Arc::new(dict_array);
        let col: DictionaryArray<Int8Type> = as_dictionary_array(&boxed).clone();

        // Drop the other handle before requesting the builder.
        drop(boxed);

        let mut builder = col.into_primitive_dict_builder::<Int32Type>().unwrap();

        let slice = builder.values_slice_mut();
        assert_eq!(slice, &[10, 12, 15]);

        slice[0] = 4;
        slice[1] = 2;
        slice[2] = 1;

        let values = Int32Array::from_iter_values([4_i32, 2, 1]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let expected = DictionaryArray::new(keys, Arc::new(values));

        let new_array = builder.finish();
        assert_eq!(expected, new_array);
    }

    /// While `boxed` is still alive the conversion fails and hands the array back.
    #[test]
    fn test_into_primitive_dict_builder_cloned_array() {
        let values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let dict_array = DictionaryArray::new(keys, Arc::new(values));

        let boxed: ArrayRef = Arc::new(dict_array);

        let col: DictionaryArray<Int8Type> = DictionaryArray::<Int8Type>::from(boxed.to_data());
        let err = col.into_primitive_dict_builder::<Int32Type>();

        let returned = err.unwrap_err();

        let values = Int32Array::from_iter_values([10_i32, 12, 15]);
        let keys = Int8Array::from_iter_values([1_i8, 0, 2, 0]);

        let expected = DictionaryArray::new(keys, Arc::new(values));
        assert_eq!(expected, returned);
    }

    /// `occupancy` marks exactly the values referenced by non-null keys.
    #[test]
    fn test_occupancy() {
        // No nulls: keys 100..200 are all occupied.
        let keys = Int32Array::new((100..200).collect(), None);
        let values = Int32Array::from(vec![0; 1024]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        for (idx, v) in dict.occupancy().iter().enumerate() {
            let expected = (100..200).contains(&idx);
            assert_eq!(v, expected, "{idx}");
        }

        // With nulls: only every fourth key is valid.
        let keys = Int32Array::new(
            (0..100).collect(),
            Some((0..100).map(|x| x % 4 == 0).collect()),
        );
        let values = Int32Array::from(vec![0; 1024]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        for (idx, v) in dict.occupancy().iter().enumerate() {
            let expected = idx % 4 == 0 && idx < 100;
            assert_eq!(v, expected, "{idx}");
        }
    }

    /// Iterating a typed dictionary tolerates an out-of-range key in a null
    /// slot and reports null dictionary values as `None`.
    #[test]
    fn test_iterator_nulls() {
        // Slot 1 is null and holds an out-of-range key (700).
        let keys = Int32Array::new(
            vec![0, 700, 1, 2].into(),
            Some(NullBuffer::from(vec![true, false, true, true])),
        );
        let values = Int32Array::from(vec![Some(50), None, Some(2)]);
        let dict = DictionaryArray::new(keys, Arc::new(values));
        let values: Vec<_> = dict
            .downcast_dict::<Int32Array>()
            .unwrap()
            .into_iter()
            .collect();
        assert_eq!(values, &[Some(50), None, None, Some(2)])
    }

    /// Logical nulls combine key nulls with the logical nulls of the values.
    #[test]
    fn test_logical_nulls() -> Result<(), ArrowError> {
        let values = Arc::new(RunArray::try_new(
            &Int32Array::from(vec![1, 3, 7]),
            &Int32Array::from(vec![Some(1), None, Some(3)]),
        )?) as ArrayRef;

        // The values report no physical nulls but two logical nulls.
        assert_eq!(values.null_count(), 0);
        assert_eq!(values.logical_null_count(), 2);

        // One key per value, none of them null.
        let dictionary = DictionaryArray::<Int8Type>::try_new(
            Int8Array::from((0..values.len()).map(|i| i as i8).collect::<Vec<_>>()),
            Arc::clone(&values),
        )?;

        // The dictionary's logical nulls mirror those of its values.
        assert_eq!(dictionary.null_count(), 0);
        assert_eq!(dictionary.logical_null_count(), values.logical_null_count());
        assert_eq!(dictionary.logical_nulls(), values.logical_nulls());
        assert!(dictionary.is_nullable());

        // Same keys, but the first one is null.
        let dictionary = DictionaryArray::<Int8Type>::try_new(
            Int8Array::from(
                (0..values.len())
                    .map(|i| i as i8)
                    .map(|i| if i == 0 { None } else { Some(i) })
                    .collect::<Vec<_>>(),
            ),
            Arc::clone(&values),
        )?;

        assert_eq!(dictionary.null_count(), 1);

        // The null key adds one logical null on top of those of the values.
        assert_eq!(
            dictionary.logical_null_count(),
            values.logical_null_count() + 1
        );
        assert!(dictionary.is_nullable());

        Ok(())
    }

    /// `normalized_keys` clamps the out-of-range key in the null slot.
    #[test]
    fn test_normalized_keys() {
        let values = vec![132, 0, 1].into();
        let nulls = NullBuffer::from(vec![false, true, true]);
        let keys = Int32Array::new(values, Some(nulls));
        let dictionary = DictionaryArray::new(keys, Arc::new(Int32Array::new_null(2)));
        assert_eq!(&dictionary.normalized_keys(), &[1, 0, 1])
    }
}