re_arrow2/array/dictionary/
mod.rs1use std::{hash::Hash, hint::unreachable_unchecked, sync::Arc};
2
3use crate::{
4 bitmap::{
5 utils::{BitmapIter, ZipValidity},
6 Bitmap,
7 },
8 datatypes::{DataType, IntegerType},
9 error::Error,
10 scalar::{new_scalar, Scalar},
11 trusted_len::TrustedLen,
12 types::NativeType,
13};
14
15#[cfg(feature = "arrow")]
16mod data;
17mod ffi;
18pub(super) mod fmt;
19mod iterator;
20mod mutable;
21use crate::array::specification::check_indexes_unchecked;
22mod typed_iterator;
23mod value_map;
24
25use crate::array::dictionary::typed_iterator::{DictValue, DictionaryValuesIterTyped};
26pub use iterator::*;
27pub use mutable::*;
28
29use super::{new_empty_array, primitive::PrimitiveArray, Array};
30use super::{new_null_array, specification::check_indexes};
31
32pub unsafe trait DictionaryKey: NativeType + TryInto<usize> + TryFrom<usize> + Hash {
38 const KEY_TYPE: IntegerType;
40
41 #[inline]
45 unsafe fn as_usize(self) -> usize {
46 match self.try_into() {
47 Ok(v) => v,
48 Err(_) => unreachable_unchecked(),
49 }
50 }
51
52 fn always_fits_usize() -> bool {
54 false
55 }
56}
57
58unsafe impl DictionaryKey for i8 {
59 const KEY_TYPE: IntegerType = IntegerType::Int8;
60}
61unsafe impl DictionaryKey for i16 {
62 const KEY_TYPE: IntegerType = IntegerType::Int16;
63}
64unsafe impl DictionaryKey for i32 {
65 const KEY_TYPE: IntegerType = IntegerType::Int32;
66}
67unsafe impl DictionaryKey for i64 {
68 const KEY_TYPE: IntegerType = IntegerType::Int64;
69}
70unsafe impl DictionaryKey for u8 {
71 const KEY_TYPE: IntegerType = IntegerType::UInt8;
72
73 fn always_fits_usize() -> bool {
74 true
75 }
76}
77unsafe impl DictionaryKey for u16 {
78 const KEY_TYPE: IntegerType = IntegerType::UInt16;
79
80 fn always_fits_usize() -> bool {
81 true
82 }
83}
84unsafe impl DictionaryKey for u32 {
85 const KEY_TYPE: IntegerType = IntegerType::UInt32;
86
87 fn always_fits_usize() -> bool {
88 true
89 }
90}
91unsafe impl DictionaryKey for u64 {
92 const KEY_TYPE: IntegerType = IntegerType::UInt64;
93
94 #[cfg(target_pointer_width = "64")]
95 fn always_fits_usize() -> bool {
96 true
97 }
98}
99
100#[derive(Clone)]
108pub struct DictionaryArray<K: DictionaryKey> {
109 data_type: DataType,
110 keys: PrimitiveArray<K>,
111 values: Box<dyn Array>,
112}
113
114fn check_data_type(
115 key_type: IntegerType,
116 data_type: &DataType,
117 values_data_type: &DataType,
118) -> Result<(), Error> {
119 if let DataType::Dictionary(key, value, _) = data_type.to_logical_type() {
120 if *key != key_type {
121 return Err(Error::oos(
122 "DictionaryArray must be initialized with a DataType::Dictionary whose integer is compatible to its keys",
123 ));
124 }
125 if value.as_ref().to_logical_type() != values_data_type.to_logical_type() {
126 return Err(Error::oos(
127 "DictionaryArray must be initialized with a DataType::Dictionary whose value is equal to its values",
128 ));
129 }
130 } else {
131 return Err(Error::oos(
132 "DictionaryArray must be initialized with logical DataType::Dictionary",
133 ));
134 }
135 Ok(())
136}
137
138impl<K: DictionaryKey> DictionaryArray<K> {
139 pub fn try_new(
149 data_type: DataType,
150 keys: PrimitiveArray<K>,
151 values: Box<dyn Array>,
152 ) -> Result<Self, Error> {
153 check_data_type(K::KEY_TYPE, &data_type, values.data_type())?;
154
155 if keys.null_count() != keys.len() {
156 if K::always_fits_usize() {
157 unsafe { check_indexes_unchecked(keys.values(), values.len()) }?;
160 } else {
161 check_indexes(keys.values(), values.len())?;
162 }
163 }
164
165 Ok(Self {
166 data_type,
167 keys,
168 values,
169 })
170 }
171
172 pub fn try_from_keys(keys: PrimitiveArray<K>, values: Box<dyn Array>) -> Result<Self, Error> {
179 let data_type = Self::default_data_type(values.data_type().clone());
180 Self::try_new(data_type, keys, values)
181 }
182
183 pub unsafe fn try_new_unchecked(
192 data_type: DataType,
193 keys: PrimitiveArray<K>,
194 values: Box<dyn Array>,
195 ) -> Result<Self, Error> {
196 check_data_type(K::KEY_TYPE, &data_type, values.data_type())?;
197
198 Ok(Self {
199 data_type,
200 keys,
201 values,
202 })
203 }
204
205 pub fn new_empty(data_type: DataType) -> Self {
207 let values = Self::try_get_child(&data_type).unwrap();
208 let values = new_empty_array(values.clone());
209 Self::try_new(
210 data_type,
211 PrimitiveArray::<K>::new_empty(K::PRIMITIVE.into()),
212 values,
213 )
214 .unwrap()
215 }
216
217 #[inline]
219 pub fn new_null(data_type: DataType, length: usize) -> Self {
220 let values = Self::try_get_child(&data_type).unwrap();
221 let values = new_null_array(values.clone(), 1);
222 Self::try_new(
223 data_type,
224 PrimitiveArray::<K>::new_null(K::PRIMITIVE.into(), length),
225 values,
226 )
227 .unwrap()
228 }
229
230 pub fn iter(&self) -> ZipValidity<Box<dyn Scalar>, DictionaryValuesIter<K>, BitmapIter> {
235 ZipValidity::new_with_validity(DictionaryValuesIter::new(self), self.keys.validity())
236 }
237
238 pub fn values_iter(&self) -> DictionaryValuesIter<K> {
243 DictionaryValuesIter::new(self)
244 }
245
246 pub fn values_iter_typed<V: DictValue>(
253 &self,
254 ) -> Result<DictionaryValuesIterTyped<K, V>, Error> {
255 let keys = &self.keys;
256 assert_eq!(keys.null_count(), 0);
257 let values = self.values.as_ref();
258 let values = V::downcast_values(values)?;
259 Ok(unsafe { DictionaryValuesIterTyped::new(keys, values) })
260 }
261
262 pub fn iter_typed<V: DictValue>(
268 &self,
269 ) -> Result<ZipValidity<V::IterValue<'_>, DictionaryValuesIterTyped<K, V>, BitmapIter>, Error>
270 {
271 let keys = &self.keys;
272 let values = self.values.as_ref();
273 let values = V::downcast_values(values)?;
274 let values_iter = unsafe { DictionaryValuesIterTyped::new(keys, values) };
275 Ok(ZipValidity::new_with_validity(values_iter, self.validity()))
276 }
277
278 #[inline]
280 pub fn data_type(&self) -> &DataType {
281 &self.data_type
282 }
283
284 #[inline]
286 pub fn is_ordered(&self) -> bool {
287 match self.data_type.to_logical_type() {
288 DataType::Dictionary(_, _, is_ordered) => *is_ordered,
289 _ => unreachable!(),
290 }
291 }
292
293 pub(crate) fn default_data_type(values_datatype: DataType) -> DataType {
294 DataType::Dictionary(K::KEY_TYPE, Arc::new(values_datatype), false)
295 }
296
297 pub fn slice(&mut self, offset: usize, length: usize) {
301 self.keys.slice(offset, length);
302 }
303
304 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
308 self.keys.slice_unchecked(offset, length);
309 }
310
311 impl_sliced!();
312
313 #[must_use]
317 pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
318 self.set_validity(validity);
319 self
320 }
321
322 pub fn set_validity(&mut self, validity: Option<Bitmap>) {
326 self.keys.set_validity(validity);
327 }
328
329 impl_into_array!();
330
331 #[inline]
333 pub fn len(&self) -> usize {
334 self.keys.len()
335 }
336
337 #[inline]
339 pub fn validity(&self) -> Option<&Bitmap> {
340 self.keys.validity()
341 }
342
343 #[inline]
346 pub fn keys(&self) -> &PrimitiveArray<K> {
347 &self.keys
348 }
349
350 #[inline]
352 pub fn keys_values_iter(&self) -> impl TrustedLen<Item = usize> + Clone + '_ {
353 self.keys.values_iter().map(|x| unsafe { x.as_usize() })
355 }
356
357 #[inline]
359 pub fn keys_iter(&self) -> impl TrustedLen<Item = Option<usize>> + Clone + '_ {
360 self.keys.iter().map(|x| x.map(|x| unsafe { x.as_usize() }))
362 }
363
364 #[inline]
368 pub fn key_value(&self, index: usize) -> usize {
369 unsafe { self.keys.values()[index].as_usize() }
371 }
372
373 #[inline]
375 pub fn values(&self) -> &Box<dyn Array> {
376 &self.values
377 }
378
379 #[inline]
386 pub fn value(&self, index: usize) -> Box<dyn Scalar> {
387 let index = unsafe { self.keys.value(index).as_usize() };
389 new_scalar(self.values.as_ref(), index)
390 }
391
392 pub(crate) fn try_get_child(data_type: &DataType) -> Result<&DataType, Error> {
393 Ok(match data_type.to_logical_type() {
394 DataType::Dictionary(_, values, _) => values.as_ref(),
395 _ => {
396 return Err(Error::oos(
397 "Dictionaries must be initialized with DataType::Dictionary",
398 ))
399 }
400 })
401 }
402}
403
404impl<K: DictionaryKey> Array for DictionaryArray<K> {
405 impl_common_array!();
406
407 fn validity(&self) -> Option<&Bitmap> {
408 self.keys.validity()
409 }
410
411 #[inline]
412 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
413 Box::new(self.clone().with_validity(validity))
414 }
415}