1use std::marker::PhantomData;
4
5use arrow_array::{
6 builder::{
7 BinaryDictionaryBuilder, FixedSizeBinaryDictionaryBuilder, LargeBinaryDictionaryBuilder,
8 LargeStringDictionaryBuilder, PrimitiveDictionaryBuilder, StringDictionaryBuilder,
9 },
10 types::{
11 Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type, UInt16Type,
12 UInt32Type, UInt64Type,
13 },
14};
15use arrow_schema::DataType;
16
17use super::{ArrowBinding, binary::LargeBinary, strings::LargeUtf8};
18
/// Wrapper that tags a value of type `V` with a dictionary key type `K`.
///
/// Only the value is stored. `K` is carried purely at the type level through
/// [`PhantomData`], and `#[repr(transparent)]` gives the wrapper the same
/// layout as `V`.
#[repr(transparent)]
pub struct Dictionary<K, V>(V, PhantomData<K>);

impl<K, V> Dictionary<K, V> {
    /// Wraps `value`, attaching the key type `K` as a compile-time marker.
    #[inline]
    pub fn new(value: V) -> Self {
        Dictionary(value, PhantomData)
    }

    /// Borrows the wrapped value.
    #[inline]
    pub fn value(&self) -> &V {
        let Self(inner, _) = self;
        inner
    }

    /// Consumes the wrapper and returns the wrapped value.
    #[inline]
    pub fn into_value(self) -> V {
        let Self(inner, _) = self;
        inner
    }
}
48
49impl<K, V> From<V> for Dictionary<K, V> {
50 #[inline]
51 fn from(value: V) -> Self {
52 Self::new(value)
53 }
54}
55
56pub trait DictKey {
58 type ArrowKey;
60
61 fn data_type() -> DataType;
63}
64
/// Generates a [`DictKey`] impl for a single Rust type.
///
/// Arguments, in order: the Rust type, the Arrow key type marker assigned to
/// `ArrowKey`, and the `DataType` expression returned by `data_type()`.
macro_rules! impl_dict_key {
    ($native:ty, $arrow_key:ty, $data_type:expr) => {
        impl DictKey for $native {
            type ArrowKey = $arrow_key;

            fn data_type() -> DataType {
                $data_type
            }
        }
    };
}
75
76impl_dict_key!(i8, Int8Type, DataType::Int8);
77impl_dict_key!(i16, Int16Type, DataType::Int16);
78impl_dict_key!(i32, Int32Type, DataType::Int32);
79impl_dict_key!(i64, Int64Type, DataType::Int64);
80impl_dict_key!(u8, UInt8Type, DataType::UInt8);
81impl_dict_key!(u16, UInt16Type, DataType::UInt16);
82impl_dict_key!(u32, UInt32Type, DataType::UInt32);
83impl_dict_key!(u64, UInt64Type, DataType::UInt64);
84
85impl<K> ArrowBinding for Dictionary<K, String>
87where
88 K: DictKey,
89 <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
90{
91 type Builder = StringDictionaryBuilder<<K as DictKey>::ArrowKey>;
92 type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;
93 fn data_type() -> DataType {
94 DataType::Dictionary(
95 Box::new(<K as DictKey>::data_type()),
96 Box::new(DataType::Utf8),
97 )
98 }
99 fn new_builder(_capacity: usize) -> Self::Builder {
100 StringDictionaryBuilder::new()
101 }
102 fn append_value(b: &mut Self::Builder, v: &Self) {
103 let _ = b.append(v.value().as_str());
104 }
105 fn append_null(b: &mut Self::Builder) {
106 b.append_null();
107 }
108 fn finish(mut b: Self::Builder) -> Self::Array {
109 b.finish()
110 }
111}
112
113impl<K> ArrowBinding for Dictionary<K, Vec<u8>>
115where
116 K: DictKey,
117 <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
118{
119 type Builder = BinaryDictionaryBuilder<<K as DictKey>::ArrowKey>;
120 type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;
121 fn data_type() -> DataType {
122 DataType::Dictionary(
123 Box::new(<K as DictKey>::data_type()),
124 Box::new(DataType::Binary),
125 )
126 }
127 fn new_builder(_capacity: usize) -> Self::Builder {
128 BinaryDictionaryBuilder::new()
129 }
130 fn append_value(b: &mut Self::Builder, v: &Self) {
131 let _ = b.append(v.value().as_slice());
132 }
133 fn append_null(b: &mut Self::Builder) {
134 b.append_null();
135 }
136 fn finish(mut b: Self::Builder) -> Self::Array {
137 b.finish()
138 }
139}
140
141impl<K, const N: usize> ArrowBinding for Dictionary<K, [u8; N]>
143where
144 K: DictKey,
145 <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
146{
147 type Builder = FixedSizeBinaryDictionaryBuilder<<K as DictKey>::ArrowKey>;
148 type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;
149 fn data_type() -> DataType {
150 DataType::Dictionary(
151 Box::new(<K as DictKey>::data_type()),
152 Box::new(DataType::FixedSizeBinary(
153 i32::try_from(N).expect("width fits i32"),
154 )),
155 )
156 }
157 fn new_builder(_capacity: usize) -> Self::Builder {
158 FixedSizeBinaryDictionaryBuilder::new(i32::try_from(N).expect("width fits i32"))
160 }
161 fn append_value(b: &mut Self::Builder, v: &Self) {
162 let _ = b.append(*v.value());
163 }
164 fn append_null(b: &mut Self::Builder) {
165 b.append_null();
166 }
167 fn finish(mut b: Self::Builder) -> Self::Array {
168 b.finish()
169 }
170}
171
172impl<K> ArrowBinding for Dictionary<K, LargeBinary>
174where
175 K: DictKey,
176 <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
177{
178 type Builder = LargeBinaryDictionaryBuilder<<K as DictKey>::ArrowKey>;
179 type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;
180 fn data_type() -> DataType {
181 DataType::Dictionary(
182 Box::new(<K as DictKey>::data_type()),
183 Box::new(DataType::LargeBinary),
184 )
185 }
186 fn new_builder(_capacity: usize) -> Self::Builder {
187 LargeBinaryDictionaryBuilder::new()
188 }
189 fn append_value(b: &mut Self::Builder, v: &Self) {
190 let _ = b.append(v.value().as_slice());
191 }
192 fn append_null(b: &mut Self::Builder) {
193 b.append_null();
194 }
195 fn finish(mut b: Self::Builder) -> Self::Array {
196 b.finish()
197 }
198}
199
200impl<K> ArrowBinding for Dictionary<K, LargeUtf8>
202where
203 K: DictKey,
204 <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
205{
206 type Builder = LargeStringDictionaryBuilder<<K as DictKey>::ArrowKey>;
207 type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;
208 fn data_type() -> DataType {
209 DataType::Dictionary(
210 Box::new(<K as DictKey>::data_type()),
211 Box::new(DataType::LargeUtf8),
212 )
213 }
214 fn new_builder(_capacity: usize) -> Self::Builder {
215 LargeStringDictionaryBuilder::new()
216 }
217 fn append_value(b: &mut Self::Builder, v: &Self) {
218 let _ = b.append(v.value().as_str());
219 }
220 fn append_null(b: &mut Self::Builder) {
221 b.append_null();
222 }
223 fn finish(mut b: Self::Builder) -> Self::Array {
224 b.finish()
225 }
226}
227
/// Generates an `ArrowBinding` impl for `Dictionary<K, $native>` where the
/// value is a primitive.
///
/// Arguments, in order: the Rust value type, the Arrow primitive type marker
/// used by the builder, and the `DataType` expression for the value half of
/// `DataType::Dictionary`.
macro_rules! impl_dict_primitive_value {
    ($native:ty, $arrow_value:ty, $value_type:expr) => {
        impl<K> ArrowBinding for Dictionary<K, $native>
        where
            K: DictKey,
            <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
        {
            type Builder = PrimitiveDictionaryBuilder<<K as DictKey>::ArrowKey, $arrow_value>;
            type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;

            /// Returns `Dictionary(<key type of K>, <value type>)`.
            fn data_type() -> DataType {
                let key_type = <K as DictKey>::data_type();
                DataType::Dictionary(Box::new(key_type), Box::new($value_type))
            }

            /// Creates an empty builder. `_capacity` is accepted for the trait
            /// signature but is not used.
            fn new_builder(_capacity: usize) -> Self::Builder {
                PrimitiveDictionaryBuilder::<_, $arrow_value>::new()
            }

            /// Appends a copy of the wrapped primitive to the builder.
            ///
            /// The value returned by the builder's `append` is deliberately
            /// discarded; this method returns `()` and so cannot report it.
            /// NOTE(review): presumably that value is a `Result`, so a failed
            /// append would go unnoticed here — confirm this is intended.
            fn append_value(builder: &mut Self::Builder, item: &Self) {
                let native = *item.value();
                let _ = builder.append(native);
            }

            /// Appends a null entry.
            fn append_null(builder: &mut Self::Builder) {
                builder.append_null();
            }

            /// Consumes the builder and produces the finished dictionary array.
            fn finish(mut builder: Self::Builder) -> Self::Array {
                builder.finish()
            }
        }
    };
}
256
257impl_dict_primitive_value!(i8, Int8Type, DataType::Int8);
258impl_dict_primitive_value!(i16, Int16Type, DataType::Int16);
259impl_dict_primitive_value!(i32, Int32Type, DataType::Int32);
260impl_dict_primitive_value!(i64, Int64Type, DataType::Int64);
261impl_dict_primitive_value!(u8, UInt8Type, DataType::UInt8);
262impl_dict_primitive_value!(u16, UInt16Type, DataType::UInt16);
263impl_dict_primitive_value!(u32, UInt32Type, DataType::UInt32);
264impl_dict_primitive_value!(u64, UInt64Type, DataType::UInt64);
265impl_dict_primitive_value!(f32, Float32Type, DataType::Float32);
266impl_dict_primitive_value!(f64, Float64Type, DataType::Float64);
267
#[cfg(feature = "views")]
/// View access for dictionary columns.
///
/// The view type is the value type's own view (`V::View`): reading an element
/// resolves its dictionary key and then delegates to `V`.
impl<K, V> super::ArrowBindingView for Dictionary<K, V>
where
    K: DictKey + 'static,
    V: ArrowBinding + super::ArrowBindingView + 'static,
    <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
{
    type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;
    type View<'a>
        = V::View<'a>
    where
        Self: 'a;

    /// Returns the view of the value that the key at `index` refers to.
    ///
    /// # Errors
    ///
    /// - `OutOfBounds` when `index >= array.len()`.
    /// - `UnexpectedNull` when the entry at `index` is null.
    /// - `TypeMismatch` when the dictionary's values array is not `V`'s array type.
    /// - Any error returned by `V::get_view` for the resolved dictionary slot.
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError> {
        use crate::schema::ViewAccessError;
        use arrow_array::Array;
        use arrow_buffer::ArrowNativeType;

        let len = array.len();
        if index >= len {
            return Err(ViewAccessError::OutOfBounds {
                index,
                len,
                field_name: None,
            });
        }
        if array.is_null(index) {
            return Err(ViewAccessError::UnexpectedNull {
                index,
                field_name: None,
            });
        }

        // The key stored at `index` is the position within the values array.
        let slot = array.keys().value(index).as_usize();

        // The values array is type-erased; recover the concrete array `V` expects.
        let dictionary = array.values();
        let Some(typed) = dictionary
            .as_any()
            .downcast_ref::<<V as super::ArrowBindingView>::Array>()
        else {
            return Err(ViewAccessError::TypeMismatch {
                expected: V::data_type(),
                actual: dictionary.data_type().clone(),
                field_name: None,
            });
        };

        V::get_view(typed, slot)
    }
}
324
#[cfg(feature = "views")]
/// Builds an owned `Dictionary<K, String>` from a borrowed string view.
///
/// The text is copied into a new `String`; this implementation always
/// returns `Ok`.
impl<K> TryFrom<&str> for Dictionary<K, String>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &str) -> Result<Self, Self::Error> {
        let owned = String::from(view);
        Ok(Self::new(owned))
    }
}
341
#[cfg(feature = "views")]
/// Builds an owned `Dictionary<K, Vec<u8>>` from a borrowed byte view.
///
/// The bytes are copied into a new `Vec<u8>`; this implementation always
/// returns `Ok`.
impl<K> TryFrom<&[u8]> for Dictionary<K, Vec<u8>>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &[u8]) -> Result<Self, Self::Error> {
        let owned: Vec<u8> = view.to_owned();
        Ok(Self::new(owned))
    }
}
354
#[cfg(feature = "views")]
/// Builds an owned `Dictionary<K, [u8; N]>` from a borrowed byte view.
///
/// # Errors
///
/// Returns `ViewAccessError::TypeMismatch` when `view` is not exactly `N`
/// bytes long. The error reports `FixedSizeBinary(N)` as the expected type and
/// always reports `Binary` as the actual type.
impl<K, const N: usize> TryFrom<&[u8]> for Dictionary<K, [u8; N]>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &[u8]) -> Result<Self, Self::Error> {
        match <[u8; N]>::try_from(view) {
            Ok(bytes) => Ok(Dictionary::new(bytes)),
            Err(_) => {
                // Checked conversion instead of `N as i32`, which would wrap to
                // a nonsensical (possibly negative) width for very large `N`.
                // This is already an error path, so saturate rather than panic.
                let width = i32::try_from(N).unwrap_or(i32::MAX);
                Err(crate::schema::ViewAccessError::TypeMismatch {
                    expected: arrow_schema::DataType::FixedSizeBinary(width),
                    actual: arrow_schema::DataType::Binary,
                    field_name: None,
                })
            }
        }
    }
}
374
#[cfg(feature = "views")]
/// Builds an owned `Dictionary<K, LargeBinary>` from a borrowed byte view.
///
/// The bytes are copied into a new `Vec<u8>` and wrapped in `LargeBinary`;
/// this implementation always returns `Ok`.
impl<K> TryFrom<&[u8]> for Dictionary<K, super::binary::LargeBinary>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &[u8]) -> Result<Self, Self::Error> {
        let bytes = view.to_vec();
        let wrapped = super::binary::LargeBinary::new(bytes);
        Ok(Dictionary::new(wrapped))
    }
}
389
#[cfg(feature = "views")]
/// Builds an owned `Dictionary<K, LargeUtf8>` from a borrowed string view.
///
/// The text is copied into a new `String` and wrapped in `LargeUtf8`; this
/// implementation always returns `Ok`.
impl<K> TryFrom<&str> for Dictionary<K, super::strings::LargeUtf8>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &str) -> Result<Self, Self::Error> {
        let text = view.to_string();
        let wrapped = super::strings::LargeUtf8::new(text);
        Ok(Dictionary::new(wrapped))
    }
}
404
405