1use std::marker::PhantomData;
4
5use arrow_array::{
6 builder::{
7 BinaryDictionaryBuilder, FixedSizeBinaryDictionaryBuilder, LargeBinaryDictionaryBuilder,
8 LargeStringDictionaryBuilder, PrimitiveDictionaryBuilder, StringDictionaryBuilder,
9 },
10 types::{
11 Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type, UInt16Type,
12 UInt32Type, UInt64Type,
13 },
14};
15use arrow_schema::DataType;
16
17use super::{ArrowBinding, binary::LargeBinary, strings::LargeUtf8};
18
/// Wrapper that tags a value of type `V` as dictionary-encoded with key type `K`.
///
/// Only the value is stored; `K` exists purely at the type level through
/// [`PhantomData`], and the wrapper is `#[repr(transparent)]` over `V`.
#[derive(Debug, Clone, PartialEq)]
#[repr(transparent)]
pub struct Dictionary<K, V>(V, PhantomData<K>);

impl<K, V> Dictionary<K, V> {
    /// Wraps `value` in a `Dictionary`.
    #[inline]
    pub fn new(value: V) -> Self {
        Dictionary(value, PhantomData)
    }

    /// Borrows the wrapped value.
    #[inline]
    pub fn value(&self) -> &V {
        let Dictionary(inner, _) = self;
        inner
    }

    /// Consumes the wrapper and hands back the wrapped value.
    #[inline]
    pub fn into_value(self) -> V {
        let Dictionary(inner, _) = self;
        inner
    }
}
49
/// Deserializes a `Dictionary` by deserializing the wrapped `V` directly;
/// the key type `K` takes no part in the serialized form.
#[cfg(feature = "serde")]
impl<'de, K, V> serde::de::Deserialize<'de> for Dictionary<K, V>
where
    V: serde::de::Deserialize<'de>,
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::de::Deserializer<'de>,
    {
        let inner = V::deserialize(deserializer)?;
        Ok(Dictionary(inner, PhantomData))
    }
}
63
/// Serializes a `Dictionary` exactly as its wrapped `V` would be serialized.
#[cfg(feature = "serde")]
impl<K, V> serde::Serialize for Dictionary<K, V>
where
    V: serde::Serialize,
{
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let inner: &V = &self.0;
        inner.serialize(serializer)
    }
}
73
74impl<K, V> From<V> for Dictionary<K, V> {
75 #[inline]
76 fn from(value: V) -> Self {
77 Self::new(value)
78 }
79}
80
81pub trait DictKey {
83 type ArrowKey;
85
86 fn data_type() -> DataType;
88}
89
90macro_rules! impl_dict_key {
91 ($rust:ty, $arrow:ty, $dt:expr) => {
92 impl DictKey for $rust {
93 type ArrowKey = $arrow;
94 fn data_type() -> DataType {
95 $dt
96 }
97 }
98 };
99}
100
101impl_dict_key!(i8, Int8Type, DataType::Int8);
102impl_dict_key!(i16, Int16Type, DataType::Int16);
103impl_dict_key!(i32, Int32Type, DataType::Int32);
104impl_dict_key!(i64, Int64Type, DataType::Int64);
105impl_dict_key!(u8, UInt8Type, DataType::UInt8);
106impl_dict_key!(u16, UInt16Type, DataType::UInt16);
107impl_dict_key!(u32, UInt32Type, DataType::UInt32);
108impl_dict_key!(u64, UInt64Type, DataType::UInt64);
109
110impl<K> ArrowBinding for Dictionary<K, String>
112where
113 K: DictKey,
114 <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
115{
116 type Builder = StringDictionaryBuilder<<K as DictKey>::ArrowKey>;
117 type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;
118 fn data_type() -> DataType {
119 DataType::Dictionary(
120 Box::new(<K as DictKey>::data_type()),
121 Box::new(DataType::Utf8),
122 )
123 }
124 fn new_builder(_capacity: usize) -> Self::Builder {
125 StringDictionaryBuilder::new()
126 }
127 fn append_value(b: &mut Self::Builder, v: &Self) {
128 let _ = b.append(v.value().as_str());
129 }
130 fn append_null(b: &mut Self::Builder) {
131 b.append_null();
132 }
133 fn finish(mut b: Self::Builder) -> Self::Array {
134 b.finish()
135 }
136}
137
138impl<K> ArrowBinding for Dictionary<K, Vec<u8>>
140where
141 K: DictKey,
142 <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
143{
144 type Builder = BinaryDictionaryBuilder<<K as DictKey>::ArrowKey>;
145 type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;
146 fn data_type() -> DataType {
147 DataType::Dictionary(
148 Box::new(<K as DictKey>::data_type()),
149 Box::new(DataType::Binary),
150 )
151 }
152 fn new_builder(_capacity: usize) -> Self::Builder {
153 BinaryDictionaryBuilder::new()
154 }
155 fn append_value(b: &mut Self::Builder, v: &Self) {
156 let _ = b.append(v.value().as_slice());
157 }
158 fn append_null(b: &mut Self::Builder) {
159 b.append_null();
160 }
161 fn finish(mut b: Self::Builder) -> Self::Array {
162 b.finish()
163 }
164}
165
166impl<K, const N: usize> ArrowBinding for Dictionary<K, [u8; N]>
168where
169 K: DictKey,
170 <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
171{
172 type Builder = FixedSizeBinaryDictionaryBuilder<<K as DictKey>::ArrowKey>;
173 type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;
174 fn data_type() -> DataType {
175 DataType::Dictionary(
176 Box::new(<K as DictKey>::data_type()),
177 Box::new(DataType::FixedSizeBinary(
178 i32::try_from(N).expect("width fits i32"),
179 )),
180 )
181 }
182 fn new_builder(_capacity: usize) -> Self::Builder {
183 FixedSizeBinaryDictionaryBuilder::new(i32::try_from(N).expect("width fits i32"))
185 }
186 fn append_value(b: &mut Self::Builder, v: &Self) {
187 let _ = b.append(*v.value());
188 }
189 fn append_null(b: &mut Self::Builder) {
190 b.append_null();
191 }
192 fn finish(mut b: Self::Builder) -> Self::Array {
193 b.finish()
194 }
195}
196
197impl<K> ArrowBinding for Dictionary<K, LargeBinary>
199where
200 K: DictKey,
201 <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
202{
203 type Builder = LargeBinaryDictionaryBuilder<<K as DictKey>::ArrowKey>;
204 type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;
205 fn data_type() -> DataType {
206 DataType::Dictionary(
207 Box::new(<K as DictKey>::data_type()),
208 Box::new(DataType::LargeBinary),
209 )
210 }
211 fn new_builder(_capacity: usize) -> Self::Builder {
212 LargeBinaryDictionaryBuilder::new()
213 }
214 fn append_value(b: &mut Self::Builder, v: &Self) {
215 let _ = b.append(v.value().as_slice());
216 }
217 fn append_null(b: &mut Self::Builder) {
218 b.append_null();
219 }
220 fn finish(mut b: Self::Builder) -> Self::Array {
221 b.finish()
222 }
223}
224
225impl<K> ArrowBinding for Dictionary<K, LargeUtf8>
227where
228 K: DictKey,
229 <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
230{
231 type Builder = LargeStringDictionaryBuilder<<K as DictKey>::ArrowKey>;
232 type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;
233 fn data_type() -> DataType {
234 DataType::Dictionary(
235 Box::new(<K as DictKey>::data_type()),
236 Box::new(DataType::LargeUtf8),
237 )
238 }
239 fn new_builder(_capacity: usize) -> Self::Builder {
240 LargeStringDictionaryBuilder::new()
241 }
242 fn append_value(b: &mut Self::Builder, v: &Self) {
243 let _ = b.append(v.value().as_str());
244 }
245 fn append_null(b: &mut Self::Builder) {
246 b.append_null();
247 }
248 fn finish(mut b: Self::Builder) -> Self::Array {
249 b.finish()
250 }
251}
252
253macro_rules! impl_dict_primitive_value {
255 ($rust:ty, $atype:ty, $dt:expr) => {
256 impl<K> ArrowBinding for Dictionary<K, $rust>
257 where
258 K: DictKey,
259 <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
260 {
261 type Builder = PrimitiveDictionaryBuilder<<K as DictKey>::ArrowKey, $atype>;
262 type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;
263 fn data_type() -> DataType {
264 DataType::Dictionary(Box::new(<K as DictKey>::data_type()), Box::new($dt))
265 }
266 fn new_builder(_capacity: usize) -> Self::Builder {
267 PrimitiveDictionaryBuilder::<_, $atype>::new()
268 }
269 fn append_value(b: &mut Self::Builder, v: &Self) {
270 let _ = b.append(*v.value());
271 }
272 fn append_null(b: &mut Self::Builder) {
273 b.append_null();
274 }
275 fn finish(mut b: Self::Builder) -> Self::Array {
276 b.finish()
277 }
278 }
279 };
280}
281
282impl_dict_primitive_value!(i8, Int8Type, DataType::Int8);
283impl_dict_primitive_value!(i16, Int16Type, DataType::Int16);
284impl_dict_primitive_value!(i32, Int32Type, DataType::Int32);
285impl_dict_primitive_value!(i64, Int64Type, DataType::Int64);
286impl_dict_primitive_value!(u8, UInt8Type, DataType::UInt8);
287impl_dict_primitive_value!(u16, UInt16Type, DataType::UInt16);
288impl_dict_primitive_value!(u32, UInt32Type, DataType::UInt32);
289impl_dict_primitive_value!(u64, UInt64Type, DataType::UInt64);
290impl_dict_primitive_value!(f32, Float32Type, DataType::Float32);
291impl_dict_primitive_value!(f64, Float64Type, DataType::Float64);
292
/// View access for dictionary arrays: a lookup reads the key stored at the
/// requested row and returns the view of the dictionary value it refers to.
#[cfg(feature = "views")]
impl<K, V> super::ArrowBindingView for Dictionary<K, V>
where
    K: DictKey + 'static,
    V: ArrowBinding + super::ArrowBindingView + 'static,
    <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
{
    type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;
    type View<'a>
        = V::View<'a>
    where
        Self: 'a;

    /// Returns the view of the dictionary value referenced by row `index`.
    ///
    /// # Errors
    ///
    /// * `OutOfBounds` when `index >= array.len()`.
    /// * `UnexpectedNull` when the row at `index` is null.
    /// * `TypeMismatch` when the values array is not `V`'s array type.
    /// * Any error returned by `V::get_view` for the resolved value index.
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError> {
        use crate::schema::ViewAccessError;
        use arrow_array::Array;
        use arrow_buffer::ArrowNativeType;

        let len = array.len();
        if index >= len {
            return Err(ViewAccessError::OutOfBounds {
                index,
                len,
                field_name: None,
            });
        }
        if array.is_null(index) {
            return Err(ViewAccessError::UnexpectedNull {
                index,
                field_name: None,
            });
        }

        // Key stored at this row, converted to an index into the values array.
        // NOTE(review): assumes the key is non-negative at a non-null row —
        // confirm `as_usize` cannot see a negative key here.
        let value_index = array.keys().value(index).as_usize();

        // The values array is type-erased; recover the concrete array `V` expects.
        let dictionary = array.values();
        let Some(typed) = dictionary
            .as_any()
            .downcast_ref::<<V as super::ArrowBindingView>::Array>()
        else {
            return Err(ViewAccessError::TypeMismatch {
                expected: V::data_type(),
                actual: dictionary.data_type().clone(),
                field_name: None,
            });
        };

        V::get_view(typed, value_index)
    }
}
349
/// Builds an owned `Dictionary<K, String>` from a borrowed string view.
/// This conversion always succeeds.
#[cfg(feature = "views")]
impl<K> TryFrom<&str> for Dictionary<K, String>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &str) -> Result<Self, Self::Error> {
        let owned = String::from(view);
        Ok(Self::new(owned))
    }
}
366
/// Builds an owned `Dictionary<K, Vec<u8>>` from a borrowed byte-slice view.
/// This conversion always succeeds.
#[cfg(feature = "views")]
impl<K> TryFrom<&[u8]> for Dictionary<K, Vec<u8>>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &[u8]) -> Result<Self, Self::Error> {
        let owned: Vec<u8> = Vec::from(view);
        Ok(Self::new(owned))
    }
}
379
/// Builds a `Dictionary<K, [u8; N]>` from a borrowed byte-slice view.
///
/// # Errors
///
/// Returns `ViewAccessError::TypeMismatch` when `view` is not exactly `N`
/// bytes long, reporting `FixedSizeBinary(N)` as the expected type and
/// `Binary` as the actual type.
#[cfg(feature = "views")]
impl<K, const N: usize> TryFrom<&[u8]> for Dictionary<K, [u8; N]>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &[u8]) -> Result<Self, Self::Error> {
        let arr: [u8; N] = view.try_into().map_err(|_| {
            // Checked conversion rather than `N as i32`, which would wrap for
            // widths above `i32::MAX`. Saturate instead of panicking, since
            // this value is only used to describe the mismatch.
            let expected_width = i32::try_from(N).unwrap_or(i32::MAX);
            crate::schema::ViewAccessError::TypeMismatch {
                expected: arrow_schema::DataType::FixedSizeBinary(expected_width),
                actual: arrow_schema::DataType::Binary,
                field_name: None,
            }
        })?;
        Ok(Dictionary::new(arr))
    }
}
399
/// Builds an owned `Dictionary<K, LargeBinary>` from a borrowed byte-slice view.
/// This conversion always succeeds.
#[cfg(feature = "views")]
impl<K> TryFrom<&[u8]> for Dictionary<K, super::binary::LargeBinary>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &[u8]) -> Result<Self, Self::Error> {
        let owned_bytes = view.to_vec();
        let wrapped = super::binary::LargeBinary::new(owned_bytes);
        Ok(Self::new(wrapped))
    }
}
414
/// Builds an owned `Dictionary<K, LargeUtf8>` from a borrowed string view.
/// This conversion always succeeds.
#[cfg(feature = "views")]
impl<K> TryFrom<&str> for Dictionary<K, super::strings::LargeUtf8>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &str) -> Result<Self, Self::Error> {
        let owned_text = view.to_string();
        let wrapped = super::strings::LargeUtf8::new(owned_text);
        Ok(Self::new(wrapped))
    }
}
429
430