use std::marker::PhantomData;
use arrow_array::{
builder::{
BinaryDictionaryBuilder, FixedSizeBinaryDictionaryBuilder, LargeBinaryDictionaryBuilder,
LargeStringDictionaryBuilder, PrimitiveDictionaryBuilder, StringDictionaryBuilder,
},
types::{
Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type, UInt16Type,
UInt32Type, UInt64Type,
},
};
use arrow_schema::DataType;
use super::{ArrowBinding, binary::LargeBinary, strings::LargeUtf8};
/// Newtype that tags a value of type `V` with a dictionary key type `K`.
///
/// Only the value is stored; `K` is carried purely at the type level through
/// `PhantomData`, and the wrapper is `#[repr(transparent)]`.
#[derive(Debug, Clone, PartialEq)]
#[repr(transparent)]
pub struct Dictionary<K, V>(V, PhantomData<K>);

impl<K, V> Dictionary<K, V> {
    /// Wraps `value` in a `Dictionary`.
    #[inline]
    pub fn new(value: V) -> Self {
        Dictionary(value, PhantomData)
    }

    /// Returns a shared reference to the wrapped value.
    #[inline]
    pub fn value(&self) -> &V {
        let Dictionary(inner, _) = self;
        inner
    }

    /// Consumes the wrapper and hands back the wrapped value.
    #[inline]
    pub fn into_value(self) -> V {
        let Dictionary(inner, _) = self;
        inner
    }
}
/// Deserializes a `Dictionary` exactly like its inner value: the input is
/// handed to `V`'s deserializer and the result is wrapped.
#[cfg(feature = "serde")]
impl<'de, K, V> serde::de::Deserialize<'de> for Dictionary<K, V>
where
    V: serde::de::Deserialize<'de>,
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::de::Deserializer<'de>,
    {
        // Any error from `V`'s deserializer is returned unchanged.
        V::deserialize(deserializer).map(Self::new)
    }
}
/// Serializes a `Dictionary` as its inner value; the key marker `K`
/// contributes nothing to the output.
#[cfg(feature = "serde")]
impl<K, V> serde::Serialize for Dictionary<K, V>
where
    V: serde::Serialize,
{
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let Self(inner, _) = self;
        inner.serialize(serializer)
    }
}
/// Converts any value into a `Dictionary` by delegating to `Dictionary::new`.
impl<K, V> From<V> for Dictionary<K, V> {
#[inline]
fn from(value: V) -> Self {
Self::new(value)
}
}
/// Describes a type usable as the key marker `K` of a `Dictionary<K, V>`.
///
/// In this file it is implemented for the integer types `i8`..`i64` and
/// `u8`..`u64` via `impl_dict_key!`.
pub trait DictKey {
/// Arrow-side type associated with this key; the `ArrowBinding` impls in this
/// file require it to implement `ArrowDictionaryKeyType`.
type ArrowKey;
/// Arrow `DataType` used as the key type of the resulting
/// `DataType::Dictionary`.
fn data_type() -> DataType;
}
// Generates a `DictKey` impl for one key type.
//
// Arguments, in order: the Rust type receiving the impl, the type to use as
// `ArrowKey`, and the expression returned from `data_type()`.
macro_rules! impl_dict_key {
    ($native:ty, $arrow_key:ty, $key_dtype:expr) => {
        impl DictKey for $native {
            type ArrowKey = $arrow_key;

            fn data_type() -> DataType {
                $key_dtype
            }
        }
    };
}
// `DictKey` impls for the signed and unsigned integer types of 8 to 64 bits,
// each paired with the Arrow type and `DataType` variant of the same name.
impl_dict_key!(i8, Int8Type, DataType::Int8);
impl_dict_key!(i16, Int16Type, DataType::Int16);
impl_dict_key!(i32, Int32Type, DataType::Int32);
impl_dict_key!(i64, Int64Type, DataType::Int64);
impl_dict_key!(u8, UInt8Type, DataType::UInt8);
impl_dict_key!(u16, UInt16Type, DataType::UInt16);
impl_dict_key!(u32, UInt32Type, DataType::UInt32);
impl_dict_key!(u64, UInt64Type, DataType::UInt64);
/// Arrow binding for `Dictionary<K, String>`, built with a
/// `StringDictionaryBuilder` keyed by `K`'s Arrow key type.
impl<K> ArrowBinding for Dictionary<K, String>
where
    K: DictKey,
    <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
{
    type Builder = StringDictionaryBuilder<<K as DictKey>::ArrowKey>;
    type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;

    /// Returns `DataType::Dictionary` with `K`'s key type and `Utf8` values.
    fn data_type() -> DataType {
        let key_type = Box::new(<K as DictKey>::data_type());
        let value_type = Box::new(DataType::Utf8);
        DataType::Dictionary(key_type, value_type)
    }

    /// Creates an empty builder; the capacity argument is not used.
    fn new_builder(_capacity: usize) -> Self::Builder {
        StringDictionaryBuilder::new()
    }

    /// Appends the wrapped string to the builder.
    ///
    /// The `Result` returned by the builder's `append` is discarded.
    /// NOTE(review): presumably the only failure is running out of key space
    /// for `K` — confirm that silently ignoring it is intended.
    fn append_value(b: &mut Self::Builder, v: &Self) {
        let text = v.value().as_str();
        let _ = b.append(text);
    }

    /// Forwards to the builder's `append_null`.
    fn append_null(b: &mut Self::Builder) {
        b.append_null();
    }

    /// Consumes the builder and returns the array produced by its `finish`.
    fn finish(mut b: Self::Builder) -> Self::Array {
        b.finish()
    }
}
/// Arrow binding for `Dictionary<K, Vec<u8>>`, built with a
/// `BinaryDictionaryBuilder` keyed by `K`'s Arrow key type.
impl<K> ArrowBinding for Dictionary<K, Vec<u8>>
where
    K: DictKey,
    <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
{
    type Builder = BinaryDictionaryBuilder<<K as DictKey>::ArrowKey>;
    type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;

    /// Returns `DataType::Dictionary` with `K`'s key type and `Binary` values.
    fn data_type() -> DataType {
        let key_type = Box::new(<K as DictKey>::data_type());
        let value_type = Box::new(DataType::Binary);
        DataType::Dictionary(key_type, value_type)
    }

    /// Creates an empty builder; the capacity argument is not used.
    fn new_builder(_capacity: usize) -> Self::Builder {
        BinaryDictionaryBuilder::new()
    }

    /// Appends the wrapped bytes to the builder.
    ///
    /// The `Result` returned by the builder's `append` is discarded.
    /// NOTE(review): presumably the only failure is running out of key space
    /// for `K` — confirm that silently ignoring it is intended.
    fn append_value(b: &mut Self::Builder, v: &Self) {
        let bytes = v.value().as_slice();
        let _ = b.append(bytes);
    }

    /// Forwards to the builder's `append_null`.
    fn append_null(b: &mut Self::Builder) {
        b.append_null();
    }

    /// Consumes the builder and returns the array produced by its `finish`.
    fn finish(mut b: Self::Builder) -> Self::Array {
        b.finish()
    }
}
/// Arrow binding for `Dictionary<K, [u8; N]>`, built with a
/// `FixedSizeBinaryDictionaryBuilder` whose byte width is `N`.
///
/// # Panics
///
/// `data_type` and `new_builder` panic with `"width fits i32"` when `N` does
/// not fit in an `i32`.
impl<K, const N: usize> ArrowBinding for Dictionary<K, [u8; N]>
where
    K: DictKey,
    <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
{
    type Builder = FixedSizeBinaryDictionaryBuilder<<K as DictKey>::ArrowKey>;
    type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;

    /// Returns `DataType::Dictionary` with `K`'s key type and
    /// `FixedSizeBinary(N)` values.
    fn data_type() -> DataType {
        let key_type = Box::new(<K as DictKey>::data_type());
        let width = i32::try_from(N).expect("width fits i32");
        DataType::Dictionary(key_type, Box::new(DataType::FixedSizeBinary(width)))
    }

    /// Creates an empty builder of byte width `N`; the capacity argument is
    /// not used.
    fn new_builder(_capacity: usize) -> Self::Builder {
        let width = i32::try_from(N).expect("width fits i32");
        FixedSizeBinaryDictionaryBuilder::new(width)
    }

    /// Appends a copy of the wrapped byte array to the builder.
    ///
    /// The `Result` returned by the builder's `append` is discarded.
    /// NOTE(review): presumably the only failure is running out of key space
    /// for `K` — confirm that silently ignoring it is intended.
    fn append_value(b: &mut Self::Builder, v: &Self) {
        let bytes: [u8; N] = *v.value();
        let _ = b.append(bytes);
    }

    /// Forwards to the builder's `append_null`.
    fn append_null(b: &mut Self::Builder) {
        b.append_null();
    }

    /// Consumes the builder and returns the array produced by its `finish`.
    fn finish(mut b: Self::Builder) -> Self::Array {
        b.finish()
    }
}
/// Arrow binding for `Dictionary<K, LargeBinary>`, built with a
/// `LargeBinaryDictionaryBuilder` keyed by `K`'s Arrow key type.
impl<K> ArrowBinding for Dictionary<K, LargeBinary>
where
    K: DictKey,
    <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
{
    type Builder = LargeBinaryDictionaryBuilder<<K as DictKey>::ArrowKey>;
    type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;

    /// Returns `DataType::Dictionary` with `K`'s key type and `LargeBinary`
    /// values.
    fn data_type() -> DataType {
        let key_type = Box::new(<K as DictKey>::data_type());
        let value_type = Box::new(DataType::LargeBinary);
        DataType::Dictionary(key_type, value_type)
    }

    /// Creates an empty builder; the capacity argument is not used.
    fn new_builder(_capacity: usize) -> Self::Builder {
        LargeBinaryDictionaryBuilder::new()
    }

    /// Appends the wrapped value's byte slice to the builder.
    ///
    /// The `Result` returned by the builder's `append` is discarded.
    /// NOTE(review): presumably the only failure is running out of key space
    /// for `K` — confirm that silently ignoring it is intended.
    fn append_value(b: &mut Self::Builder, v: &Self) {
        let bytes = v.value().as_slice();
        let _ = b.append(bytes);
    }

    /// Forwards to the builder's `append_null`.
    fn append_null(b: &mut Self::Builder) {
        b.append_null();
    }

    /// Consumes the builder and returns the array produced by its `finish`.
    fn finish(mut b: Self::Builder) -> Self::Array {
        b.finish()
    }
}
/// Arrow binding for `Dictionary<K, LargeUtf8>`, built with a
/// `LargeStringDictionaryBuilder` keyed by `K`'s Arrow key type.
impl<K> ArrowBinding for Dictionary<K, LargeUtf8>
where
    K: DictKey,
    <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
{
    type Builder = LargeStringDictionaryBuilder<<K as DictKey>::ArrowKey>;
    type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;

    /// Returns `DataType::Dictionary` with `K`'s key type and `LargeUtf8`
    /// values.
    fn data_type() -> DataType {
        let key_type = Box::new(<K as DictKey>::data_type());
        let value_type = Box::new(DataType::LargeUtf8);
        DataType::Dictionary(key_type, value_type)
    }

    /// Creates an empty builder; the capacity argument is not used.
    fn new_builder(_capacity: usize) -> Self::Builder {
        LargeStringDictionaryBuilder::new()
    }

    /// Appends the wrapped value's string slice to the builder.
    ///
    /// The `Result` returned by the builder's `append` is discarded.
    /// NOTE(review): presumably the only failure is running out of key space
    /// for `K` — confirm that silently ignoring it is intended.
    fn append_value(b: &mut Self::Builder, v: &Self) {
        let text = v.value().as_str();
        let _ = b.append(text);
    }

    /// Forwards to the builder's `append_null`.
    fn append_null(b: &mut Self::Builder) {
        b.append_null();
    }

    /// Consumes the builder and returns the array produced by its `finish`.
    fn finish(mut b: Self::Builder) -> Self::Array {
        b.finish()
    }
}
// Generates `ArrowBinding for Dictionary<K, $native>` for one primitive value
// type, backed by a `PrimitiveDictionaryBuilder`.
//
// Arguments, in order: the Rust value type, the Arrow type used as the
// builder's value type, and the `DataType` expression for the values.
macro_rules! impl_dict_primitive_value {
    ($native:ty, $arrow_value:ty, $value_dtype:expr) => {
        impl<K> ArrowBinding for Dictionary<K, $native>
        where
            K: DictKey,
            <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
        {
            type Builder = PrimitiveDictionaryBuilder<<K as DictKey>::ArrowKey, $arrow_value>;
            type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;

            /// Returns `DataType::Dictionary` with `K`'s key type and the
            /// value type passed to the macro.
            fn data_type() -> DataType {
                let key_type = Box::new(<K as DictKey>::data_type());
                let value_type = Box::new($value_dtype);
                DataType::Dictionary(key_type, value_type)
            }

            /// Creates an empty builder; the capacity argument is not used.
            fn new_builder(_capacity: usize) -> Self::Builder {
                PrimitiveDictionaryBuilder::<_, $arrow_value>::new()
            }

            /// Appends a copy of the wrapped value to the builder.
            ///
            /// The `Result` returned by the builder's `append` is discarded.
            /// NOTE(review): presumably the only failure is running out of
            /// key space for `K` — confirm that ignoring it is intended.
            fn append_value(b: &mut Self::Builder, v: &Self) {
                let native: $native = *v.value();
                let _ = b.append(native);
            }

            /// Forwards to the builder's `append_null`.
            fn append_null(b: &mut Self::Builder) {
                b.append_null();
            }

            /// Consumes the builder and returns the array produced by its
            /// `finish`.
            fn finish(mut b: Self::Builder) -> Self::Array {
                b.finish()
            }
        }
    };
}
// `ArrowBinding` impls for dictionaries whose values are primitive numbers:
// the 8- to 64-bit signed and unsigned integers plus `f32` and `f64`.
impl_dict_primitive_value!(i8, Int8Type, DataType::Int8);
impl_dict_primitive_value!(i16, Int16Type, DataType::Int16);
impl_dict_primitive_value!(i32, Int32Type, DataType::Int32);
impl_dict_primitive_value!(i64, Int64Type, DataType::Int64);
impl_dict_primitive_value!(u8, UInt8Type, DataType::UInt8);
impl_dict_primitive_value!(u16, UInt16Type, DataType::UInt16);
impl_dict_primitive_value!(u32, UInt32Type, DataType::UInt32);
impl_dict_primitive_value!(u64, UInt64Type, DataType::UInt64);
impl_dict_primitive_value!(f32, Float32Type, DataType::Float32);
impl_dict_primitive_value!(f64, Float64Type, DataType::Float64);
/// View access for dictionary arrays: a row's view is the view of the
/// dictionary value its key points at, so `View<'a>` is simply `V::View<'a>`.
#[cfg(feature = "views")]
impl<K, V> super::ArrowBindingView for Dictionary<K, V>
where
    K: DictKey + 'static,
    V: ArrowBinding + super::ArrowBindingView + 'static,
    <K as DictKey>::ArrowKey: arrow_array::types::ArrowDictionaryKeyType,
{
    type Array = arrow_array::DictionaryArray<<K as DictKey>::ArrowKey>;
    type View<'a>
        = V::View<'a>
    where
        Self: 'a;

    /// Resolves row `index` of `array` to a view of its dictionary value.
    ///
    /// # Errors
    ///
    /// * `OutOfBounds` when `index >= array.len()`.
    /// * `UnexpectedNull` when the row at `index` is null.
    /// * `TypeMismatch` when the dictionary's values array cannot be downcast
    ///   to `V`'s view array type.
    /// * Whatever `V::get_view` returns for the resolved dictionary slot.
    fn get_view(
        array: &Self::Array,
        index: usize,
    ) -> Result<Self::View<'_>, crate::schema::ViewAccessError> {
        use arrow_array::Array;
        use arrow_buffer::ArrowNativeType;

        // Guard clauses: reject rows that do not exist or hold no value.
        if index >= array.len() {
            return Err(crate::schema::ViewAccessError::OutOfBounds {
                index,
                len: array.len(),
                field_name: None,
            });
        }
        if array.is_null(index) {
            return Err(crate::schema::ViewAccessError::UnexpectedNull {
                index,
                field_name: None,
            });
        }

        // The key stored at `index` is the position inside the values array.
        let slot = array.keys().value(index).as_usize();

        let dictionary = array.values();
        let downcast = dictionary
            .as_any()
            .downcast_ref::<<V as super::ArrowBindingView>::Array>();
        let Some(typed) = downcast else {
            return Err(crate::schema::ViewAccessError::TypeMismatch {
                expected: V::data_type(),
                actual: dictionary.data_type().clone(),
                field_name: None,
            });
        };

        V::get_view(typed, slot)
    }
}
/// Builds an owned `Dictionary<K, String>` from a borrowed string view.
/// This conversion never returns an error.
#[cfg(feature = "views")]
impl<K> TryFrom<&str> for Dictionary<K, String>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &str) -> Result<Self, Self::Error> {
        let owned = String::from(view);
        Ok(Dictionary::new(owned))
    }
}
/// Builds an owned `Dictionary<K, Vec<u8>>` by copying a borrowed byte view.
/// This conversion never returns an error.
#[cfg(feature = "views")]
impl<K> TryFrom<&[u8]> for Dictionary<K, Vec<u8>>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &[u8]) -> Result<Self, Self::Error> {
        let owned: Vec<u8> = Vec::from(view);
        Ok(Dictionary::new(owned))
    }
}
/// Builds a `Dictionary<K, [u8; N]>` from a borrowed byte view.
///
/// # Errors
///
/// Returns `ViewAccessError::TypeMismatch` (expected `FixedSizeBinary(N)`,
/// actual `Binary`) when `view` is not exactly `N` bytes long.
#[cfg(feature = "views")]
impl<K, const N: usize> TryFrom<&[u8]> for Dictionary<K, [u8; N]>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &[u8]) -> Result<Self, Self::Error> {
        let arr: [u8; N] =
            view.try_into()
                .map_err(|_| crate::schema::ViewAccessError::TypeMismatch {
                    // Checked conversion instead of `N as i32`, which would
                    // silently wrap for widths above `i32::MAX`. Saturating
                    // (rather than panicking) keeps this error path
                    // infallible.
                    expected: arrow_schema::DataType::FixedSizeBinary(
                        i32::try_from(N).unwrap_or(i32::MAX),
                    ),
                    actual: arrow_schema::DataType::Binary,
                    field_name: None,
                })?;
        Ok(Dictionary::new(arr))
    }
}
/// Builds an owned `Dictionary<K, LargeBinary>` by copying a borrowed byte
/// view. This conversion never returns an error.
#[cfg(feature = "views")]
impl<K> TryFrom<&[u8]> for Dictionary<K, super::binary::LargeBinary>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &[u8]) -> Result<Self, Self::Error> {
        let owned = view.to_vec();
        let wrapped = super::binary::LargeBinary::new(owned);
        Ok(Dictionary::new(wrapped))
    }
}
/// Builds an owned `Dictionary<K, LargeUtf8>` by copying a borrowed string
/// view. This conversion never returns an error.
#[cfg(feature = "views")]
impl<K> TryFrom<&str> for Dictionary<K, super::strings::LargeUtf8>
where
    K: DictKey,
{
    type Error = crate::schema::ViewAccessError;

    fn try_from(view: &str) -> Result<Self, Self::Error> {
        let owned = view.to_string();
        let wrapped = super::strings::LargeUtf8::new(owned);
        Ok(Dictionary::new(wrapped))
    }
}