use std::marker::PhantomData;
use arrow::array::{Array, ArrayRef};
use arrow::datatypes::ArrowNativeType as _;
use arrow::datatypes::DataType;
use crate::datatype::{ColumnError, LogicalType, RefType, downcast_array};
/// Type-level marker for a dictionary-encoded column with key type `K` and
/// value type `V`.
///
/// No value of this type is constructed in this section; it is used as a type
/// parameter (for example `crate::Column<Dictionary<K, V>>`) and as the
/// implementor of the logical-type traits below.
pub struct Dictionary<K, V> {
// `fn() -> (K, V)` records both type parameters without storing a `K` or `V`.
_marker: PhantomData<fn() -> (K, V)>,
}
/// Types that may be used as the key parameter `K` of [`Dictionary`].
///
/// Each implementor names the Arrow key type used to parameterize
/// `arrow::array::DictionaryArray` for that key. The attribute below customizes
/// the compiler error shown when an unsupported type is used as a key.
#[diagnostic::on_unimplemented(
message = "`{Self}` cannot be used as a dictionary key type",
label = "dictionary keys must be one of `i8`–`i64`, `u8`–`u64`",
note = "for nullable dictionary rows, use `Option<Dictionary<K, V>>` instead of `Dictionary<Option<K>, V>` — the keys are storage indices, not readable values"
)]
pub trait DictionaryKey: crate::ConcreteType {
/// Arrow type marker corresponding to this Rust key type.
type ArrowKeyType: arrow::datatypes::ArrowDictionaryKeyType;
}
/// Implements [`DictionaryKey`] for one or more `RustType, ArrowKeyType` pairs.
///
/// Pairs are separated by `;` (a trailing `;` is accepted). A single pair
/// without a separator is also valid input.
macro_rules! impl_dictionary_key {
    ($($native:ty, $arrow_key:ty);+ $(;)?) => {
        $(
            impl DictionaryKey for $native {
                type ArrowKeyType = $arrow_key;
            }
        )+
    };
}

// Signed and unsigned 8- to 64-bit integers, each paired with its Arrow key type.
impl_dictionary_key! {
    i8, arrow::datatypes::Int8Type;
    i16, arrow::datatypes::Int16Type;
    i32, arrow::datatypes::Int32Type;
    i64, arrow::datatypes::Int64Type;
    u8, arrow::datatypes::UInt8Type;
    u16, arrow::datatypes::UInt16Type;
    u32, arrow::datatypes::UInt32Type;
    u64, arrow::datatypes::UInt64Type;
}
/// Downcast form of a [`Dictionary`] column, produced by its `downcast`.
///
/// Holds the Arrow dictionary array together with `V`'s typed view of the
/// dictionary's values, so a row can be read by looking its key up in `values`.
pub struct TypedDictionary<K: DictionaryKey, V: LogicalType> {
// The dictionary array; its keys are used as indices into `values`.
dictionary: arrow::array::DictionaryArray<K::ArrowKeyType>,
// Result of `V::downcast` applied to `dictionary.values()`.
values: V::Typed,
}
/// Field-wise clone.
///
/// Written by hand rather than derived: `#[derive(Clone)]` would add
/// `K: Clone` and `V: Clone` bounds, whereas only the two fields need to be
/// cloneable.
impl<K: DictionaryKey, V: LogicalType> Clone for TypedDictionary<K, V> {
    fn clone(&self) -> Self {
        let Self { dictionary, values } = self;
        Self {
            dictionary: dictionary.clone(),
            values: values.clone(),
        }
    }
}
/// Logical-type implementation for dictionary-encoded columns.
///
/// A row is read by taking its key from the dictionary array and looking that
/// key up in the typed values, so `Value` and `Owned` are those of `V`.
impl<K: DictionaryKey + 'static, V: LogicalType + 'static> LogicalType for Dictionary<K, V> {
    type Typed = TypedDictionary<K, V>;
    type Value<'a> = V::Value<'a>;
    type Owned = V::Owned;

    /// Downcasts `array` to a dictionary array keyed by `K::ArrowKeyType` and
    /// downcasts its values through `V`.
    ///
    /// # Errors
    ///
    /// * Whatever `downcast_array` returns when `array` is not such a
    ///   dictionary array.
    /// * [`ColumnError::UnexpectedNulls`] when `V` is not nullable and the
    ///   array has more logically-null rows than null keys, i.e. some non-null
    ///   key resolves to a null dictionary entry. `null_count` is that excess.
    /// * Any error from `V::downcast` on the dictionary's values.
    fn downcast(array: &dyn Array) -> Result<Self::Typed, ColumnError> {
        let dictionary =
            downcast_array::<arrow::array::DictionaryArray<K::ArrowKeyType>>(array, || {
                format!("Dictionary({:?}, …)", K::datatype())
            })?;

        // Null entries in the values only matter when `V` cannot represent
        // them, and only if some row actually refers to one.
        if !V::NULLABLE && dictionary.values().null_count() > 0 {
            let logically_null_rows = match dictionary.logical_nulls() {
                Some(nulls) => nulls.null_count(),
                None => 0,
            };
            let rows_with_null_key = dictionary.keys().null_count();
            // Rows with a null key are excluded from the reported count.
            let null_count = logically_null_rows.saturating_sub(rows_with_null_key);
            if null_count != 0 {
                return Err(ColumnError::UnexpectedNulls { null_count });
            }
        }

        let values = V::downcast(&**dictionary.values())?;
        Ok(TypedDictionary { dictionary, values })
    }

    /// Reports whether the row at `index` is null according to the dictionary
    /// array's own `is_null`.
    #[inline]
    fn is_null(typed: &Self::Typed, index: usize) -> bool {
        let rows = &typed.dictionary;
        rows.is_null(index)
    }

    /// Variant of [`Self::is_null`] that delegates to
    /// `crate::datatype::leaf_is_null_unchecked`.
    ///
    /// # Safety
    ///
    /// The caller must uphold the contract of `leaf_is_null_unchecked` for the
    /// dictionary array — presumably that `index` is in bounds (TODO confirm
    /// against that helper's documentation).
    #[inline]
    unsafe fn is_null_unchecked(typed: &Self::Typed, index: usize) -> bool {
        let rows = &typed.dictionary;
        // SAFETY: forwarded unchanged from this function's own safety contract.
        unsafe { crate::datatype::leaf_is_null_unchecked(rows, index) }
    }

    /// Returns the value for the row at `index` by reading its key and
    /// resolving it with `V::value`.
    #[inline]
    fn value(typed: &Self::Typed, index: usize) -> Self::Value<'_> {
        let TypedDictionary { dictionary, values } = typed;
        let slot = dictionary.keys().value(index).as_usize();
        V::value(values, slot)
    }

    /// Like [`Self::value`], but reads the key with `value_unchecked`.
    ///
    /// Only the key read is unchecked; the key is still resolved through
    /// `V::value`, not `V::value_unchecked`.
    ///
    /// # Safety
    ///
    /// `index` must satisfy the requirements of `value_unchecked` on the
    /// dictionary's key array.
    #[inline]
    unsafe fn value_unchecked(typed: &Self::Typed, index: usize) -> Self::Value<'_> {
        let TypedDictionary { dictionary, values } = typed;
        // SAFETY: the caller guarantees `index` is valid for the key array.
        let raw_key = unsafe { dictionary.keys().value_unchecked(index) };
        V::value(values, raw_key.as_usize())
    }

    /// Converts a borrowed value into its owned form using `V`'s conversion.
    fn to_owned_value(value: Self::Value<'_>) -> Self::Owned {
        V::to_owned_value(value)
    }
}
/// Concrete Arrow representation of a dictionary column.
impl<K, V> crate::ConcreteType for Dictionary<K, V>
where
    K: DictionaryKey + 'static,
    V: crate::ConcreteType + 'static,
{
    /// Returns `DataType::Dictionary` built from `K`'s and `V`'s data types.
    fn datatype() -> DataType {
        let key_type = Box::new(K::datatype());
        let value_type = Box::new(V::datatype());
        DataType::Dictionary(key_type, value_type)
    }

    /// Builds the column in two steps: `V::build` produces an array from
    /// `values`, which is then cast to [`Self::datatype`] with
    /// `arrow::compute::cast`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `V::build`; a failed cast is reported as
    /// [`ColumnError::Build`].
    fn build(values: impl Iterator<Item = Option<Self::Owned>>) -> Result<ArrayRef, ColumnError> {
        let unencoded = V::build(values)?;
        let target = Self::datatype();
        arrow::compute::cast(&unencoded, &target).map_err(ColumnError::Build)
    }
}
impl<K: DictionaryKey + 'static, V: RefType + 'static> RefType for Dictionary<K, V> {
type Ref = V::Ref;
fn value_ref(typed: &Self::Typed, index: usize) -> &Self::Ref {
let key = typed.dictionary.keys().value(index).as_usize();
V::value_ref(&typed.values, key)
}
}
/// Builds a dictionary column from a vector of items convertible into
/// `V::Owned`, by delegating to `Column::try_from_values`.
impl<K: DictionaryKey + 'static, V: crate::ConcreteType + 'static, T: Into<V::Owned>>
    TryFrom<Vec<T>> for crate::Column<Dictionary<K, V>>
{
    type Error = ColumnError;

    /// # Errors
    ///
    /// Returns whatever error `try_from_values` reports.
    fn try_from(values: Vec<T>) -> Result<Self, ColumnError> {
        Self::try_from_values(values)
    }
}