use crate::data::types::DictionaryKeyType;
use crate::data::ArrayDataLayout;
use crate::{ArrayData, ArrayDataBuilder, Buffers};
use arrow_buffer::buffer::{NullBuffer, ScalarBuffer};
use arrow_buffer::ArrowNativeType;
use arrow_schema::DataType;
mod private {
use super::*;
pub trait DictionaryKeySealed {
fn downcast_ref(data: &ArrayDataDictionary) -> Option<&DictionaryArrayData<Self>>
where
Self: DictionaryKey;
fn downcast(data: ArrayDataDictionary) -> Option<DictionaryArrayData<Self>>
where
Self: DictionaryKey;
fn upcast(v: DictionaryArrayData<Self>) -> ArrayDataDictionary
where
Self: DictionaryKey;
}
}
/// A native integer type usable as the key type of a dictionary array.
///
/// The trait is sealed: it requires `private::DictionaryKeySealed`, which
/// lives in a non-public module, so it can only be implemented in this file.
pub trait DictionaryKey: private::DictionaryKeySealed + ArrowNativeType {
/// The [`DictionaryKeyType`] tag corresponding to this native type.
const TYPE: DictionaryKeyType;
}
/// Implements [`DictionaryKey`] and `private::DictionaryKeySealed` for the
/// native type `$native`, associating it with the `$variant` variant of both
/// [`DictionaryKeyType`] and [`ArrayDataDictionary`].
macro_rules! dictionary {
    ($native:ty, $variant:ident) => {
        impl DictionaryKey for $native {
            const TYPE: DictionaryKeyType = DictionaryKeyType::$variant;
        }

        impl private::DictionaryKeySealed for $native {
            // Succeeds only when the enum currently holds this key type.
            fn downcast_ref(
                data: &ArrayDataDictionary,
            ) -> Option<&DictionaryArrayData<Self>> {
                if let ArrayDataDictionary::$variant(inner) = data {
                    Some(inner)
                } else {
                    None
                }
            }

            // Owned counterpart of `downcast_ref`; other variants are dropped.
            fn downcast(data: ArrayDataDictionary) -> Option<DictionaryArrayData<Self>> {
                if let ArrayDataDictionary::$variant(inner) = data {
                    Some(inner)
                } else {
                    None
                }
            }

            // Wrapping is infallible: every key type has exactly one variant.
            fn upcast(v: DictionaryArrayData<Self>) -> ArrayDataDictionary {
                ArrayDataDictionary::$variant(v)
            }
        }
    };
}
// Implement `DictionaryKey` for every signed and unsigned integer width from
// 8 to 64 bits, pairing each native type with its enum variant name.
dictionary!(i8, Int8);
dictionary!(i16, Int16);
dictionary!(i32, Int32);
dictionary!(i64, Int64);
dictionary!(u8, UInt8);
dictionary!(u16, UInt16);
dictionary!(u32, UInt32);
dictionary!(u64, UInt64);
/// Runs `$body` against the typed payload of an [`ArrayDataDictionary`].
///
/// `$binding` must name a variable holding the enum (or a reference to it).
/// Each match arm rebinds that same identifier to the inner
/// [`DictionaryArrayData`], so `$body` is written once and instantiated for
/// every key type.
macro_rules! dictionary_op {
    ($binding:ident, $body:block) => {
        match $binding {
            ArrayDataDictionary::Int8($binding) => $body,
            ArrayDataDictionary::Int16($binding) => $body,
            ArrayDataDictionary::Int32($binding) => $body,
            ArrayDataDictionary::Int64($binding) => $body,
            ArrayDataDictionary::UInt8($binding) => $body,
            ArrayDataDictionary::UInt16($binding) => $body,
            ArrayDataDictionary::UInt32($binding) => $body,
            ArrayDataDictionary::UInt64($binding) => $body,
        }
    };
}
/// A [`DictionaryArrayData`] whose key type is only known at runtime.
///
/// There is one variant per supported key type; the variant names mirror
/// those used for [`DictionaryKeyType`] in this file.
#[derive(Debug, Clone)]
pub enum ArrayDataDictionary {
// Signed integer keys.
Int8(DictionaryArrayData<i8>),
Int16(DictionaryArrayData<i16>),
Int32(DictionaryArrayData<i32>),
Int64(DictionaryArrayData<i64>),
// Unsigned integer keys.
UInt8(DictionaryArrayData<u8>),
UInt16(DictionaryArrayData<u16>),
UInt32(DictionaryArrayData<u32>),
UInt64(DictionaryArrayData<u64>),
}
impl ArrayDataDictionary {
pub fn downcast_ref<K: DictionaryKey>(&self) -> Option<&DictionaryArrayData<K>> {
K::downcast_ref(self)
}
pub fn downcast<K: DictionaryKey>(self) -> Option<DictionaryArrayData<K>> {
K::downcast(self)
}
pub fn values(&self) -> &ArrayData {
let s = self;
dictionary_op!(s, { s.values() })
}
pub fn slice(&self, offset: usize, len: usize) -> Self {
let s = self;
dictionary_op!(s, { s.slice(offset, len).into() })
}
pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
let s = self;
dictionary_op!(s, { s.layout() })
}
pub(crate) unsafe fn from_raw(
builder: ArrayDataBuilder,
key: DictionaryKeyType,
) -> Self {
use DictionaryKeyType::*;
match key {
Int8 => Self::Int8(DictionaryArrayData::from_raw(builder)),
Int16 => Self::Int16(DictionaryArrayData::from_raw(builder)),
Int32 => Self::Int32(DictionaryArrayData::from_raw(builder)),
Int64 => Self::Int64(DictionaryArrayData::from_raw(builder)),
UInt8 => Self::UInt8(DictionaryArrayData::from_raw(builder)),
UInt16 => Self::UInt16(DictionaryArrayData::from_raw(builder)),
UInt32 => Self::UInt32(DictionaryArrayData::from_raw(builder)),
UInt64 => Self::UInt64(DictionaryArrayData::from_raw(builder)),
}
}
}
impl<K: DictionaryKey> From<DictionaryArrayData<K>> for ArrayDataDictionary {
fn from(value: DictionaryArrayData<K>) -> Self {
K::upcast(value)
}
}
/// Storage for a dictionary array with keys of type `K`.
///
/// NOTE(review): presumably each non-null key is an index into `values`;
/// nothing in this file validates that, so confirm against the constructors'
/// callers.
#[derive(Debug, Clone)]
pub struct DictionaryArrayData<K: DictionaryKey> {
// Data type recorded for this array, as supplied at construction.
data_type: DataType,
// Optional null buffer; `None` when no null information was supplied.
nulls: Option<NullBuffer>,
// Typed buffer of keys; its length defines the length of the array.
keys: ScalarBuffer<K>,
// The dictionary values, stored behind a `Box`.
values: Box<ArrayData>,
}
impl<K: DictionaryKey> DictionaryArrayData<K> {
pub unsafe fn new_unchecked(
data_type: DataType,
keys: ScalarBuffer<K>,
nulls: Option<NullBuffer>,
child: ArrayData,
) -> Self {
Self {
data_type,
nulls,
keys,
values: Box::new(child),
}
}
pub(crate) unsafe fn from_raw(builder: ArrayDataBuilder) -> Self {
let keys = builder.buffers.into_iter().next().unwrap();
let keys = ScalarBuffer::new(keys, builder.offset, builder.len);
let values = builder.child_data.into_iter().next().unwrap();
Self {
keys,
data_type: builder.data_type,
nulls: builder.nulls,
values: Box::new(values),
}
}
#[inline]
pub fn len(&self) -> usize {
self.keys.len()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.keys.is_empty()
}
#[inline]
pub fn nulls(&self) -> Option<&NullBuffer> {
self.nulls.as_ref()
}
#[inline]
pub fn keys(&self) -> &[K] {
&self.keys
}
#[inline]
pub fn values(&self) -> &ArrayData {
self.values.as_ref()
}
#[inline]
pub fn data_type(&self) -> &DataType {
&self.data_type
}
pub fn into_parts(
self,
) -> (DataType, ScalarBuffer<K>, Option<NullBuffer>, ArrayData) {
(self.data_type, self.keys, self.nulls, *self.values)
}
pub fn slice(&self, offset: usize, len: usize) -> Self {
Self {
keys: self.keys.slice(offset, len),
data_type: self.data_type.clone(),
nulls: self.nulls.as_ref().map(|x| x.slice(offset, len)),
values: self.values.clone(),
}
}
pub(crate) fn layout(&self) -> ArrayDataLayout<'_> {
ArrayDataLayout {
data_type: &self.data_type,
len: self.keys.len(),
offset: 0,
nulls: self.nulls.as_ref(),
buffers: Buffers::one(self.keys.inner()),
child_data: std::slice::from_ref(self.values.as_ref()),
}
}
}