use std::any::Any;
use std::fmt;
use std::iter::IntoIterator;
use std::mem;
use std::{convert::From, iter::FromIterator};
use super::{
make_array, Array, ArrayData, ArrayRef, PrimitiveArray, PrimitiveBuilder,
StringArray, StringBuilder, StringDictionaryBuilder,
};
use crate::datatypes::ArrowNativeType;
use crate::datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType, DataType};
pub struct DictionaryArray<K: ArrowPrimitiveType> {
data: ArrayData,
keys: PrimitiveArray<K>,
values: ArrayRef,
is_ordered: bool,
}
impl<'a, K: ArrowPrimitiveType> DictionaryArray<K> {
pub fn keys(&self) -> &PrimitiveArray<K> {
&self.keys
}
pub fn keys_array(&self) -> PrimitiveArray<K> {
let data = self.data_ref();
let keys_data = ArrayData::new(
K::DATA_TYPE,
data.len(),
Some(data.null_count()),
data.null_buffer().cloned(),
data.offset(),
data.buffers().to_vec(),
vec![],
);
PrimitiveArray::<K>::from(keys_data)
}
pub fn lookup_key(&self, value: &str) -> Option<K::Native> {
let rd_buf: &StringArray =
self.values.as_any().downcast_ref::<StringArray>().unwrap();
(0..rd_buf.len())
.position(|i| rd_buf.value(i) == value)
.map(K::Native::from_usize)
.flatten()
}
pub fn values(&self) -> ArrayRef {
self.values.clone()
}
pub fn value_type(&self) -> DataType {
self.values.data_ref().data_type().clone()
}
pub fn len(&self) -> usize {
self.keys.len()
}
pub fn is_empty(&self) -> bool {
self.keys.is_empty()
}
pub fn is_ordered(&self) -> bool {
self.is_ordered
}
}
impl<T: ArrowPrimitiveType> From<ArrayData> for DictionaryArray<T> {
fn from(data: ArrayData) -> Self {
assert_eq!(
data.buffers().len(),
1,
"DictionaryArray data should contain a single buffer only (keys)."
);
assert_eq!(
data.child_data().len(),
1,
"DictionaryArray should contain a single child array (values)."
);
if let DataType::Dictionary(key_data_type, _) = data.data_type() {
if key_data_type.as_ref() != &T::DATA_TYPE {
panic!("DictionaryArray's data type must match.")
};
let keys = PrimitiveArray::<T>::from(ArrayData::new(
T::DATA_TYPE,
data.len(),
Some(data.null_count()),
data.null_buffer().cloned(),
data.offset(),
data.buffers().to_vec(),
vec![],
));
let values = make_array(data.child_data()[0].clone());
Self {
data,
keys,
values,
is_ordered: false,
}
} else {
panic!("DictionaryArray must have Dictionary data type.")
}
}
}
impl<'a, T: ArrowPrimitiveType + ArrowDictionaryKeyType> FromIterator<Option<&'a str>>
for DictionaryArray<T>
{
fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
let it = iter.into_iter();
let (lower, _) = it.size_hint();
let key_builder = PrimitiveBuilder::<T>::new(lower);
let value_builder = StringBuilder::new(256);
let mut builder = StringDictionaryBuilder::new(key_builder, value_builder);
it.for_each(|i| {
if let Some(i) = i {
builder
.append(i)
.expect("Unable to append a value to a dictionary array.");
} else {
builder
.append_null()
.expect("Unable to append a null value to a dictionary array.");
}
});
builder.finish()
}
}
impl<'a, T: ArrowPrimitiveType + ArrowDictionaryKeyType> FromIterator<&'a str>
for DictionaryArray<T>
{
fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
let it = iter.into_iter();
let (lower, _) = it.size_hint();
let key_builder = PrimitiveBuilder::<T>::new(lower);
let value_builder = StringBuilder::new(256);
let mut builder = StringDictionaryBuilder::new(key_builder, value_builder);
it.for_each(|i| {
builder
.append(i)
.expect("Unable to append a value to a dictionary array.");
});
builder.finish()
}
}
impl<T: ArrowPrimitiveType> Array for DictionaryArray<T> {
fn as_any(&self) -> &Any {
self
}
fn data(&self) -> &ArrayData {
&self.data
}
fn get_buffer_memory_size(&self) -> usize {
self.data.get_buffer_memory_size()
}
fn get_array_memory_size(&self) -> usize {
self.data.get_array_memory_size()
+ self.keys.get_array_memory_size()
+ self.values.get_array_memory_size()
+ mem::size_of_val(self)
}
}
impl<T: ArrowPrimitiveType> fmt::Debug for DictionaryArray<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
writeln!(
f,
"DictionaryArray {{keys: {:?} values: {:?}}}",
self.keys, self.values
)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
array::Int16Array,
datatypes::{Int32Type, Int8Type, UInt32Type, UInt8Type},
};
use crate::{
array::Int16DictionaryArray, array::PrimitiveDictionaryBuilder,
datatypes::DataType,
};
use crate::{buffer::Buffer, datatypes::ToByteSlice};
#[test]
fn test_dictionary_array() {
let value_data = ArrayData::builder(DataType::Int8)
.len(8)
.add_buffer(Buffer::from(
&[10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice(),
))
.build();
let keys = Buffer::from(&[2_i16, 3, 4].to_byte_slice());
let key_type = DataType::Int16;
let value_type = DataType::Int8;
let dict_data_type =
DataType::Dictionary(Box::new(key_type), Box::new(value_type));
let dict_data = ArrayData::builder(dict_data_type.clone())
.len(3)
.add_buffer(keys.clone())
.add_child_data(value_data.clone())
.build();
let dict_array = Int16DictionaryArray::from(dict_data);
let values = dict_array.values();
assert_eq!(&value_data, values.data());
assert_eq!(DataType::Int8, dict_array.value_type());
assert_eq!(3, dict_array.len());
assert_eq!(0, dict_array.null_count());
assert_eq!(0, dict_array.values().null_count());
assert_eq!(dict_array.keys(), &Int16Array::from(vec![2_i16, 3, 4]));
let dict_data = ArrayData::builder(dict_data_type)
.len(2)
.offset(1)
.add_buffer(keys)
.add_child_data(value_data.clone())
.build();
let dict_array = Int16DictionaryArray::from(dict_data);
let values = dict_array.values();
assert_eq!(&value_data, values.data());
assert_eq!(DataType::Int8, dict_array.value_type());
assert_eq!(2, dict_array.len());
assert_eq!(dict_array.keys(), &Int16Array::from(vec![3_i16, 4]));
}
#[test]
fn test_dictionary_array_fmt_debug() {
let key_builder = PrimitiveBuilder::<UInt8Type>::new(3);
let value_builder = PrimitiveBuilder::<UInt32Type>::new(2);
let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder);
builder.append(12345678).unwrap();
builder.append_null().unwrap();
builder.append(22345678).unwrap();
let array = builder.finish();
assert_eq!(
"DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n 0,\n null,\n 1,\n] values: PrimitiveArray<UInt32>\n[\n 12345678,\n 22345678,\n]}\n",
format!("{:?}", array)
);
let key_builder = PrimitiveBuilder::<UInt8Type>::new(20);
let value_builder = PrimitiveBuilder::<UInt32Type>::new(2);
let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder);
for _ in 0..20 {
builder.append(1).unwrap();
}
let array = builder.finish();
assert_eq!(
"DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n] values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
format!("{:?}", array)
);
}
#[test]
fn test_dictionary_array_from_iter() {
let test = vec!["a", "a", "b", "c"];
let array: DictionaryArray<Int8Type> = test
.iter()
.map(|&x| if x == "b" { None } else { Some(x) })
.collect();
assert_eq!(
"DictionaryArray {keys: PrimitiveArray<Int8>\n[\n 0,\n 0,\n null,\n 1,\n] values: StringArray\n[\n \"a\",\n \"c\",\n]}\n",
format!("{:?}", array)
);
let array: DictionaryArray<Int8Type> = test.into_iter().collect();
assert_eq!(
"DictionaryArray {keys: PrimitiveArray<Int8>\n[\n 0,\n 0,\n 1,\n 2,\n] values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
format!("{:?}", array)
);
}
#[test]
fn test_dictionary_array_reverse_lookup_key() {
let test = vec!["a", "a", "b", "c"];
let array: DictionaryArray<Int8Type> = test.into_iter().collect();
assert_eq!(array.lookup_key("c"), Some(2));
let test = vec!["t3", "t3", "t2", "t2", "t1", "t3", "t4", "t1", "t0"];
let array: DictionaryArray<Int8Type> = test.into_iter().collect();
assert_eq!(array.lookup_key("t1"), Some(2));
assert_eq!(array.lookup_key("non-existent"), None);
}
#[test]
fn test_dictionary_keys_as_primitive_array() {
let test = vec!["a", "b", "c", "a"];
let array: DictionaryArray<Int8Type> = test.into_iter().collect();
let keys = array.keys_array();
assert_eq!(&DataType::Int8, keys.data_type());
assert_eq!(0, keys.null_count());
assert_eq!(&[0, 1, 2, 0], keys.values());
}
#[test]
fn test_dictionary_keys_as_primitive_array_with_null() {
let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
let array: DictionaryArray<Int32Type> = test.into_iter().collect();
let keys = array.keys_array();
assert_eq!(&DataType::Int32, keys.data_type());
assert_eq!(3, keys.null_count());
assert_eq!(true, keys.is_valid(0));
assert_eq!(false, keys.is_valid(1));
assert_eq!(true, keys.is_valid(2));
assert_eq!(false, keys.is_valid(3));
assert_eq!(false, keys.is_valid(4));
assert_eq!(true, keys.is_valid(5));
assert_eq!(0, keys.value(0));
assert_eq!(1, keys.value(2));
assert_eq!(0, keys.value(5));
}
}