use crate::cast::*;
pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
array: &dyn Array,
to_type: &DataType,
cast_options: &CastOptions,
) -> Result<ArrayRef, ArrowError> {
use DataType::*;
let array = array.as_dictionary::<K>();
let from_child_type = array.values().data_type();
match (from_child_type, to_type) {
(_, Dictionary(to_index_type, to_value_type)) => {
dictionary_to_dictionary_cast(array, to_index_type, to_value_type, cast_options)
}
(Utf8, Utf8View) => view_from_dict_values::<K, Utf8Type, StringViewType>(
array.keys(),
array.values().as_string::<i32>(),
),
(Binary, BinaryView) => view_from_dict_values::<K, BinaryType, BinaryViewType>(
array.keys(),
array.values().as_binary::<i32>(),
),
_ => unpack_dictionary(array, to_type, cast_options),
}
}
fn dictionary_to_dictionary_cast<K: ArrowDictionaryKeyType>(
array: &DictionaryArray<K>,
to_index_type: &DataType,
to_value_type: &DataType,
cast_options: &CastOptions,
) -> Result<ArrayRef, ArrowError> {
use DataType::*;
let keys_array: ArrayRef = Arc::new(PrimitiveArray::<K>::from(array.keys().to_data()));
let values_array = array.values();
let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?;
let cast_values = cast_with_options(values_array, to_value_type, cast_options)?;
if cast_keys.null_count() > keys_array.null_count() {
return Err(ArrowError::ComputeError(format!(
"Could not convert {} dictionary indexes from {:?} to {:?}",
cast_keys.null_count() - keys_array.null_count(),
keys_array.data_type(),
to_index_type
)));
}
let data = cast_keys.into_data();
let builder = data
.into_builder()
.data_type(Dictionary(
Box::new(to_index_type.clone()),
Box::new(to_value_type.clone()),
))
.child_data(vec![cast_values.into_data()]);
let data = unsafe { builder.build_unchecked() };
let new_array: ArrayRef = match to_index_type {
Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)),
Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)),
Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)),
Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)),
UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)),
UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)),
UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)),
UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
_ => {
return Err(ArrowError::CastError(format!(
"Unsupported type {to_index_type} for dictionary index"
)));
}
};
Ok(new_array)
}
/// Builds a byte-view array from dictionary `keys` and byte `values` without
/// copying the value bytes.
///
/// The values buffer is registered once as block `0` of the view builder; each
/// non-null key then becomes a view pointing at that value's byte range.
///
/// A slot is null in the output when the key is null **or** when the key
/// points at a null entry in `values`, so the logical nulls of the dictionary
/// are preserved.
///
/// # Errors
///
/// Returns `ComputeError` if a key cannot be represented as `usize`.
fn view_from_dict_values<K: ArrowDictionaryKeyType, V: ByteArrayType, T: ByteViewType>(
    keys: &PrimitiveArray<K>,
    values: &GenericByteArray<V>,
) -> Result<ArrayRef, ArrowError> {
    let value_buffer = values.values();
    let value_offsets = values.value_offsets();
    let mut builder = GenericByteViewBuilder::<T>::with_capacity(keys.len());
    // All views below refer to this single block (index 0).
    builder.append_block(value_buffer.clone());
    for key in keys.iter() {
        match key {
            Some(v) => {
                let idx = v.to_usize().ok_or_else(|| {
                    ArrowError::ComputeError("Invalid dictionary index".to_string())
                })?;
                // A valid key may still reference a null dictionary value;
                // that slot is logically null and must stay null in the output.
                if values.is_null(idx) {
                    builder.append_null();
                    continue;
                }
                // SAFETY: relies on the dictionary invariant that every non-null
                // key is a valid index into `values`, so `idx` and `idx + 1` are
                // in bounds of the offsets slice, and the resulting byte range
                // lies inside block 0 appended above.
                // NOTE(review): the `as u32` casts assume offsets fit in 32 bits,
                // which holds for the i32-offset callers visible in this file.
                unsafe {
                    let offset = value_offsets.get_unchecked(idx).as_usize();
                    let end = value_offsets.get_unchecked(idx + 1).as_usize();
                    let length = end - offset;
                    builder.append_view_unchecked(0, offset as u32, length as u32);
                }
            }
            None => {
                builder.append_null();
            }
        }
    }
    Ok(Arc::new(builder.finish()))
}
/// Casts a dictionary array to the non-dictionary type `to_type`.
///
/// The dictionary values are cast to `to_type` first, and the result is then
/// gathered through the dictionary keys with `take`.
pub(crate) fn unpack_dictionary<K: ArrowDictionaryKeyType>(
    array: &DictionaryArray<K>,
    to_type: &DataType,
    cast_options: &CastOptions,
) -> Result<ArrayRef, ArrowError> {
    cast_with_options(array.values(), to_type, cast_options)
        .and_then(|values| take(values.as_ref(), array.keys(), None))
}
/// Packs `array` into a dictionary whose values have type `dict_value_type`,
/// going through an intermediate primitive representation.
///
/// Steps:
/// 1. cast `array` to `primitive_type`;
/// 2. cast that to `Dictionary(K, primitive_type)`;
/// 3. cast the dictionary to `Dictionary(K, dict_value_type)`.
pub(crate) fn pack_array_to_dictionary_via_primitive<K: ArrowDictionaryKeyType>(
    array: &dyn Array,
    primitive_type: DataType,
    dict_value_type: &DataType,
    cast_options: &CastOptions,
) -> Result<ArrayRef, ArrowError> {
    let as_primitive = cast_with_options(array, &primitive_type, cast_options)?;

    let primitive_dict_type =
        DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(primitive_type));
    let primitive_dict =
        cast_with_options(as_primitive.as_ref(), &primitive_dict_type, cast_options)?;

    let target_dict_type =
        DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(dict_value_type.clone()));
    cast_with_options(primitive_dict.as_ref(), &target_dict_type, cast_options)
}
/// Packs a non-dictionary `array` into a dictionary with key type `K` and
/// value type `dict_value_type`.
///
/// The value type selects the packing routine:
/// * integer and float types use [`pack_numeric_to_dictionary`];
/// * decimal types use [`pack_decimal_to_dictionary`];
/// * date, time and timestamp types use
///   [`pack_array_to_dictionary_via_primitive`] with `Int32` or `Int64`;
/// * string and binary types use [`pack_byte_to_dictionary`], or the dedicated
///   view-to-dictionary routines when the input is a view array;
/// * view value types are built by first packing into a string/binary
///   dictionary and then casting that dictionary with [`dictionary_cast`];
/// * `FixedSizeBinary` uses [`pack_byte_to_fixed_size_dictionary`].
///
/// # Errors
///
/// Returns `CastError` for any other `dict_value_type`, and propagates errors
/// from the selected packing routine.
pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
    array: &dyn Array,
    dict_value_type: &DataType,
    cast_options: &CastOptions,
) -> Result<ArrayRef, ArrowError> {
    use DataType::*;

    match dict_value_type {
        // Integers.
        Int8 => pack_numeric_to_dictionary::<K, Int8Type>(array, dict_value_type, cast_options),
        Int16 => pack_numeric_to_dictionary::<K, Int16Type>(array, dict_value_type, cast_options),
        Int32 => pack_numeric_to_dictionary::<K, Int32Type>(array, dict_value_type, cast_options),
        Int64 => pack_numeric_to_dictionary::<K, Int64Type>(array, dict_value_type, cast_options),
        UInt8 => pack_numeric_to_dictionary::<K, UInt8Type>(array, dict_value_type, cast_options),
        UInt16 => {
            pack_numeric_to_dictionary::<K, UInt16Type>(array, dict_value_type, cast_options)
        }
        UInt32 => {
            pack_numeric_to_dictionary::<K, UInt32Type>(array, dict_value_type, cast_options)
        }
        UInt64 => {
            pack_numeric_to_dictionary::<K, UInt64Type>(array, dict_value_type, cast_options)
        }

        // Decimals.
        Decimal32(precision, scale) => pack_decimal_to_dictionary::<K, Decimal32Type>(
            array,
            dict_value_type,
            *precision,
            *scale,
            cast_options,
        ),
        Decimal64(precision, scale) => pack_decimal_to_dictionary::<K, Decimal64Type>(
            array,
            dict_value_type,
            *precision,
            *scale,
            cast_options,
        ),
        Decimal128(precision, scale) => pack_decimal_to_dictionary::<K, Decimal128Type>(
            array,
            dict_value_type,
            *precision,
            *scale,
            cast_options,
        ),
        Decimal256(precision, scale) => pack_decimal_to_dictionary::<K, Decimal256Type>(
            array,
            dict_value_type,
            *precision,
            *scale,
            cast_options,
        ),

        // Floats.
        Float16 => {
            pack_numeric_to_dictionary::<K, Float16Type>(array, dict_value_type, cast_options)
        }
        Float32 => {
            pack_numeric_to_dictionary::<K, Float32Type>(array, dict_value_type, cast_options)
        }
        Float64 => {
            pack_numeric_to_dictionary::<K, Float64Type>(array, dict_value_type, cast_options)
        }

        // Temporal types routed through Int32.
        Date32 | Time32(_) => pack_array_to_dictionary_via_primitive::<K>(
            array,
            DataType::Int32,
            dict_value_type,
            cast_options,
        ),
        // Temporal types routed through Int64.
        Date64 | Time64(_) | Timestamp(_, _) => pack_array_to_dictionary_via_primitive::<K>(
            array,
            DataType::Int64,
            dict_value_type,
            cast_options,
        ),

        // Strings: a view input has its own routine, everything else is cast first.
        Utf8 if array.data_type() == &DataType::Utf8View => {
            string_view_to_dictionary::<K, i32>(array)
        }
        Utf8 => pack_byte_to_dictionary::<K, GenericStringType<i32>>(array, cast_options),
        LargeUtf8 if array.data_type() == &DataType::Utf8View => {
            string_view_to_dictionary::<K, i64>(array)
        }
        LargeUtf8 => pack_byte_to_dictionary::<K, GenericStringType<i64>>(array, cast_options),
        Utf8View => {
            // Pack into a plain string dictionary, then convert its values to views.
            let base_value_type =
                if matches!(array.data_type(), DataType::LargeUtf8 | DataType::Utf8View) {
                    DataType::LargeUtf8
                } else {
                    DataType::Utf8
                };
            let dict_base = cast_to_dictionary::<K>(array, &base_value_type, cast_options)?;
            let view_dict_type =
                DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(DataType::Utf8View));
            dictionary_cast::<K>(dict_base.as_ref(), &view_dict_type, cast_options)
        }

        // Binary: mirrors the string handling above.
        Binary if array.data_type() == &DataType::BinaryView => {
            binary_view_to_dictionary::<K, i32>(array)
        }
        Binary => pack_byte_to_dictionary::<K, GenericBinaryType<i32>>(array, cast_options),
        LargeBinary if array.data_type() == &DataType::BinaryView => {
            binary_view_to_dictionary::<K, i64>(array)
        }
        LargeBinary => pack_byte_to_dictionary::<K, GenericBinaryType<i64>>(array, cast_options),
        BinaryView => {
            // Pack into a plain binary dictionary, then convert its values to views.
            let base_value_type =
                if matches!(array.data_type(), DataType::LargeBinary | DataType::BinaryView) {
                    DataType::LargeBinary
                } else {
                    DataType::Binary
                };
            let dict_base = cast_to_dictionary::<K>(array, &base_value_type, cast_options)?;
            let view_dict_type =
                DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(DataType::BinaryView));
            dictionary_cast::<K>(dict_base.as_ref(), &view_dict_type, cast_options)
        }

        FixedSizeBinary(byte_size) => {
            pack_byte_to_fixed_size_dictionary::<K>(array, cast_options, *byte_size)
        }

        _ => Err(ArrowError::CastError(format!(
            "Unsupported output type for dictionary packing: {dict_value_type}"
        ))),
    }
}
/// Packs `array` into a dictionary with key type `K` and primitive value
/// type `V`.
///
/// The input is first cast to `dict_value_type`, then every element is
/// appended to a `PrimitiveDictionaryBuilder`; nulls are appended as null keys.
///
/// # Errors
///
/// Propagates errors from the cast and from the builder's `append`.
pub(crate) fn pack_numeric_to_dictionary<K, V>(
    array: &dyn Array,
    dict_value_type: &DataType,
    cast_options: &CastOptions,
) -> Result<ArrayRef, ArrowError>
where
    K: ArrowDictionaryKeyType,
    V: ArrowPrimitiveType,
{
    let cast_values = cast_with_options(array, dict_value_type, cast_options)?;
    let primitives = cast_values.as_primitive::<V>();

    let len = primitives.len();
    let mut builder = PrimitiveDictionaryBuilder::<K, V>::with_capacity(len, len);
    for item in primitives.iter() {
        match item {
            Some(value) => {
                builder.append(value)?;
            }
            None => builder.append_null(),
        }
    }
    Ok(Arc::new(builder.finish()))
}
/// Packs `array` into a dictionary with key type `K` and decimal value type
/// `D`, giving the dictionary values the requested `precision` and `scale`.
///
/// The packing itself is done by [`pack_numeric_to_dictionary`]; afterwards
/// the values are rebuilt with `with_precision_and_scale` and combined with
/// the same keys into a new dictionary.
///
/// # Errors
///
/// Propagates packing errors, returns `ComputeError` if the packed dictionary
/// cannot be viewed as `PrimitiveArray<D>`, and propagates errors from
/// `with_precision_and_scale` and `DictionaryArray::try_new`.
pub(crate) fn pack_decimal_to_dictionary<K, D>(
    array: &dyn Array,
    dict_value_type: &DataType,
    precision: u8,
    scale: i8,
    cast_options: &CastOptions,
) -> Result<ArrayRef, ArrowError>
where
    K: ArrowDictionaryKeyType,
    D: DecimalType + ArrowPrimitiveType,
{
    let packed = pack_numeric_to_dictionary::<K, D>(array, dict_value_type, cast_options)?;
    let typed = packed
        .as_dictionary::<K>()
        .downcast_dict::<PrimitiveArray<D>>()
        .ok_or_else(|| {
            ArrowError::ComputeError(format!(
                "Internal Error: Cannot cast dict to {}Array",
                D::PREFIX
            ))
        })?;

    // Re-apply the requested precision and scale to the dictionary values.
    let rescaled = typed
        .values()
        .clone()
        .with_precision_and_scale(precision, scale)?;
    let keys = typed.keys().clone();

    DictionaryArray::<K>::try_new(keys, Arc::new(rescaled))
        .map(|dictionary| Arc::new(dictionary) as ArrayRef)
}
/// Packs a `StringViewArray` into a string dictionary with key type `K` and
/// offset type `O`.
///
/// Each string is appended to a `GenericByteDictionaryBuilder`; nulls are
/// appended as null keys.
///
/// # Errors
///
/// Returns `ComputeError` if `array` is not a `StringViewArray`, and
/// propagates errors from the builder's `append`.
pub(crate) fn string_view_to_dictionary<K, O: OffsetSizeTrait>(
    array: &dyn Array,
) -> Result<ArrayRef, ArrowError>
where
    K: ArrowDictionaryKeyType,
{
    let views = array
        .as_any()
        .downcast_ref::<StringViewArray>()
        .ok_or_else(|| {
            ArrowError::ComputeError("Internal Error: Cannot cast to StringViewArray".to_string())
        })?;

    let mut builder = GenericByteDictionaryBuilder::<K, GenericStringType<O>>::with_capacity(
        array.len(),
        1024,
        1024,
    );
    for item in views.iter() {
        if let Some(text) = item {
            builder.append(text)?;
        } else {
            builder.append_null();
        }
    }
    Ok(Arc::new(builder.finish()))
}
/// Packs a `BinaryViewArray` into a binary dictionary with key type `K` and
/// offset type `O`.
///
/// Each byte string is appended to a `GenericByteDictionaryBuilder`; nulls are
/// appended as null keys.
///
/// # Errors
///
/// Returns `ComputeError` if `array` is not a `BinaryViewArray`, and
/// propagates errors from the builder's `append`.
pub(crate) fn binary_view_to_dictionary<K, O: OffsetSizeTrait>(
    array: &dyn Array,
) -> Result<ArrayRef, ArrowError>
where
    K: ArrowDictionaryKeyType,
{
    let views = array
        .as_any()
        .downcast_ref::<BinaryViewArray>()
        .ok_or_else(|| {
            ArrowError::ComputeError("Internal Error: Cannot cast to BinaryViewArray".to_string())
        })?;

    let mut builder = GenericByteDictionaryBuilder::<K, GenericBinaryType<O>>::with_capacity(
        array.len(),
        1024,
        1024,
    );
    for item in views.iter() {
        if let Some(bytes) = item {
            builder.append(bytes)?;
        } else {
            builder.append_null();
        }
    }
    Ok(Arc::new(builder.finish()))
}
/// Packs `array` into a byte dictionary (string or binary) with key type `K`
/// and value type `T`.
///
/// The input is first cast to `T::DATA_TYPE`, then every element is appended
/// to a `GenericByteDictionaryBuilder`; nulls are appended as null keys.
///
/// # Errors
///
/// Propagates cast errors, returns `ComputeError` if the cast result is not a
/// `GenericByteArray<T>`, and propagates errors from the builder's `append`.
pub(crate) fn pack_byte_to_dictionary<K, T>(
    array: &dyn Array,
    cast_options: &CastOptions,
) -> Result<ArrayRef, ArrowError>
where
    K: ArrowDictionaryKeyType,
    T: ByteArrayType,
{
    let cast_values = cast_with_options(array, &T::DATA_TYPE, cast_options)?;
    let byte_values = cast_values
        .as_any()
        .downcast_ref::<GenericByteArray<T>>()
        .ok_or_else(|| {
            ArrowError::ComputeError("Internal Error: Cannot cast to GenericByteArray".to_string())
        })?;

    let mut builder =
        GenericByteDictionaryBuilder::<K, T>::with_capacity(byte_values.len(), 1024, 1024);
    for item in byte_values.iter() {
        match item {
            Some(value) => {
                builder.append(value)?;
            }
            None => builder.append_null(),
        }
    }
    Ok(Arc::new(builder.finish()))
}
/// Packs `array` into a fixed-size-binary dictionary with key type `K` and
/// values of `byte_width` bytes.
///
/// The input is first cast to `FixedSizeBinary(byte_width)`, then every
/// element is appended to a `FixedSizeBinaryDictionaryBuilder`; nulls are
/// appended as null keys.
///
/// # Errors
///
/// Propagates cast errors, returns `ComputeError` if the cast result is not a
/// `FixedSizeBinaryArray`, and propagates errors from the builder's `append`.
pub(crate) fn pack_byte_to_fixed_size_dictionary<K>(
    array: &dyn Array,
    cast_options: &CastOptions,
    byte_width: i32,
) -> Result<ArrayRef, ArrowError>
where
    K: ArrowDictionaryKeyType,
{
    let cast_values =
        cast_with_options(array, &DataType::FixedSizeBinary(byte_width), cast_options)?;
    let values = cast_values
        .as_any()
        .downcast_ref::<FixedSizeBinaryArray>()
        .ok_or_else(|| {
            // The message names the array type actually being downcast to.
            ArrowError::ComputeError(
                "Internal Error: Cannot cast to FixedSizeBinaryArray".to_string(),
            )
        })?;

    // One key is produced per input element, so size the key buffer exactly;
    // the number of distinct values is unknown, so keep the default estimate.
    let mut b =
        FixedSizeBinaryDictionaryBuilder::<K>::with_capacity(values.len(), 1024, byte_width);
    for item in values.iter() {
        match item {
            Some(value) => {
                b.append(value)?;
            }
            None => b.append_null(),
        }
    }
    Ok(Arc::new(b.finish()))
}