use chrono::format::strftime::StrftimeItems;
use chrono::format::{parse, Parsed};
use chrono::Timelike;
use std::ops::{Div, Mul};
use std::str;
use std::sync::Arc;
use crate::buffer::MutableBuffer;
use crate::compute::divide_scalar;
use crate::compute::kernels::arithmetic::{divide, multiply};
use crate::compute::kernels::arity::unary;
use crate::compute::kernels::cast_utils::string_to_timestamp_nanos;
use crate::compute::kernels::temporal::extract_component_from_array;
use crate::compute::kernels::temporal::return_compute_error_with;
use crate::compute::{try_unary, using_chrono_tz_and_utc_naive_date_time};
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::temporal_conversions::{
as_datetime, EPOCH_DAYS_FROM_CE, MICROSECONDS, MILLISECONDS, MILLISECONDS_IN_DAY,
NANOSECONDS, SECONDS_IN_DAY,
};
use crate::{array::*, compute::take};
use crate::{buffer::Buffer, util::serialization::lexical_to_string};
use num::cast::AsPrimitive;
use num::{BigInt, NumCast, ToPrimitive};
/// Options that tune how the cast kernels react to values that cannot be converted.
#[derive(Debug)]
pub struct CastOptions {
/// Failure-handling mode passed down to the individual cast helpers.
///
/// In the binary-to-string path of `cast_with_options`, `true` turns a value that
/// fails conversion into a null slot, while `false` makes the whole cast return an
/// `ArrowError::CastError`.
// NOTE(review): the other helpers receive this flag as well and presumably follow
// the same null-vs-error convention — confirm per helper.
pub safe: bool,
}
/// Options used by [`cast`] when the caller does not supply any: `safe` is `true`.
pub const DEFAULT_CAST_OPTIONS: CastOptions = CastOptions { safe: true };
/// Returns `true` if an array of `from_type` is expected to be castable to `to_type`
/// by [`cast_with_options`].
///
/// Identical types are always castable. Otherwise the answer comes from the table
/// below, which is meant to mirror the dispatch in `cast_with_options`; arm order
/// matters because earlier arms shadow later, more general ones.
///
/// * Decimals: any `Decimal128`/`Decimal256` pair; `Decimal128` to and from `Null`,
///   the signed integers and the floats.
/// * `Null`: castable to every type listed in the `Null` arm.
/// * Structs: never castable to or from a different type.
/// * Lists: `List`→`List` and `LargeList`→`LargeList` recurse on the child type;
///   `List`↔`LargeList` requires identical child types; a non-list source is
///   castable to a list when it is castable to the list's child type.
/// * Dictionaries: decided by the value types only.
/// * Everything else: explicit pairs of boolean, string, numeric and temporal types.
pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
    use self::DataType::*;
    if from_type == to_type {
        return true;
    }

    match (from_type, to_type) {
        // Decimal to decimal, regardless of width, precision or scale.
        (Decimal128(_, _), Decimal128(_, _)) => true,
        (Decimal256(_, _), Decimal256(_, _)) => true,
        (Decimal128(_, _), Decimal256(_, _)) => true,
        (Decimal256(_, _), Decimal128(_, _)) => true,
        // Signed numeric (or null) to decimal.
        (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal128(_, _))
        // Decimal to signed numeric (or null).
        | (Decimal128(_, _), Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64)
        // Null to any of the listed types.
        | (
            Null,
            Boolean
            | Int8
            | UInt8
            | Int16
            | UInt16
            | Int32
            | UInt32
            | Float32
            | Date32
            | Time32(_)
            | Int64
            | UInt64
            | Float64
            | Date64
            | Timestamp(_, _)
            | Time64(_)
            | Duration(_)
            | Interval(_)
            | FixedSizeBinary(_)
            | Binary
            | Utf8
            | LargeBinary
            | LargeUtf8
            | List(_)
            | LargeList(_)
            | FixedSizeList(_, _)
            | Struct(_)
            | Map(_, _)
            | Dictionary(_, _)
        ) => true,
        // Any other Decimal128 combination is rejected.
        (Decimal128(_, _), _) => false,
        (_, Decimal128(_, _)) => false,
        (Struct(_), _) => false,
        (_, Struct(_)) => false,
        // Same list flavour: castable when the children are.
        (LargeList(list_from), LargeList(list_to)) => {
            can_cast_types(list_from.data_type(), list_to.data_type())
        }
        (List(list_from), List(list_to)) => {
            can_cast_types(list_from.data_type(), list_to.data_type())
        }
        // Changing only the offset width: children must already be identical.
        (List(list_from), LargeList(list_to)) => {
            list_from.data_type() == list_to.data_type()
        }
        // Mirror of the arm above. `cast_with_options` handles LargeList -> List with
        // the same "identical child type" rule, so this pair must be answered here
        // rather than falling through to the generic `(_, List(_))` arm below, which
        // would treat the LargeList as a scalar to be wrapped in a list.
        (LargeList(list_from), List(list_to)) => {
            list_from.data_type() == list_to.data_type()
        }
        (List(_), _) => false,
        // Non-list source wrapped into a list: decided by the child type.
        (_, List(list_to)) => can_cast_types(from_type, list_to.data_type()),
        (_, LargeList(list_to)) => can_cast_types(from_type, list_to.data_type()),
        // Dictionaries are judged by their value types; key types are ignored here.
        (Dictionary(_, from_value_type), Dictionary(_, to_value_type)) => {
            can_cast_types(from_value_type, to_value_type)
        }
        (Dictionary(_, value_type), _) => can_cast_types(value_type, to_type),
        (_, Dictionary(_, value_type)) => can_cast_types(from_type, value_type),
        // Booleans.
        (_, Boolean) => DataType::is_numeric(from_type) || from_type == &Utf8,
        (Boolean, _) => DataType::is_numeric(to_type) || to_type == &Utf8,
        // Strings.
        (Utf8, LargeUtf8) => true,
        (LargeUtf8, Utf8) => true,
        (
            Utf8,
            Binary
            | Date32
            | Date64
            | Time32(TimeUnit::Second)
            | Time32(TimeUnit::Millisecond)
            | Time64(TimeUnit::Microsecond)
            | Time64(TimeUnit::Nanosecond)
            | Timestamp(TimeUnit::Nanosecond, None),
        ) => true,
        (Utf8, _) => DataType::is_numeric(to_type),
        (
            LargeUtf8,
            LargeBinary
            | Date32
            | Date64
            | Time32(TimeUnit::Second)
            | Time32(TimeUnit::Millisecond)
            | Time64(TimeUnit::Microsecond)
            | Time64(TimeUnit::Nanosecond)
            | Timestamp(TimeUnit::Nanosecond, None),
        ) => true,
        (LargeUtf8, _) => DataType::is_numeric(to_type),
        (Timestamp(_, _), Utf8) | (Timestamp(_, _), LargeUtf8) => true,
        (Date32, Utf8) | (Date32, LargeUtf8) => true,
        (Date64, Utf8) | (Date64, LargeUtf8) => true,
        (_, Utf8 | LargeUtf8) => DataType::is_numeric(from_type) || from_type == &Binary,
        // Numeric to numeric: every ordered pair of distinct primitive numeric types.
        (
            UInt8,
            UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float32 | Float64,
        ) => true,
        (
            UInt16,
            UInt8 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float32 | Float64,
        ) => true,
        (
            UInt32,
            UInt8 | UInt16 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float32 | Float64,
        ) => true,
        (
            UInt64,
            UInt8 | UInt16 | UInt32 | Int8 | Int16 | Int32 | Int64 | Float32 | Float64,
        ) => true,
        (
            Int8,
            UInt8 | UInt16 | UInt32 | UInt64 | Int16 | Int32 | Int64 | Float32 | Float64,
        ) => true,
        (
            Int16,
            UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int32 | Int64 | Float32 | Float64,
        ) => true,
        (
            Int32,
            UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int64 | Float32 | Float64,
        ) => true,
        (
            Int64,
            UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Float32 | Float64,
        ) => true,
        (
            Float32,
            UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float64,
        ) => true,
        (
            Float64,
            UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float32,
        ) => true,
        // Integers to and from date/time types.
        (Int32, Date32 | Date64 | Time32(_)) => true,
        (Date32, Int32 | Int64) => true,
        (Time32(_), Int32) => true,
        (Int64, Date64 | Date32 | Time64(_)) => true,
        (Date64, Int64 | Int32) => true,
        (Time64(_), Int64) => true,
        // Between date/time types.
        (Date32, Date64) => true,
        (Date64, Date32) => true,
        (Time32(TimeUnit::Second), Time32(TimeUnit::Millisecond)) => true,
        (Time32(TimeUnit::Millisecond), Time32(TimeUnit::Second)) => true,
        (Time32(_), Time64(_)) => true,
        (Time64(TimeUnit::Microsecond), Time64(TimeUnit::Nanosecond)) => true,
        (Time64(TimeUnit::Nanosecond), Time64(TimeUnit::Microsecond)) => true,
        (Time64(_), Time32(to_unit)) => {
            matches!(to_unit, TimeUnit::Second | TimeUnit::Millisecond)
        }
        // Timestamps and durations.
        (Timestamp(_, _), Int64) => true,
        (Int64, Timestamp(_, _)) => true,
        (Timestamp(_, _), Timestamp(_, _) | Date32 | Date64) => true,
        (Int64, Duration(_)) => true,
        (Duration(_), Int64) => true,
        // Intervals: each match is kept exhaustive so a new unit forces a decision.
        (Interval(unit), Int64) => match unit {
            IntervalUnit::YearMonth => true,
            IntervalUnit::DayTime => true,
            IntervalUnit::MonthDayNano => false,
        },
        (Int32, Interval(unit)) => match unit {
            IntervalUnit::YearMonth => true,
            IntervalUnit::DayTime => false,
            IntervalUnit::MonthDayNano => false,
        },
        (Int64, Interval(unit)) => match unit {
            IntervalUnit::YearMonth => false,
            IntervalUnit::DayTime => true,
            IntervalUnit::MonthDayNano => false,
        },
        (_, _) => false,
    }
}
/// Casts `array` to `to_type` using [`DEFAULT_CAST_OPTIONS`].
///
/// This is a convenience wrapper around [`cast_with_options`]; the result and any
/// error are exactly what that function returns for the default options.
pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
    let options = &DEFAULT_CAST_OPTIONS;
    cast_with_options(array, to_type, options)
}
/// Builds a `Decimal128Array` by applying `op` to every non-null element of `array`.
///
/// Null slots are carried over as nulls. The collected values are then given the
/// requested `precision` and `scale` through `with_precision_and_scale`; if that call
/// fails, its error is returned unchanged.
fn cast_primitive_to_decimal<T: ArrayAccessor, F>(
    array: T,
    op: F,
    precision: u8,
    scale: u8,
) -> Result<Arc<dyn Array>>
where
    F: Fn(T::Item) -> i128,
{
    // `&op` is itself callable, so it can be handed to `Option::map` directly.
    let unscaled: Decimal128Array = ArrayIter::new(array)
        .map(|slot| slot.map(&op))
        .collect();
    let decimal_array = unscaled.with_precision_and_scale(precision, scale)?;
    Ok(Arc::new(decimal_array))
}
/// Converts an integer array into a `Decimal128Array` with the given `precision` and
/// `scale`.
///
/// Each non-null value is widened to `i128` and multiplied by `10^scale` to obtain the
/// unscaled decimal representation; nulls stay null.
// NOTE(review): both `10^scale` and the per-value product use plain `i128` arithmetic,
// so a large `scale` combined with large inputs can overflow before any precision
// validation runs — confirm whether callers guarantee this cannot happen.
fn cast_integer_to_decimal<T: ArrowNumericType>(
    array: &PrimitiveArray<T>,
    precision: u8,
    scale: u8,
) -> Result<Arc<dyn Array>>
where
    <T as ArrowPrimitiveType>::Native: AsPrimitive<i128>,
{
    let scale_factor: i128 = 10_i128.pow(u32::from(scale));
    cast_primitive_to_decimal(
        array,
        |value| value.as_() * scale_factor,
        precision,
        scale,
    )
}
/// Converts a floating-point array into a `Decimal128Array` with the given `precision`
/// and `scale`.
///
/// Each non-null value is widened to `f64`, multiplied by `10^scale`, and converted to
/// `i128` with an `as` cast (which drops the fractional part); nulls stay null.
fn cast_floating_point_to_decimal<T: ArrowNumericType>(
    array: &PrimitiveArray<T>,
    precision: u8,
    scale: u8,
) -> Result<Arc<dyn Array>>
where
    <T as ArrowPrimitiveType>::Native: AsPrimitive<f64>,
{
    let scale_factor = 10_f64.powi(i32::from(scale));
    cast_primitive_to_decimal(
        array,
        |value| (value.as_() * scale_factor) as i128,
        precision,
        scale,
    )
}
/// Expands to code that converts a `Decimal128Array` into an integer array.
///
/// Arguments: the source array expression, an identifier bound to a reference to the
/// decimal scale, the builder type for the output array, the native integer type, and
/// an expression naming the target data type for the error message.
///
/// Every non-null value is divided by `10^scale` with integer division and must then
/// fit into the native type; otherwise the expansion `return`s an
/// `ArrowError::CastError` from the enclosing function. Nulls are preserved.
macro_rules! cast_decimal_to_integer {
    ($ARRAY:expr, $SCALE : ident, $VALUE_BUILDER: ident, $NATIVE_TYPE : ident, $DATA_TYPE : expr) => {{
        let decimals = $ARRAY.as_any().downcast_ref::<Decimal128Array>().unwrap();
        let len = decimals.len();
        let mut builder = $VALUE_BUILDER::with_capacity(len);
        let divisor: i128 = 10_i128.pow(*$SCALE as u32);
        // Inclusive range of values representable by the target integer type.
        let representable = (($NATIVE_TYPE::MIN) as i128)..=(($NATIVE_TYPE::MAX) as i128);
        for idx in 0..len {
            if decimals.is_null(idx) {
                builder.append_null();
                continue;
            }
            let whole = decimals.value(idx).as_i128() / divisor;
            if !representable.contains(&whole) {
                return Err(ArrowError::CastError(format!(
                    "value of {} is out of range {}",
                    whole, $DATA_TYPE
                )));
            }
            builder.append_value(whole as $NATIVE_TYPE);
        }
        Ok(Arc::new(builder.finish()))
    }};
}
/// Expands to code that converts a `Decimal128Array` into a floating-point array.
///
/// Arguments: the source array expression, an identifier bound to a reference to the
/// decimal scale, the builder type for the output array, and the native float type.
///
/// Every non-null value is converted to `f64`, divided by `10^scale`, and cast to the
/// native type; nulls are preserved.
macro_rules! cast_decimal_to_float {
    ($ARRAY:expr, $SCALE : ident, $VALUE_BUILDER: ident, $NATIVE_TYPE : ty) => {{
        let decimals = $ARRAY.as_any().downcast_ref::<Decimal128Array>().unwrap();
        let divisor = 10_f64.powi(*$SCALE as i32);
        let mut builder = $VALUE_BUILDER::with_capacity(decimals.len());
        for idx in 0..decimals.len() {
            if decimals.is_null(idx) {
                builder.append_null();
                continue;
            }
            let unscaled = decimals.value(idx).as_i128() as f64;
            builder.append_value((unscaled / divisor) as $NATIVE_TYPE);
        }
        Ok(Arc::new(builder.finish()))
    }};
}
pub fn cast_with_options(
array: &ArrayRef,
to_type: &DataType,
cast_options: &CastOptions,
) -> Result<ArrayRef> {
use DataType::*;
let from_type = array.data_type();
if from_type == to_type {
return Ok(array.clone());
}
match (from_type, to_type) {
(Decimal128(_, s1), Decimal128(p2, s2)) => {
cast_decimal_to_decimal::<16, 16>(array, s1, p2, s2)
}
(Decimal256(_, s1), Decimal256(p2, s2)) => {
cast_decimal_to_decimal::<32, 32>(array, s1, p2, s2)
}
(Decimal128(_, s1), Decimal256(p2, s2)) => {
cast_decimal_to_decimal::<16, 32>(array, s1, p2, s2)
}
(Decimal256(_, s1), Decimal128(p2, s2)) => {
cast_decimal_to_decimal::<32, 16>(array, s1, p2, s2)
}
(Decimal128(_, scale), _) => {
match to_type {
Int8 => {
cast_decimal_to_integer!(array, scale, Int8Builder, i8, Int8)
}
Int16 => {
cast_decimal_to_integer!(array, scale, Int16Builder, i16, Int16)
}
Int32 => {
cast_decimal_to_integer!(array, scale, Int32Builder, i32, Int32)
}
Int64 => {
cast_decimal_to_integer!(array, scale, Int64Builder, i64, Int64)
}
Float32 => {
cast_decimal_to_float!(array, scale, Float32Builder, f32)
}
Float64 => {
cast_decimal_to_float!(array, scale, Float64Builder, f64)
}
Null => Ok(new_null_array(to_type, array.len())),
_ => Err(ArrowError::CastError(format!(
"Casting from {:?} to {:?} not supported",
from_type, to_type
))),
}
}
(_, Decimal128(precision, scale)) => {
match from_type {
Int8 => cast_integer_to_decimal(
as_primitive_array::<Int8Type>(array),
*precision,
*scale,
),
Int16 => cast_integer_to_decimal(
as_primitive_array::<Int16Type>(array),
*precision,
*scale,
),
Int32 => cast_integer_to_decimal(
as_primitive_array::<Int32Type>(array),
*precision,
*scale,
),
Int64 => cast_integer_to_decimal(
as_primitive_array::<Int64Type>(array),
*precision,
*scale,
),
Float32 => cast_floating_point_to_decimal(
as_primitive_array::<Float32Type>(array),
*precision,
*scale,
),
Float64 => cast_floating_point_to_decimal(
as_primitive_array::<Float64Type>(array),
*precision,
*scale,
),
Null => Ok(new_null_array(to_type, array.len())),
_ => Err(ArrowError::CastError(format!(
"Casting from {:?} to {:?} not supported",
from_type, to_type
))),
}
}
(
Null,
Boolean
| Int8
| UInt8
| Int16
| UInt16
| Int32
| UInt32
| Float32
| Date32
| Time32(_)
| Int64
| UInt64
| Float64
| Date64
| Timestamp(_, _)
| Time64(_)
| Duration(_)
| Interval(_)
| FixedSizeBinary(_)
| Binary
| Utf8
| LargeBinary
| LargeUtf8
| List(_)
| LargeList(_)
| FixedSizeList(_, _)
| Struct(_)
| Map(_, _)
| Dictionary(_, _),
) => Ok(new_null_array(to_type, array.len())),
(Struct(_), _) => Err(ArrowError::CastError(
"Cannot cast from struct to other types".to_string(),
)),
(_, Struct(_)) => Err(ArrowError::CastError(
"Cannot cast to struct from other types".to_string(),
)),
(List(_), List(ref to)) => {
cast_list_inner::<i32>(array, to, to_type, cast_options)
}
(LargeList(_), LargeList(ref to)) => {
cast_list_inner::<i64>(array, to, to_type, cast_options)
}
(List(list_from), LargeList(list_to)) => {
if list_to.data_type() != list_from.data_type() {
Err(ArrowError::CastError(
"cannot cast list to large-list with different child data".into(),
))
} else {
cast_list_container::<i32, i64>(&**array, cast_options)
}
}
(LargeList(list_from), List(list_to)) => {
if list_to.data_type() != list_from.data_type() {
Err(ArrowError::CastError(
"cannot cast large-list to list with different child data".into(),
))
} else {
cast_list_container::<i64, i32>(&**array, cast_options)
}
}
(List(_), _) => Err(ArrowError::CastError(
"Cannot cast list to non-list data types".to_string(),
)),
(_, List(ref to)) => {
cast_primitive_to_list::<i32>(array, to, to_type, cast_options)
}
(_, LargeList(ref to)) => {
cast_primitive_to_list::<i64>(array, to, to_type, cast_options)
}
(Dictionary(index_type, _), _) => match **index_type {
DataType::Int8 => dictionary_cast::<Int8Type>(array, to_type, cast_options),
DataType::Int16 => dictionary_cast::<Int16Type>(array, to_type, cast_options),
DataType::Int32 => dictionary_cast::<Int32Type>(array, to_type, cast_options),
DataType::Int64 => dictionary_cast::<Int64Type>(array, to_type, cast_options),
DataType::UInt8 => dictionary_cast::<UInt8Type>(array, to_type, cast_options),
DataType::UInt16 => {
dictionary_cast::<UInt16Type>(array, to_type, cast_options)
}
DataType::UInt32 => {
dictionary_cast::<UInt32Type>(array, to_type, cast_options)
}
DataType::UInt64 => {
dictionary_cast::<UInt64Type>(array, to_type, cast_options)
}
_ => Err(ArrowError::CastError(format!(
"Casting from dictionary type {:?} to {:?} not supported",
from_type, to_type,
))),
},
(_, Dictionary(index_type, value_type)) => match **index_type {
DataType::Int8 => {
cast_to_dictionary::<Int8Type>(array, value_type, cast_options)
}
DataType::Int16 => {
cast_to_dictionary::<Int16Type>(array, value_type, cast_options)
}
DataType::Int32 => {
cast_to_dictionary::<Int32Type>(array, value_type, cast_options)
}
DataType::Int64 => {
cast_to_dictionary::<Int64Type>(array, value_type, cast_options)
}
DataType::UInt8 => {
cast_to_dictionary::<UInt8Type>(array, value_type, cast_options)
}
DataType::UInt16 => {
cast_to_dictionary::<UInt16Type>(array, value_type, cast_options)
}
DataType::UInt32 => {
cast_to_dictionary::<UInt32Type>(array, value_type, cast_options)
}
DataType::UInt64 => {
cast_to_dictionary::<UInt64Type>(array, value_type, cast_options)
}
_ => Err(ArrowError::CastError(format!(
"Casting from type {:?} to dictionary type {:?} not supported",
from_type, to_type,
))),
},
(_, Boolean) => match from_type {
UInt8 => cast_numeric_to_bool::<UInt8Type>(array),
UInt16 => cast_numeric_to_bool::<UInt16Type>(array),
UInt32 => cast_numeric_to_bool::<UInt32Type>(array),
UInt64 => cast_numeric_to_bool::<UInt64Type>(array),
Int8 => cast_numeric_to_bool::<Int8Type>(array),
Int16 => cast_numeric_to_bool::<Int16Type>(array),
Int32 => cast_numeric_to_bool::<Int32Type>(array),
Int64 => cast_numeric_to_bool::<Int64Type>(array),
Float32 => cast_numeric_to_bool::<Float32Type>(array),
Float64 => cast_numeric_to_bool::<Float64Type>(array),
Utf8 => cast_utf8_to_boolean(array, cast_options),
_ => Err(ArrowError::CastError(format!(
"Casting from {:?} to {:?} not supported",
from_type, to_type,
))),
},
(Boolean, _) => match to_type {
UInt8 => cast_bool_to_numeric::<UInt8Type>(array, cast_options),
UInt16 => cast_bool_to_numeric::<UInt16Type>(array, cast_options),
UInt32 => cast_bool_to_numeric::<UInt32Type>(array, cast_options),
UInt64 => cast_bool_to_numeric::<UInt64Type>(array, cast_options),
Int8 => cast_bool_to_numeric::<Int8Type>(array, cast_options),
Int16 => cast_bool_to_numeric::<Int16Type>(array, cast_options),
Int32 => cast_bool_to_numeric::<Int32Type>(array, cast_options),
Int64 => cast_bool_to_numeric::<Int64Type>(array, cast_options),
Float32 => cast_bool_to_numeric::<Float32Type>(array, cast_options),
Float64 => cast_bool_to_numeric::<Float64Type>(array, cast_options),
Utf8 => {
let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
Ok(Arc::new(
array
.iter()
.map(|value| value.map(|value| if value { "1" } else { "0" }))
.collect::<StringArray>(),
))
}
_ => Err(ArrowError::CastError(format!(
"Casting from {:?} to {:?} not supported",
from_type, to_type,
))),
},
(Utf8, _) => match to_type {
LargeUtf8 => cast_str_container::<i32, i64>(&**array),
UInt8 => cast_string_to_numeric::<UInt8Type, i32>(array, cast_options),
UInt16 => cast_string_to_numeric::<UInt16Type, i32>(array, cast_options),
UInt32 => cast_string_to_numeric::<UInt32Type, i32>(array, cast_options),
UInt64 => cast_string_to_numeric::<UInt64Type, i32>(array, cast_options),
Int8 => cast_string_to_numeric::<Int8Type, i32>(array, cast_options),
Int16 => cast_string_to_numeric::<Int16Type, i32>(array, cast_options),
Int32 => cast_string_to_numeric::<Int32Type, i32>(array, cast_options),
Int64 => cast_string_to_numeric::<Int64Type, i32>(array, cast_options),
Float32 => cast_string_to_numeric::<Float32Type, i32>(array, cast_options),
Float64 => cast_string_to_numeric::<Float64Type, i32>(array, cast_options),
Date32 => cast_string_to_date32::<i32>(&**array, cast_options),
Date64 => cast_string_to_date64::<i32>(&**array, cast_options),
Binary => cast_string_to_binary(array),
Time32(TimeUnit::Second) => {
cast_string_to_time32second::<i32>(&**array, cast_options)
}
Time32(TimeUnit::Millisecond) => {
cast_string_to_time32millisecond::<i32>(&**array, cast_options)
}
Time64(TimeUnit::Microsecond) => {
cast_string_to_time64microsecond::<i32>(&**array, cast_options)
}
Time64(TimeUnit::Nanosecond) => {
cast_string_to_time64nanosecond::<i32>(&**array, cast_options)
}
Timestamp(TimeUnit::Nanosecond, None) => {
cast_string_to_timestamp_ns::<i32>(&**array, cast_options)
}
_ => Err(ArrowError::CastError(format!(
"Casting from {:?} to {:?} not supported",
from_type, to_type,
))),
},
(_, Utf8) => match from_type {
LargeUtf8 => cast_str_container::<i64, i32>(&**array),
UInt8 => cast_numeric_to_string::<UInt8Type, i32>(array),
UInt16 => cast_numeric_to_string::<UInt16Type, i32>(array),
UInt32 => cast_numeric_to_string::<UInt32Type, i32>(array),
UInt64 => cast_numeric_to_string::<UInt64Type, i32>(array),
Int8 => cast_numeric_to_string::<Int8Type, i32>(array),
Int16 => cast_numeric_to_string::<Int16Type, i32>(array),
Int32 => cast_numeric_to_string::<Int32Type, i32>(array),
Int64 => cast_numeric_to_string::<Int64Type, i32>(array),
Float32 => cast_numeric_to_string::<Float32Type, i32>(array),
Float64 => cast_numeric_to_string::<Float64Type, i32>(array),
Timestamp(unit, tz) => match unit {
TimeUnit::Nanosecond => {
cast_timestamp_to_string::<TimestampNanosecondType, i32>(array, tz)
}
TimeUnit::Microsecond => {
cast_timestamp_to_string::<TimestampMicrosecondType, i32>(array, tz)
}
TimeUnit::Millisecond => {
cast_timestamp_to_string::<TimestampMillisecondType, i32>(array, tz)
}
TimeUnit::Second => {
cast_timestamp_to_string::<TimestampSecondType, i32>(array, tz)
}
},
Date32 => cast_date32_to_string::<i32>(array),
Date64 => cast_date64_to_string::<i32>(array),
Binary => {
let array = array.as_any().downcast_ref::<BinaryArray>().unwrap();
Ok(Arc::new(
array
.iter()
.map(|maybe_value| match maybe_value {
Some(value) => {
let result = str::from_utf8(value);
if cast_options.safe {
Ok(result.ok())
} else {
Some(result.map_err(|_| {
ArrowError::CastError(
"Cannot cast binary to string".to_string(),
)
}))
.transpose()
}
}
None => Ok(None),
})
.collect::<Result<StringArray>>()?,
))
}
_ => Err(ArrowError::CastError(format!(
"Casting from {:?} to {:?} not supported",
from_type, to_type,
))),
},
(_, LargeUtf8) => match from_type {
UInt8 => cast_numeric_to_string::<UInt8Type, i64>(array),
UInt16 => cast_numeric_to_string::<UInt16Type, i64>(array),
UInt32 => cast_numeric_to_string::<UInt32Type, i64>(array),
UInt64 => cast_numeric_to_string::<UInt64Type, i64>(array),
Int8 => cast_numeric_to_string::<Int8Type, i64>(array),
Int16 => cast_numeric_to_string::<Int16Type, i64>(array),
Int32 => cast_numeric_to_string::<Int32Type, i64>(array),
Int64 => cast_numeric_to_string::<Int64Type, i64>(array),
Float32 => cast_numeric_to_string::<Float32Type, i64>(array),
Float64 => cast_numeric_to_string::<Float64Type, i64>(array),
Timestamp(unit, tz) => match unit {
TimeUnit::Nanosecond => {
cast_timestamp_to_string::<TimestampNanosecondType, i64>(array, tz)
}
TimeUnit::Microsecond => {
cast_timestamp_to_string::<TimestampMicrosecondType, i64>(array, tz)
}
TimeUnit::Millisecond => {
cast_timestamp_to_string::<TimestampMillisecondType, i64>(array, tz)
}
TimeUnit::Second => {
cast_timestamp_to_string::<TimestampSecondType, i64>(array, tz)
}
},
Date32 => cast_date32_to_string::<i64>(array),
Date64 => cast_date64_to_string::<i64>(array),
Binary => {
let array = array.as_any().downcast_ref::<BinaryArray>().unwrap();
Ok(Arc::new(
array
.iter()
.map(|maybe_value| match maybe_value {
Some(value) => {
let result = str::from_utf8(value);
if cast_options.safe {
Ok(result.ok())
} else {
Some(result.map_err(|_| {
ArrowError::CastError(
"Cannot cast binary to string".to_string(),
)
}))
.transpose()
}
}
None => Ok(None),
})
.collect::<Result<LargeStringArray>>()?,
))
}
_ => Err(ArrowError::CastError(format!(
"Casting from {:?} to {:?} not supported",
from_type, to_type,
))),
},
(LargeUtf8, _) => match to_type {
UInt8 => cast_string_to_numeric::<UInt8Type, i64>(array, cast_options),
UInt16 => cast_string_to_numeric::<UInt16Type, i64>(array, cast_options),
UInt32 => cast_string_to_numeric::<UInt32Type, i64>(array, cast_options),
UInt64 => cast_string_to_numeric::<UInt64Type, i64>(array, cast_options),
Int8 => cast_string_to_numeric::<Int8Type, i64>(array, cast_options),
Int16 => cast_string_to_numeric::<Int16Type, i64>(array, cast_options),
Int32 => cast_string_to_numeric::<Int32Type, i64>(array, cast_options),
Int64 => cast_string_to_numeric::<Int64Type, i64>(array, cast_options),
Float32 => cast_string_to_numeric::<Float32Type, i64>(array, cast_options),
Float64 => cast_string_to_numeric::<Float64Type, i64>(array, cast_options),
Date32 => cast_string_to_date32::<i64>(&**array, cast_options),
Date64 => cast_string_to_date64::<i64>(&**array, cast_options),
LargeBinary => cast_string_to_binary(array),
Time32(TimeUnit::Second) => {
cast_string_to_time32second::<i64>(&**array, cast_options)
}
Time32(TimeUnit::Millisecond) => {
cast_string_to_time32millisecond::<i64>(&**array, cast_options)
}
Time64(TimeUnit::Microsecond) => {
cast_string_to_time64microsecond::<i64>(&**array, cast_options)
}
Time64(TimeUnit::Nanosecond) => {
cast_string_to_time64nanosecond::<i64>(&**array, cast_options)
}
Timestamp(TimeUnit::Nanosecond, None) => {
cast_string_to_timestamp_ns::<i64>(&**array, cast_options)
}
_ => Err(ArrowError::CastError(format!(
"Casting from {:?} to {:?} not supported",
from_type, to_type,
))),
},
(UInt8, UInt16) => {
cast_numeric_arrays::<UInt8Type, UInt16Type>(array, cast_options)
}
(UInt8, UInt32) => {
cast_numeric_arrays::<UInt8Type, UInt32Type>(array, cast_options)
}
(UInt8, UInt64) => {
cast_numeric_arrays::<UInt8Type, UInt64Type>(array, cast_options)
}
(UInt8, Int8) => cast_numeric_arrays::<UInt8Type, Int8Type>(array, cast_options),
(UInt8, Int16) => {
cast_numeric_arrays::<UInt8Type, Int16Type>(array, cast_options)
}
(UInt8, Int32) => {
cast_numeric_arrays::<UInt8Type, Int32Type>(array, cast_options)
}
(UInt8, Int64) => {
cast_numeric_arrays::<UInt8Type, Int64Type>(array, cast_options)
}
(UInt8, Float32) => {
cast_numeric_arrays::<UInt8Type, Float32Type>(array, cast_options)
}
(UInt8, Float64) => {
cast_numeric_arrays::<UInt8Type, Float64Type>(array, cast_options)
}
(UInt16, UInt8) => {
cast_numeric_arrays::<UInt16Type, UInt8Type>(array, cast_options)
}
(UInt16, UInt32) => {
cast_numeric_arrays::<UInt16Type, UInt32Type>(array, cast_options)
}
(UInt16, UInt64) => {
cast_numeric_arrays::<UInt16Type, UInt64Type>(array, cast_options)
}
(UInt16, Int8) => {
cast_numeric_arrays::<UInt16Type, Int8Type>(array, cast_options)
}
(UInt16, Int16) => {
cast_numeric_arrays::<UInt16Type, Int16Type>(array, cast_options)
}
(UInt16, Int32) => {
cast_numeric_arrays::<UInt16Type, Int32Type>(array, cast_options)
}
(UInt16, Int64) => {
cast_numeric_arrays::<UInt16Type, Int64Type>(array, cast_options)
}
(UInt16, Float32) => {
cast_numeric_arrays::<UInt16Type, Float32Type>(array, cast_options)
}
(UInt16, Float64) => {
cast_numeric_arrays::<UInt16Type, Float64Type>(array, cast_options)
}
(UInt32, UInt8) => {
cast_numeric_arrays::<UInt32Type, UInt8Type>(array, cast_options)
}
(UInt32, UInt16) => {
cast_numeric_arrays::<UInt32Type, UInt16Type>(array, cast_options)
}
(UInt32, UInt64) => {
cast_numeric_arrays::<UInt32Type, UInt64Type>(array, cast_options)
}
(UInt32, Int8) => {
cast_numeric_arrays::<UInt32Type, Int8Type>(array, cast_options)
}
(UInt32, Int16) => {
cast_numeric_arrays::<UInt32Type, Int16Type>(array, cast_options)
}
(UInt32, Int32) => {
cast_numeric_arrays::<UInt32Type, Int32Type>(array, cast_options)
}
(UInt32, Int64) => {
cast_numeric_arrays::<UInt32Type, Int64Type>(array, cast_options)
}
(UInt32, Float32) => {
cast_numeric_arrays::<UInt32Type, Float32Type>(array, cast_options)
}
(UInt32, Float64) => {
cast_numeric_arrays::<UInt32Type, Float64Type>(array, cast_options)
}
(UInt64, UInt8) => {
cast_numeric_arrays::<UInt64Type, UInt8Type>(array, cast_options)
}
(UInt64, UInt16) => {
cast_numeric_arrays::<UInt64Type, UInt16Type>(array, cast_options)
}
(UInt64, UInt32) => {
cast_numeric_arrays::<UInt64Type, UInt32Type>(array, cast_options)
}
(UInt64, Int8) => {
cast_numeric_arrays::<UInt64Type, Int8Type>(array, cast_options)
}
(UInt64, Int16) => {
cast_numeric_arrays::<UInt64Type, Int16Type>(array, cast_options)
}
(UInt64, Int32) => {
cast_numeric_arrays::<UInt64Type, Int32Type>(array, cast_options)
}
(UInt64, Int64) => {
cast_numeric_arrays::<UInt64Type, Int64Type>(array, cast_options)
}
(UInt64, Float32) => {
cast_numeric_arrays::<UInt64Type, Float32Type>(array, cast_options)
}
(UInt64, Float64) => {
cast_numeric_arrays::<UInt64Type, Float64Type>(array, cast_options)
}
(Int8, UInt8) => cast_numeric_arrays::<Int8Type, UInt8Type>(array, cast_options),
(Int8, UInt16) => {
cast_numeric_arrays::<Int8Type, UInt16Type>(array, cast_options)
}
(Int8, UInt32) => {
cast_numeric_arrays::<Int8Type, UInt32Type>(array, cast_options)
}
(Int8, UInt64) => {
cast_numeric_arrays::<Int8Type, UInt64Type>(array, cast_options)
}
(Int8, Int16) => cast_numeric_arrays::<Int8Type, Int16Type>(array, cast_options),
(Int8, Int32) => cast_numeric_arrays::<Int8Type, Int32Type>(array, cast_options),
(Int8, Int64) => cast_numeric_arrays::<Int8Type, Int64Type>(array, cast_options),
(Int8, Float32) => {
cast_numeric_arrays::<Int8Type, Float32Type>(array, cast_options)
}
(Int8, Float64) => {
cast_numeric_arrays::<Int8Type, Float64Type>(array, cast_options)
}
(Int16, UInt8) => {
cast_numeric_arrays::<Int16Type, UInt8Type>(array, cast_options)
}
(Int16, UInt16) => {
cast_numeric_arrays::<Int16Type, UInt16Type>(array, cast_options)
}
(Int16, UInt32) => {
cast_numeric_arrays::<Int16Type, UInt32Type>(array, cast_options)
}
(Int16, UInt64) => {
cast_numeric_arrays::<Int16Type, UInt64Type>(array, cast_options)
}
(Int16, Int8) => cast_numeric_arrays::<Int16Type, Int8Type>(array, cast_options),
(Int16, Int32) => {
cast_numeric_arrays::<Int16Type, Int32Type>(array, cast_options)
}
(Int16, Int64) => {
cast_numeric_arrays::<Int16Type, Int64Type>(array, cast_options)
}
(Int16, Float32) => {
cast_numeric_arrays::<Int16Type, Float32Type>(array, cast_options)
}
(Int16, Float64) => {
cast_numeric_arrays::<Int16Type, Float64Type>(array, cast_options)
}
(Int32, UInt8) => {
cast_numeric_arrays::<Int32Type, UInt8Type>(array, cast_options)
}
(Int32, UInt16) => {
cast_numeric_arrays::<Int32Type, UInt16Type>(array, cast_options)
}
(Int32, UInt32) => {
cast_numeric_arrays::<Int32Type, UInt32Type>(array, cast_options)
}
(Int32, UInt64) => {
cast_numeric_arrays::<Int32Type, UInt64Type>(array, cast_options)
}
(Int32, Int8) => cast_numeric_arrays::<Int32Type, Int8Type>(array, cast_options),
(Int32, Int16) => {
cast_numeric_arrays::<Int32Type, Int16Type>(array, cast_options)
}
(Int32, Int64) => {
cast_numeric_arrays::<Int32Type, Int64Type>(array, cast_options)
}
(Int32, Float32) => {
cast_numeric_arrays::<Int32Type, Float32Type>(array, cast_options)
}
(Int32, Float64) => {
cast_numeric_arrays::<Int32Type, Float64Type>(array, cast_options)
}
(Int64, UInt8) => {
cast_numeric_arrays::<Int64Type, UInt8Type>(array, cast_options)
}
(Int64, UInt16) => {
cast_numeric_arrays::<Int64Type, UInt16Type>(array, cast_options)
}
(Int64, UInt32) => {
cast_numeric_arrays::<Int64Type, UInt32Type>(array, cast_options)
}
(Int64, UInt64) => {
cast_numeric_arrays::<Int64Type, UInt64Type>(array, cast_options)
}
(Int64, Int8) => cast_numeric_arrays::<Int64Type, Int8Type>(array, cast_options),
(Int64, Int16) => {
cast_numeric_arrays::<Int64Type, Int16Type>(array, cast_options)
}
(Int64, Int32) => {
cast_numeric_arrays::<Int64Type, Int32Type>(array, cast_options)
}
(Int64, Float32) => {
cast_numeric_arrays::<Int64Type, Float32Type>(array, cast_options)
}
(Int64, Float64) => {
cast_numeric_arrays::<Int64Type, Float64Type>(array, cast_options)
}
(Float32, UInt8) => {
cast_numeric_arrays::<Float32Type, UInt8Type>(array, cast_options)
}
(Float32, UInt16) => {
cast_numeric_arrays::<Float32Type, UInt16Type>(array, cast_options)
}
(Float32, UInt32) => {
cast_numeric_arrays::<Float32Type, UInt32Type>(array, cast_options)
}
(Float32, UInt64) => {
cast_numeric_arrays::<Float32Type, UInt64Type>(array, cast_options)
}
(Float32, Int8) => {
cast_numeric_arrays::<Float32Type, Int8Type>(array, cast_options)
}
(Float32, Int16) => {
cast_numeric_arrays::<Float32Type, Int16Type>(array, cast_options)
}
(Float32, Int32) => {
cast_numeric_arrays::<Float32Type, Int32Type>(array, cast_options)
}
(Float32, Int64) => {
cast_numeric_arrays::<Float32Type, Int64Type>(array, cast_options)
}
(Float32, Float64) => {
cast_numeric_arrays::<Float32Type, Float64Type>(array, cast_options)
}
(Float64, UInt8) => {
cast_numeric_arrays::<Float64Type, UInt8Type>(array, cast_options)
}
(Float64, UInt16) => {
cast_numeric_arrays::<Float64Type, UInt16Type>(array, cast_options)
}
(Float64, UInt32) => {
cast_numeric_arrays::<Float64Type, UInt32Type>(array, cast_options)
}
(Float64, UInt64) => {
cast_numeric_arrays::<Float64Type, UInt64Type>(array, cast_options)
}
(Float64, Int8) => {
cast_numeric_arrays::<Float64Type, Int8Type>(array, cast_options)
}
(Float64, Int16) => {
cast_numeric_arrays::<Float64Type, Int16Type>(array, cast_options)
}
(Float64, Int32) => {
cast_numeric_arrays::<Float64Type, Int32Type>(array, cast_options)
}
(Float64, Int64) => {
cast_numeric_arrays::<Float64Type, Int64Type>(array, cast_options)
}
(Float64, Float32) => {
cast_numeric_arrays::<Float64Type, Float32Type>(array, cast_options)
}
(Int32, Date32) => cast_array_data::<Date32Type>(array, to_type.clone()),
(Int32, Date64) => cast_with_options(
&cast_with_options(array, &DataType::Date32, cast_options)?,
&DataType::Date64,
cast_options,
),
(Int32, Time32(TimeUnit::Second)) => {
cast_array_data::<Time32SecondType>(array, to_type.clone())
}
(Int32, Time32(TimeUnit::Millisecond)) => {
cast_array_data::<Time32MillisecondType>(array, to_type.clone())
}
(Date32, Int32) => cast_array_data::<Int32Type>(array, to_type.clone()),
(Date32, Int64) => cast_with_options(
&cast_with_options(array, &DataType::Int32, cast_options)?,
&DataType::Int64,
cast_options,
),
(Time32(_), Int32) => cast_array_data::<Int32Type>(array, to_type.clone()),
(Int64, Date64) => cast_array_data::<Date64Type>(array, to_type.clone()),
(Int64, Date32) => cast_with_options(
&cast_with_options(array, &DataType::Int32, cast_options)?,
&DataType::Date32,
cast_options,
),
(Int64, Time64(TimeUnit::Microsecond)) => {
cast_array_data::<Time64MicrosecondType>(array, to_type.clone())
}
(Int64, Time64(TimeUnit::Nanosecond)) => {
cast_array_data::<Time64NanosecondType>(array, to_type.clone())
}
(Date64, Int64) => cast_array_data::<Int64Type>(array, to_type.clone()),
(Date64, Int32) => cast_with_options(
&cast_with_options(array, &DataType::Int64, cast_options)?,
&DataType::Int32,
cast_options,
),
(Time64(_), Int64) => cast_array_data::<Int64Type>(array, to_type.clone()),
(Date32, Date64) => {
let date_array = array.as_any().downcast_ref::<Date32Array>().unwrap();
let values =
unary::<_, _, Date64Type>(date_array, |x| x as i64 * MILLISECONDS_IN_DAY);
Ok(Arc::new(values) as ArrayRef)
}
(Date64, Date32) => {
let date_array = array.as_any().downcast_ref::<Date64Array>().unwrap();
let values = unary::<_, _, Date32Type>(date_array, |x| {
(x / MILLISECONDS_IN_DAY) as i32
});
Ok(Arc::new(values) as ArrayRef)
}
(Time32(TimeUnit::Second), Time32(TimeUnit::Millisecond)) => {
let time_array = array.as_any().downcast_ref::<Time32SecondArray>().unwrap();
let values = unary::<_, _, Time32MillisecondType>(time_array, |x| {
x * MILLISECONDS as i32
});
Ok(Arc::new(values) as ArrayRef)
}
(Time32(TimeUnit::Millisecond), Time32(TimeUnit::Second)) => {
let time_array = array
.as_any()
.downcast_ref::<Time32MillisecondArray>()
.unwrap();
let values = unary::<_, _, Time32SecondType>(time_array, |x| {
x / (MILLISECONDS as i32)
});
Ok(Arc::new(values) as ArrayRef)
}
(Time32(from_unit), Time64(to_unit)) => {
let time_array = Int32Array::from(array.data().clone());
let c: Int64Array = numeric_cast(&time_array);
let from_size = time_unit_multiple(from_unit);
let to_size = time_unit_multiple(to_unit);
let mult = Int64Array::from(vec![to_size / from_size; array.len()]);
let converted = multiply(&c, &mult)?;
let array_ref = Arc::new(converted) as ArrayRef;
use TimeUnit::*;
match to_unit {
Microsecond => cast_array_data::<TimestampMicrosecondType>(
&array_ref,
to_type.clone(),
),
Nanosecond => cast_array_data::<TimestampNanosecondType>(
&array_ref,
to_type.clone(),
),
_ => unreachable!("array type not supported"),
}
}
(Time64(TimeUnit::Microsecond), Time64(TimeUnit::Nanosecond)) => {
let time_array = array
.as_any()
.downcast_ref::<Time64MicrosecondArray>()
.unwrap();
let values =
unary::<_, _, Time64NanosecondType>(time_array, |x| x * MILLISECONDS);
Ok(Arc::new(values) as ArrayRef)
}
(Time64(TimeUnit::Nanosecond), Time64(TimeUnit::Microsecond)) => {
let time_array = array
.as_any()
.downcast_ref::<Time64NanosecondArray>()
.unwrap();
let values =
unary::<_, _, Time64MicrosecondType>(time_array, |x| x / MILLISECONDS);
Ok(Arc::new(values) as ArrayRef)
}
(Time64(from_unit), Time32(to_unit)) => {
let time_array = Int64Array::from(array.data().clone());
let from_size = time_unit_multiple(from_unit);
let to_size = time_unit_multiple(to_unit);
let divisor = from_size / to_size;
match to_unit {
TimeUnit::Second => {
let values = unary::<_, _, Time32SecondType>(&time_array, |x| {
(x as i64 / divisor) as i32
});
Ok(Arc::new(values) as ArrayRef)
}
TimeUnit::Millisecond => {
let values = unary::<_, _, Time32MillisecondType>(&time_array, |x| {
(x as i64 / divisor) as i32
});
Ok(Arc::new(values) as ArrayRef)
}
_ => unreachable!("array type not supported"),
}
}
(Timestamp(_, _), Int64) => cast_array_data::<Int64Type>(array, to_type.clone()),
(Int64, Timestamp(to_unit, _)) => {
use TimeUnit::*;
match to_unit {
Second => cast_array_data::<TimestampSecondType>(array, to_type.clone()),
Millisecond => {
cast_array_data::<TimestampMillisecondType>(array, to_type.clone())
}
Microsecond => {
cast_array_data::<TimestampMicrosecondType>(array, to_type.clone())
}
Nanosecond => {
cast_array_data::<TimestampNanosecondType>(array, to_type.clone())
}
}
}
(Timestamp(from_unit, _), Timestamp(to_unit, _)) => {
let time_array = Int64Array::from(array.data().clone());
let from_size = time_unit_multiple(from_unit);
let to_size = time_unit_multiple(to_unit);
let converted = if from_size >= to_size {
divide_scalar(&time_array, from_size / to_size)?
} else {
multiply(
&time_array,
&Int64Array::from(vec![to_size / from_size; array.len()]),
)?
};
let array_ref = Arc::new(converted) as ArrayRef;
use TimeUnit::*;
match to_unit {
Second => {
cast_array_data::<TimestampSecondType>(&array_ref, to_type.clone())
}
Millisecond => cast_array_data::<TimestampMillisecondType>(
&array_ref,
to_type.clone(),
),
Microsecond => cast_array_data::<TimestampMicrosecondType>(
&array_ref,
to_type.clone(),
),
Nanosecond => cast_array_data::<TimestampNanosecondType>(
&array_ref,
to_type.clone(),
),
}
}
(Timestamp(from_unit, _), Date32) => {
let time_array = Int64Array::from(array.data().clone());
let from_size = time_unit_multiple(from_unit) * SECONDS_IN_DAY;
let mut b = Date32Builder::with_capacity(array.len());
for i in 0..array.len() {
if time_array.is_null(i) {
b.append_null();
} else {
b.append_value((time_array.value(i) / from_size) as i32);
}
}
Ok(Arc::new(b.finish()) as ArrayRef)
}
(Timestamp(from_unit, _), Date64) => {
let from_size = time_unit_multiple(from_unit);
let to_size = MILLISECONDS;
match to_size.cmp(&from_size) {
std::cmp::Ordering::Less => {
let time_array = Date64Array::from(array.data().clone());
Ok(Arc::new(divide(
&time_array,
&Date64Array::from(vec![from_size / to_size; array.len()]),
)?) as ArrayRef)
}
std::cmp::Ordering::Equal => {
cast_array_data::<Date64Type>(array, to_type.clone())
}
std::cmp::Ordering::Greater => {
let time_array = Date64Array::from(array.data().clone());
Ok(Arc::new(multiply(
&time_array,
&Date64Array::from(vec![to_size / from_size; array.len()]),
)?) as ArrayRef)
}
}
}
(Int64, Duration(to_unit)) => {
use TimeUnit::*;
match to_unit {
Second => cast_array_data::<DurationSecondType>(array, to_type.clone()),
Millisecond => {
cast_array_data::<DurationMillisecondType>(array, to_type.clone())
}
Microsecond => {
cast_array_data::<DurationMicrosecondType>(array, to_type.clone())
}
Nanosecond => {
cast_array_data::<DurationNanosecondType>(array, to_type.clone())
}
}
}
(Duration(_), Int64) => cast_array_data::<Int64Type>(array, to_type.clone()),
(Interval(from_type), Int64) => match from_type {
IntervalUnit::YearMonth => cast_numeric_arrays::<
IntervalYearMonthType,
Int64Type,
>(array, cast_options),
IntervalUnit::DayTime => cast_array_data::<Int64Type>(array, to_type.clone()),
IntervalUnit::MonthDayNano => Err(ArrowError::CastError(format!(
"Casting from {:?} to {:?} not supported",
from_type, to_type,
))),
},
(Int32, Interval(to_type)) => match to_type {
IntervalUnit::YearMonth => {
cast_array_data::<IntervalYearMonthType>(array, Interval(to_type.clone()))
}
_ => Err(ArrowError::CastError(format!(
"Casting from {:?} to {:?} not supported",
from_type, to_type,
))),
},
(Int64, Interval(to_type)) => match to_type {
IntervalUnit::DayTime => {
cast_array_data::<IntervalDayTimeType>(array, Interval(to_type.clone()))
}
_ => Err(ArrowError::CastError(format!(
"Casting from {:?} to {:?} not supported",
from_type, to_type,
))),
},
(_, _) => Err(ArrowError::CastError(format!(
"Casting from {:?} to {:?} not supported",
from_type, to_type,
))),
}
}
fn cast_string_to_binary(array: &ArrayRef) -> Result<ArrayRef> {
let from_type = array.data_type();
match *from_type {
DataType::Utf8 => {
let data = unsafe {
array
.data()
.clone()
.into_builder()
.data_type(DataType::Binary)
.build_unchecked()
};
Ok(Arc::new(BinaryArray::from(data)) as ArrayRef)
}
DataType::LargeUtf8 => {
let data = unsafe {
array
.data()
.clone()
.into_builder()
.data_type(DataType::LargeBinary)
.build_unchecked()
};
Ok(Arc::new(LargeBinaryArray::from(data)) as ArrayRef)
}
_ => Err(ArrowError::InvalidArgumentError(format!(
"{:?} cannot be converted to binary array",
from_type
))),
}
}
const fn time_unit_multiple(unit: &TimeUnit) -> i64 {
match unit {
TimeUnit::Second => 1,
TimeUnit::Millisecond => MILLISECONDS,
TimeUnit::Microsecond => MICROSECONDS,
TimeUnit::Nanosecond => NANOSECONDS,
}
}
/// Casts one decimal array to another, rescaling every value from
/// `input_scale` to `output_scale`.
///
/// `BYTE_WIDTH1` selects the input array type and `BYTE_WIDTH2` the output
/// array type: `16` means `Decimal128Array`, any other value means
/// `Decimal256Array`.
///
/// When `input_scale > output_scale` each value is divided by
/// `10^(input_scale - output_scale)`; otherwise it is multiplied by
/// `10^(output_scale - input_scale)`. Null slots stay null.
///
/// # Errors
/// * `ArrowError::InvalidArgumentError` when a 256-bit value does not fit in
///   an `i128` while producing a `Decimal128Array`.
/// * Whatever `with_precision_and_scale` returns for the output array.
///
/// # Panics
/// Panics if `array` is not of the type implied by `BYTE_WIDTH1`.
// NOTE(review): `10_i128.pow(..)` and `as_i128() * mul` are unchecked; they
// presumably overflow for large scale differences or values — confirm.
fn cast_decimal_to_decimal<const BYTE_WIDTH1: usize, const BYTE_WIDTH2: usize>(
array: &ArrayRef,
input_scale: &u8,
output_precision: &u8,
output_scale: &u8,
) -> Result<ArrayRef> {
if input_scale > output_scale {
// Reducing the scale: drop the extra fractional digits by dividing.
let div = 10_i128.pow((input_scale - output_scale) as u32);
if BYTE_WIDTH1 == 16 {
// 128-bit input.
let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
let iter = array.iter().map(|v| v.map(|v| v.as_i128() / div));
if BYTE_WIDTH2 == 16 {
let output_array = iter
.collect::<Decimal128Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;
Ok(Arc::new(output_array))
} else {
// Widen each i128 to a BigInt for the 256-bit output.
let output_array = iter
.map(|v| v.map(BigInt::from))
.collect::<Decimal256Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;
Ok(Arc::new(output_array))
}
} else {
// 256-bit input: arithmetic is done on BigInt.
let array = array.as_any().downcast_ref::<Decimal256Array>().unwrap();
let iter = array.iter().map(|v| v.map(|v| v.to_big_int().div(div)));
if BYTE_WIDTH2 == 16 {
// Narrow to i128; a value that does not fit is reported as an error.
let values = iter
.map(|v| {
if v.is_none() {
Ok(None)
} else {
v.as_ref().and_then(|v| v.to_i128())
.ok_or_else(|| {
ArrowError::InvalidArgumentError(
format!("{:?} cannot be casted to 128-bit integer for Decimal128", v),
)
})
.map(Some)
}
})
.collect::<Result<Vec<_>>>()?;
let output_array = values
.into_iter()
.collect::<Decimal128Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;
Ok(Arc::new(output_array))
} else {
let output_array = iter
.collect::<Decimal256Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;
Ok(Arc::new(output_array))
}
}
} else {
// Same or larger scale: add fractional digits by multiplying
// (the factor is 1 when both scales are equal).
let mul = 10_i128.pow((output_scale - input_scale) as u32);
if BYTE_WIDTH1 == 16 {
// 128-bit input.
let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
let iter = array.iter().map(|v| v.map(|v| v.as_i128() * mul));
if BYTE_WIDTH2 == 16 {
let output_array = iter
.collect::<Decimal128Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;
Ok(Arc::new(output_array))
} else {
// Widen each i128 to a BigInt for the 256-bit output.
let output_array = iter
.map(|v| v.map(BigInt::from))
.collect::<Decimal256Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;
Ok(Arc::new(output_array))
}
} else {
// 256-bit input: arithmetic is done on BigInt.
let array = array.as_any().downcast_ref::<Decimal256Array>().unwrap();
let iter = array.iter().map(|v| v.map(|v| v.to_big_int().mul(mul)));
if BYTE_WIDTH2 == 16 {
// Narrow to i128; a value that does not fit is reported as an error.
let values = iter
.map(|v| {
if v.is_none() {
Ok(None)
} else {
v.as_ref().and_then(|v| v.to_i128())
.ok_or_else(|| {
ArrowError::InvalidArgumentError(
format!("{:?} cannot be casted to 128-bit integer for Decimal128", v),
)
})
.map(Some)
}
})
.collect::<Result<Vec<_>>>()?;
let output_array = values
.into_iter()
.collect::<Decimal128Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;
Ok(Arc::new(output_array))
} else {
let output_array = iter
.collect::<Decimal256Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;
Ok(Arc::new(output_array))
}
}
}
}
/// Re-labels `array` as a `PrimitiveArray<TO>` of type `to_type` without
/// touching its values.
///
/// Length, null count, validity bitmap, offset and value buffers are all taken
/// from the input; only the data type is replaced.
fn cast_array_data<TO>(array: &ArrayRef, to_type: DataType) -> Result<ArrayRef>
where
    TO: ArrowNumericType,
{
    let source = array.data();
    let validity = source
        .null_bitmap()
        .cloned()
        .map(|bitmap| bitmap.into_buffer());
    let value_buffers = source.buffers().to_vec();

    // SAFETY: every component is copied from already-built array data.
    // NOTE(review): soundness presumably depends on callers pairing source and
    // target types of equal physical width — confirm at call sites.
    let retyped = unsafe {
        ArrayData::new_unchecked(
            to_type,
            array.len(),
            Some(array.null_count()),
            validity,
            source.offset(),
            value_buffers,
            vec![],
        )
    };
    Ok(Arc::new(PrimitiveArray::<TO>::from(retyped)) as ArrayRef)
}
/// Converts a primitive array of `FROM` into a primitive array of `TO`.
///
/// With `cast_options.safe` set, values that cannot be represented become
/// null; otherwise the first such value produces an error.
///
/// # Panics
/// Panics if `from` is not a `PrimitiveArray<FROM>`.
fn cast_numeric_arrays<FROM, TO>(
    from: &ArrayRef,
    cast_options: &CastOptions,
) -> Result<ArrayRef>
where
    FROM: ArrowNumericType,
    TO: ArrowNumericType,
    FROM::Native: num::NumCast,
    TO::Native: num::NumCast,
{
    let source = from
        .as_any()
        .downcast_ref::<PrimitiveArray<FROM>>()
        .unwrap();
    let converted: PrimitiveArray<TO> = if cast_options.safe {
        numeric_cast::<FROM, TO>(source)
    } else {
        try_numeric_cast::<FROM, TO>(source)?
    };
    Ok(Arc::new(converted))
}
/// Fallible numeric conversion: each value goes through `num::cast::cast`, and
/// a value that cannot be converted yields `ArrowError::CastError`.
fn try_numeric_cast<T, R>(from: &PrimitiveArray<T>) -> Result<PrimitiveArray<R>>
where
    T: ArrowNumericType,
    R: ArrowNumericType,
    T::Native: num::NumCast,
    R::Native: num::NumCast,
{
    try_unary(from, |value| {
        match num::cast::cast::<T::Native, R::Native>(value) {
            Some(converted) => Ok(converted),
            None => Err(ArrowError::CastError(format!(
                "Can't cast value {:?} to type {}",
                value,
                R::DATA_TYPE
            ))),
        }
    })
}
/// Lenient numeric conversion: each value goes through `num::cast::cast`, and
/// a value that cannot be converted (or an input null) becomes null.
fn numeric_cast<T, R>(from: &PrimitiveArray<T>) -> PrimitiveArray<R>
where
    T: ArrowNumericType,
    R: ArrowNumericType,
    T::Native: num::NumCast,
    R::Native: num::NumCast,
{
    let converted = from.iter().map(|slot| match slot {
        Some(value) => num::cast::cast::<T::Native, R::Native>(value),
        None => None,
    });
    // SAFETY: NOTE(review): assumes the array iterator reports an exact
    // length, which `map` preserves — confirm.
    unsafe { PrimitiveArray::<R>::from_trusted_len_iter(converted) }
}
/// Renders a timestamp array of type `T` as a string array with offsets of
/// type `OffsetSize`.
///
/// * `array` - must be a `PrimitiveArray<T>`; the downcast is unwrapped, so any
///   other array type panics.
/// * `tz` - optional time zone string; selects which form of
///   `extract_component_from_array!` is invoked.
///
/// Values are converted with `as_datetime::<T>` and passed to the macro together
/// with `to_string`.
// NOTE(review): the macro body is not visible here; presumably it appends one
// entry per row to `builder` and may return early with a compute error —
// confirm against the temporal kernel.
fn cast_timestamp_to_string<T, OffsetSize>(
array: &ArrayRef,
tz: &Option<String>,
) -> Result<ArrayRef>
where
T: ArrowTemporalType + ArrowNumericType,
i64: From<<T as ArrowPrimitiveType>::Native>,
OffsetSize: OffsetSizeTrait,
{
let array = array.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
let mut builder = GenericStringBuilder::<OffsetSize>::new();
if let Some(tz) = tz {
// Time-zone form: the macro additionally receives `tz`, a `Parsed` scratch
// value, and a closure that adds the zone to the datetime.
let mut scratch = Parsed::new();
let iter = ArrayIter::new(array);
extract_component_from_array!(
iter,
builder,
to_string,
|value, tz| as_datetime::<T>(<i64 as From<_>>::from(value))
.map(|datetime| datetime + tz),
tz,
scratch,
|value| as_datetime::<T>(<i64 as From<_>>::from(value)),
|h| h
)
} else {
// No time zone: the datetime from `as_datetime` is used as is.
let iter = ArrayIter::new(array);
extract_component_from_array!(
iter,
builder,
to_string,
|value| as_datetime::<T>(<i64 as From<_>>::from(value)),
|h| h
)
}
Ok(Arc::new(builder.finish()) as ArrayRef)
}
/// Renders a `Date32Array` as strings using the `to_string` form of the value
/// returned by `value_as_date`.
///
/// Null slots, and slots for which `value_as_date` yields `None`, become null.
///
/// # Panics
/// Panics if `array` is not a `Date32Array`.
fn cast_date32_to_string<OffsetSize: OffsetSizeTrait>(
    array: &ArrayRef,
) -> Result<ArrayRef> {
    let dates = array.as_any().downcast_ref::<Date32Array>().unwrap();
    let strings = (0..dates.len())
        .map(|row| {
            if dates.is_valid(row) {
                dates.value_as_date(row).map(|date| date.to_string())
            } else {
                None
            }
        })
        .collect::<GenericStringArray<OffsetSize>>();
    Ok(Arc::new(strings))
}
/// Renders a `Date64Array` as strings using the `to_string` form of the value
/// returned by `value_as_datetime`.
///
/// Null slots, and slots for which `value_as_datetime` yields `None`, become
/// null.
///
/// # Panics
/// Panics if `array` is not a `Date64Array`.
fn cast_date64_to_string<OffsetSize: OffsetSizeTrait>(
    array: &ArrayRef,
) -> Result<ArrayRef> {
    let dates = array.as_any().downcast_ref::<Date64Array>().unwrap();
    let strings = (0..dates.len())
        .map(|row| {
            if dates.is_valid(row) {
                dates
                    .value_as_datetime(row)
                    .map(|datetime| datetime.to_string())
            } else {
                None
            }
        })
        .collect::<GenericStringArray<OffsetSize>>();
    Ok(Arc::new(strings))
}
/// Type-erased wrapper around `numeric_to_string_cast`.
///
/// # Panics
/// Panics if `array` is not a `PrimitiveArray<FROM>`.
fn cast_numeric_to_string<FROM, OffsetSize>(array: &ArrayRef) -> Result<ArrayRef>
where
    FROM: ArrowNumericType,
    FROM::Native: lexical_core::ToLexical,
    OffsetSize: OffsetSizeTrait,
{
    let numbers = array
        .as_any()
        .downcast_ref::<PrimitiveArray<FROM>>()
        .unwrap();
    let strings = numeric_to_string_cast::<FROM, OffsetSize>(numbers);
    Ok(Arc::new(strings))
}
fn numeric_to_string_cast<T, OffsetSize>(
from: &PrimitiveArray<T>,
) -> GenericStringArray<OffsetSize>
where
T: ArrowPrimitiveType + ArrowNumericType,
T::Native: lexical_core::ToLexical,
OffsetSize: OffsetSizeTrait,
{
from.iter()
.map(|maybe_value| maybe_value.map(lexical_to_string))
.collect()
}
/// Type-erased wrapper around `string_to_numeric_cast`.
///
/// # Panics
/// Panics if `from` is not a `GenericStringArray<Offset>`.
fn cast_string_to_numeric<T, Offset: OffsetSizeTrait>(
    from: &ArrayRef,
    cast_options: &CastOptions,
) -> Result<ArrayRef>
where
    T: ArrowNumericType,
    <T as ArrowPrimitiveType>::Native: lexical_core::FromLexical,
{
    let strings = from
        .as_any()
        .downcast_ref::<GenericStringArray<Offset>>()
        .unwrap();
    let numbers = string_to_numeric_cast::<T, Offset>(strings, cast_options)?;
    Ok(Arc::new(numbers))
}
/// Parses every string with `lexical_core::parse` into the native type of `T`.
///
/// In safe mode an unparsable string becomes null; otherwise the first
/// unparsable string yields `ArrowError::CastError`. Input nulls stay null.
fn string_to_numeric_cast<T, Offset: OffsetSizeTrait>(
    from: &GenericStringArray<Offset>,
    cast_options: &CastOptions,
) -> Result<PrimitiveArray<T>>
where
    T: ArrowNumericType,
    <T as ArrowPrimitiveType>::Native: lexical_core::FromLexical,
{
    if !cast_options.safe {
        // Strict mode: collect first so a parse failure aborts the whole cast.
        let parsed = from
            .iter()
            .map(|text| match text {
                None => Ok(None),
                Some(text) => lexical_core::parse(text.as_bytes())
                    .map(Some)
                    .map_err(|_| {
                        ArrowError::CastError(format!(
                            "Cannot cast string '{}' to value of {:?} type",
                            text,
                            T::DATA_TYPE,
                        ))
                    }),
            })
            .collect::<Result<Vec<_>>>()?;
        // SAFETY: a slice iterator reports its exact length.
        return Ok(unsafe { PrimitiveArray::<T>::from_trusted_len_iter(parsed.iter()) });
    }

    let parsed = from
        .iter()
        .map(|text| text.and_then(|text| lexical_core::parse(text.as_bytes()).ok()));
    // SAFETY: NOTE(review): assumes the array iterator reports an exact
    // length, which `map` preserves — confirm.
    Ok(unsafe { PrimitiveArray::<T>::from_trusted_len_iter(parsed) })
}
/// Parses strings as `chrono::NaiveDate` and stores them as `Date32` values,
/// computed as `num_days_from_ce() - EPOCH_DAYS_FROM_CE`.
///
/// In safe mode an unparsable string becomes null; otherwise it yields
/// `ArrowError::CastError`.
///
/// # Panics
/// Panics if `array` is not a `GenericStringArray<Offset>`.
fn cast_string_to_date32<Offset: OffsetSizeTrait>(
    array: &dyn Array,
    cast_options: &CastOptions,
) -> Result<ArrayRef> {
    use chrono::Datelike;

    let days_since_epoch =
        |date: chrono::NaiveDate| -> i32 { date.num_days_from_ce() - EPOCH_DAYS_FROM_CE };

    let strings = array
        .as_any()
        .downcast_ref::<GenericStringArray<Offset>>()
        .unwrap();

    let dates = if !cast_options.safe {
        let parsed = strings
            .iter()
            .map(|text| match text {
                None => Ok(None),
                Some(text) => text
                    .parse::<chrono::NaiveDate>()
                    .map(|date| Some(days_since_epoch(date)))
                    .map_err(|_| {
                        ArrowError::CastError(format!(
                            "Cannot cast string '{}' to value of {:?} type",
                            text,
                            DataType::Date32
                        ))
                    }),
            })
            .collect::<Result<Vec<Option<i32>>>>()?;
        // SAFETY: a slice iterator reports its exact length.
        unsafe { Date32Array::from_trusted_len_iter(parsed.iter()) }
    } else {
        let parsed = strings.iter().map(|text| {
            text.and_then(|text| {
                text.parse::<chrono::NaiveDate>()
                    .ok()
                    .map(days_since_epoch)
            })
        });
        // SAFETY: NOTE(review): assumes the array iterator reports an exact
        // length — confirm.
        unsafe { Date32Array::from_trusted_len_iter(parsed) }
    };
    Ok(Arc::new(dates) as ArrayRef)
}
/// Parses strings as `chrono::NaiveDateTime` and stores `timestamp_millis()`
/// of each as a `Date64` value.
///
/// In safe mode an unparsable string becomes null; otherwise it yields
/// `ArrowError::CastError`.
///
/// # Panics
/// Panics if `array` is not a `GenericStringArray<Offset>`.
fn cast_string_to_date64<Offset: OffsetSizeTrait>(
    array: &dyn Array,
    cast_options: &CastOptions,
) -> Result<ArrayRef> {
    let to_millis = |datetime: chrono::NaiveDateTime| -> i64 { datetime.timestamp_millis() };

    let strings = array
        .as_any()
        .downcast_ref::<GenericStringArray<Offset>>()
        .unwrap();

    let dates = if !cast_options.safe {
        let parsed = strings
            .iter()
            .map(|text| match text {
                None => Ok(None),
                Some(text) => text
                    .parse::<chrono::NaiveDateTime>()
                    .map(|datetime| Some(to_millis(datetime)))
                    .map_err(|_| {
                        ArrowError::CastError(format!(
                            "Cannot cast string '{}' to value of {:?} type",
                            text,
                            DataType::Date64
                        ))
                    }),
            })
            .collect::<Result<Vec<Option<i64>>>>()?;
        // SAFETY: a slice iterator reports its exact length.
        unsafe { Date64Array::from_trusted_len_iter(parsed.iter()) }
    } else {
        let parsed = strings.iter().map(|text| {
            text.and_then(|text| {
                text.parse::<chrono::NaiveDateTime>()
                    .ok()
                    .map(to_millis)
            })
        });
        // SAFETY: NOTE(review): assumes the array iterator reports an exact
        // length — confirm.
        unsafe { Date64Array::from_trusted_len_iter(parsed) }
    };
    Ok(Arc::new(dates) as ArrayRef)
}
/// Parses strings as `chrono::NaiveTime` and stores each as seconds since
/// midnight in a `Time32SecondArray`.
///
/// In safe mode an unparsable string becomes null; otherwise it yields
/// `ArrowError::CastError`.
///
/// # Panics
/// Panics if `array` is not a `GenericStringArray<Offset>`.
fn cast_string_to_time32second<Offset: OffsetSizeTrait>(
    array: &dyn Array,
    cast_options: &CastOptions,
) -> Result<ArrayRef> {
    const NANOS_PER_SEC: u32 = 1_000_000_000;

    // The nanosecond term only contributes once `nanosecond()` reaches a full
    // second. NOTE(review): presumably chrono's leap-second case — confirm.
    let to_seconds = |time: chrono::NaiveTime| -> i32 {
        (time.num_seconds_from_midnight() + time.nanosecond() / NANOS_PER_SEC) as i32
    };

    let strings = array
        .as_any()
        .downcast_ref::<GenericStringArray<Offset>>()
        .unwrap();

    let times = if !cast_options.safe {
        let parsed = strings
            .iter()
            .map(|text| match text {
                None => Ok(None),
                Some(text) => text
                    .parse::<chrono::NaiveTime>()
                    .map(|time| Some(to_seconds(time)))
                    .map_err(|_| {
                        ArrowError::CastError(format!(
                            "Cannot cast string '{}' to value of {:?} type",
                            text,
                            DataType::Time32(TimeUnit::Second)
                        ))
                    }),
            })
            .collect::<Result<Vec<Option<i32>>>>()?;
        // SAFETY: a slice iterator reports its exact length.
        unsafe { Time32SecondArray::from_trusted_len_iter(parsed.iter()) }
    } else {
        let parsed = strings.iter().map(|text| {
            text.and_then(|text| text.parse::<chrono::NaiveTime>().ok().map(to_seconds))
        });
        // SAFETY: NOTE(review): assumes the array iterator reports an exact
        // length — confirm.
        unsafe { Time32SecondArray::from_trusted_len_iter(parsed) }
    };
    Ok(Arc::new(times) as ArrayRef)
}
/// Parses strings as `chrono::NaiveTime` and stores each as milliseconds since
/// midnight in a `Time32MillisecondArray`.
///
/// In safe mode an unparsable string becomes null; otherwise it yields
/// `ArrowError::CastError`.
///
/// # Panics
/// Panics if `array` is not a `GenericStringArray<Offset>`.
fn cast_string_to_time32millisecond<Offset: OffsetSizeTrait>(
    array: &dyn Array,
    cast_options: &CastOptions,
) -> Result<ArrayRef> {
    const NANOS_PER_MILLI: u32 = 1_000_000;
    const MILLIS_PER_SEC: u32 = 1_000;

    // Whole seconds scaled to milliseconds plus the sub-second part.
    let to_millis = |time: chrono::NaiveTime| -> i32 {
        (time.num_seconds_from_midnight() * MILLIS_PER_SEC
            + time.nanosecond() / NANOS_PER_MILLI) as i32
    };

    let strings = array
        .as_any()
        .downcast_ref::<GenericStringArray<Offset>>()
        .unwrap();

    let times = if !cast_options.safe {
        let parsed = strings
            .iter()
            .map(|text| match text {
                None => Ok(None),
                Some(text) => text
                    .parse::<chrono::NaiveTime>()
                    .map(|time| Some(to_millis(time)))
                    .map_err(|_| {
                        ArrowError::CastError(format!(
                            "Cannot cast string '{}' to value of {:?} type",
                            text,
                            DataType::Time32(TimeUnit::Millisecond)
                        ))
                    }),
            })
            .collect::<Result<Vec<Option<i32>>>>()?;
        // SAFETY: a slice iterator reports its exact length.
        unsafe { Time32MillisecondArray::from_trusted_len_iter(parsed.iter()) }
    } else {
        let parsed = strings.iter().map(|text| {
            text.and_then(|text| text.parse::<chrono::NaiveTime>().ok().map(to_millis))
        });
        // SAFETY: NOTE(review): assumes the array iterator reports an exact
        // length — confirm.
        unsafe { Time32MillisecondArray::from_trusted_len_iter(parsed) }
    };
    Ok(Arc::new(times) as ArrayRef)
}
/// Parses strings as `chrono::NaiveTime` and stores each as microseconds since
/// midnight in a `Time64MicrosecondArray`.
///
/// In safe mode an unparsable string becomes null; otherwise it yields
/// `ArrowError::CastError`.
///
/// # Panics
/// Panics if `array` is not a `GenericStringArray<Offset>`.
fn cast_string_to_time64microsecond<Offset: OffsetSizeTrait>(
    array: &dyn Array,
    cast_options: &CastOptions,
) -> Result<ArrayRef> {
    const NANOS_PER_MICRO: i64 = 1_000;
    const MICROS_PER_SEC: i64 = 1_000_000;

    // Whole seconds scaled to microseconds plus the sub-second part.
    let to_micros = |time: chrono::NaiveTime| -> i64 {
        time.num_seconds_from_midnight() as i64 * MICROS_PER_SEC
            + time.nanosecond() as i64 / NANOS_PER_MICRO
    };

    let strings = array
        .as_any()
        .downcast_ref::<GenericStringArray<Offset>>()
        .unwrap();

    let times = if !cast_options.safe {
        let parsed = strings
            .iter()
            .map(|text| match text {
                None => Ok(None),
                Some(text) => text
                    .parse::<chrono::NaiveTime>()
                    .map(|time| Some(to_micros(time)))
                    .map_err(|_| {
                        ArrowError::CastError(format!(
                            "Cannot cast string '{}' to value of {:?} type",
                            text,
                            DataType::Time64(TimeUnit::Microsecond)
                        ))
                    }),
            })
            .collect::<Result<Vec<Option<i64>>>>()?;
        // SAFETY: a slice iterator reports its exact length.
        unsafe { Time64MicrosecondArray::from_trusted_len_iter(parsed.iter()) }
    } else {
        let parsed = strings.iter().map(|text| {
            text.and_then(|text| text.parse::<chrono::NaiveTime>().ok().map(to_micros))
        });
        // SAFETY: NOTE(review): assumes the array iterator reports an exact
        // length — confirm.
        unsafe { Time64MicrosecondArray::from_trusted_len_iter(parsed) }
    };
    Ok(Arc::new(times) as ArrayRef)
}
/// Parses strings as `chrono::NaiveTime` and stores each as nanoseconds since
/// midnight in a `Time64NanosecondArray`.
///
/// In safe mode an unparsable string becomes null; otherwise it yields
/// `ArrowError::CastError`.
///
/// # Panics
/// Panics if `array` is not a `GenericStringArray<Offset>`.
fn cast_string_to_time64nanosecond<Offset: OffsetSizeTrait>(
    array: &dyn Array,
    cast_options: &CastOptions,
) -> Result<ArrayRef> {
    const NANOS_PER_SEC: i64 = 1_000_000_000;

    // Whole seconds scaled to nanoseconds plus the sub-second part.
    let to_nanos = |time: chrono::NaiveTime| -> i64 {
        time.num_seconds_from_midnight() as i64 * NANOS_PER_SEC + time.nanosecond() as i64
    };

    let strings = array
        .as_any()
        .downcast_ref::<GenericStringArray<Offset>>()
        .unwrap();

    let times = if !cast_options.safe {
        let parsed = strings
            .iter()
            .map(|text| match text {
                None => Ok(None),
                Some(text) => text
                    .parse::<chrono::NaiveTime>()
                    .map(|time| Some(to_nanos(time)))
                    .map_err(|_| {
                        ArrowError::CastError(format!(
                            "Cannot cast string '{}' to value of {:?} type",
                            text,
                            DataType::Time64(TimeUnit::Nanosecond)
                        ))
                    }),
            })
            .collect::<Result<Vec<Option<i64>>>>()?;
        // SAFETY: a slice iterator reports its exact length.
        unsafe { Time64NanosecondArray::from_trusted_len_iter(parsed.iter()) }
    } else {
        let parsed = strings.iter().map(|text| {
            text.and_then(|text| text.parse::<chrono::NaiveTime>().ok().map(to_nanos))
        });
        // SAFETY: NOTE(review): assumes the array iterator reports an exact
        // length — confirm.
        unsafe { Time64NanosecondArray::from_trusted_len_iter(parsed) }
    };
    Ok(Arc::new(times) as ArrayRef)
}
/// Parses strings with `string_to_timestamp_nanos` into a
/// `TimestampNanosecondArray`.
///
/// In safe mode an unparsable string becomes null; otherwise the parser's
/// error is returned.
///
/// # Panics
/// Panics if `array` is not a `GenericStringArray<Offset>`.
fn cast_string_to_timestamp_ns<Offset: OffsetSizeTrait>(
    array: &dyn Array,
    cast_options: &CastOptions,
) -> Result<ArrayRef> {
    let strings = array
        .as_any()
        .downcast_ref::<GenericStringArray<Offset>>()
        .unwrap();

    let timestamps = if !cast_options.safe {
        let parsed = strings
            .iter()
            .map(|text| match text {
                Some(text) => string_to_timestamp_nanos(text).map(Some),
                None => Ok(None),
            })
            .collect::<Result<Vec<Option<i64>>>>()?;
        // SAFETY: a slice iterator reports its exact length.
        unsafe { TimestampNanosecondArray::from_trusted_len_iter(parsed.iter()) }
    } else {
        let parsed = strings
            .iter()
            .map(|text| text.and_then(|text| string_to_timestamp_nanos(text).ok()));
        // SAFETY: NOTE(review): assumes the array iterator reports an exact
        // length — confirm.
        unsafe { TimestampNanosecondArray::from_trusted_len_iter(parsed) }
    };
    Ok(Arc::new(timestamps) as ArrayRef)
}
/// Casts a string array to booleans.
///
/// Each value is ASCII-lowercased and trimmed, then matched against the
/// accepted spellings of true (`t`, `tr`, `tru`, `true`, `y`, `ye`, `yes`,
/// `on`, `1`) and false (`f`, `fa`, `fal`, `fals`, `false`, `n`, `no`, `of`,
/// `off`, `0`). Any other text becomes null in safe mode and an
/// `ArrowError::CastError` otherwise. Input nulls stay null.
fn cast_utf8_to_boolean(from: &ArrayRef, cast_options: &CastOptions) -> Result<ArrayRef> {
    let strings = as_string_array(from);

    let parse = |text: &str| -> Result<Option<bool>> {
        match text.to_ascii_lowercase().trim() {
            "t" | "tr" | "tru" | "true" | "y" | "ye" | "yes" | "on" | "1" => Ok(Some(true)),
            "f" | "fa" | "fal" | "fals" | "false" | "n" | "no" | "of" | "off" | "0" => {
                Ok(Some(false))
            }
            _ if cast_options.safe => Ok(None),
            invalid_value => Err(ArrowError::CastError(format!(
                "Cannot cast string '{}' to value of Boolean type",
                invalid_value,
            ))),
        }
    };

    let booleans = strings
        .iter()
        .map(|value| match value {
            Some(text) => parse(text),
            None => Ok(None),
        })
        .collect::<Result<BooleanArray>>()?;
    Ok(Arc::new(booleans))
}
/// Type-erased wrapper around `numeric_to_bool_cast`.
///
/// # Panics
/// Panics if `from` is not a `PrimitiveArray<FROM>`.
fn cast_numeric_to_bool<FROM>(from: &ArrayRef) -> Result<ArrayRef>
where
    FROM: ArrowNumericType,
{
    let numbers = from
        .as_any()
        .downcast_ref::<PrimitiveArray<FROM>>()
        .unwrap();
    let booleans = numeric_to_bool_cast::<FROM>(numbers)?;
    Ok(Arc::new(booleans) as ArrayRef)
}
/// Maps each value to `true` when it differs from `T::default_value()` and to
/// `false` otherwise; nulls stay null.
fn numeric_to_bool_cast<T>(from: &PrimitiveArray<T>) -> Result<BooleanArray>
where
    T: ArrowPrimitiveType + ArrowNumericType,
{
    let mut booleans = BooleanBuilder::with_capacity(from.len());
    for slot in from.iter() {
        match slot {
            Some(value) => booleans.append_value(value != T::default_value()),
            None => booleans.append_null(),
        }
    }
    Ok(booleans.finish())
}
/// Type-erased wrapper around `bool_to_numeric_cast`.
///
/// # Panics
/// Panics if `from` is not a `BooleanArray`.
fn cast_bool_to_numeric<TO>(
    from: &ArrayRef,
    cast_options: &CastOptions,
) -> Result<ArrayRef>
where
    TO: ArrowNumericType,
    TO::Native: num::cast::NumCast,
{
    let booleans = from.as_any().downcast_ref::<BooleanArray>().unwrap();
    let numbers = bool_to_numeric_cast::<TO>(booleans, cast_options);
    Ok(Arc::new(numbers))
}
/// Maps `true` to the result of `num::cast::cast(1)` and `false` to
/// `T::default_value()`; nulls stay null. `_cast_options` is unused.
fn bool_to_numeric_cast<T>(
    from: &BooleanArray,
    _cast_options: &CastOptions,
) -> PrimitiveArray<T>
where
    T: ArrowNumericType,
    T::Native: num::NumCast,
{
    let convert = |row: usize| -> Option<T::Native> {
        if from.is_null(row) {
            return None;
        }
        if from.value(row) {
            num::cast::cast(1)
        } else {
            Some(T::default_value())
        }
    };
    // SAFETY: a `Range<usize>` reports its exact length, which `map` preserves.
    unsafe { PrimitiveArray::<T>::from_trusted_len_iter((0..from.len()).map(convert)) }
}
/// Casts a dictionary array with key type `K` to `to_type`.
///
/// * When `to_type` is itself a dictionary, keys and values are cast
///   separately and reassembled into a dictionary of the requested key type.
/// * For any other target the dictionary is unpacked via `unpack_dictionary`.
///
/// # Errors
/// * `ArrowError::ComputeError` when `array` is not a `DictionaryArray<K>`, or
///   when casting the keys produced more nulls than the input keys had.
/// * `ArrowError::CastError` when the requested key type is not an integer type.
/// * Errors from the nested casts are propagated.
fn dictionary_cast<K: ArrowDictionaryKeyType>(
    array: &ArrayRef,
    to_type: &DataType,
    cast_options: &CastOptions,
) -> Result<ArrayRef> {
    use DataType::*;

    let (to_index_type, to_value_type) = match to_type {
        Dictionary(index_type, value_type) => (index_type, value_type),
        _ => return unpack_dictionary::<K>(array, to_type, cast_options),
    };

    let source = match array.as_any().downcast_ref::<DictionaryArray<K>>() {
        Some(dict) => dict,
        None => {
            return Err(ArrowError::ComputeError(
                "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(),
            ))
        }
    };

    let keys_array: ArrayRef =
        Arc::new(PrimitiveArray::<K>::from(source.keys().data().clone()));
    let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?;
    let cast_values = cast_with_options(source.values(), to_value_type, cast_options)?;

    // Extra nulls after the key cast mean some keys could not be converted.
    let nulls_before = keys_array.null_count();
    let nulls_after = cast_keys.null_count();
    if nulls_after > nulls_before {
        return Err(ArrowError::ComputeError(format!(
            "Could not convert {} dictionary indexes from {:?} to {:?}",
            nulls_after - nulls_before,
            keys_array.data_type(),
            to_index_type
        )));
    }

    let key_data = cast_keys.data();
    let validity = key_data
        .null_bitmap()
        .cloned()
        .map(|bitmap| bitmap.into_buffer());
    // SAFETY: all components come from the already-built cast keys and values.
    let data = unsafe {
        ArrayData::new_unchecked(
            to_type.clone(),
            cast_keys.len(),
            Some(nulls_after),
            validity,
            key_data.offset(),
            key_data.buffers().to_vec(),
            vec![cast_values.into_data()],
        )
    };

    let new_array: ArrayRef = match **to_index_type {
        Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)),
        Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)),
        Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)),
        Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)),
        UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)),
        UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)),
        UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)),
        UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
        _ => {
            return Err(ArrowError::CastError(format!(
                "Unsupported type {:?} for dictionary index",
                to_index_type
            )));
        }
    };
    Ok(new_array)
}
/// Expands a dictionary array into a plain array of `to_type`.
///
/// The dictionary values are cast to `to_type`, the keys are cast to `UInt32`,
/// and `take` gathers the cast values by those indices.
///
/// # Errors
/// `ArrowError::ComputeError` when `array` is not a `DictionaryArray<K>` or the
/// cast keys are not a `UInt32Array`; errors from the casts and from `take`
/// are propagated.
fn unpack_dictionary<K>(
    array: &ArrayRef,
    to_type: &DataType,
    cast_options: &CastOptions,
) -> Result<ArrayRef>
where
    K: ArrowDictionaryKeyType,
{
    let source = match array.as_any().downcast_ref::<DictionaryArray<K>>() {
        Some(dict) => dict,
        None => {
            return Err(ArrowError::ComputeError(
                "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(),
            ))
        }
    };

    let cast_dict_values = cast_with_options(source.values(), to_type, cast_options)?;

    let keys_array: ArrayRef =
        Arc::new(PrimitiveArray::<K>::from(source.keys().data().clone()));
    let indices = cast_with_options(&keys_array, &DataType::UInt32, cast_options)?;
    let u32_indices = match indices.as_any().downcast_ref::<UInt32Array>() {
        Some(indices) => indices,
        None => {
            return Err(ArrowError::ComputeError(
                "Internal Error: Cannot cast dict indices to UInt32".to_string(),
            ))
        }
    };

    take(cast_dict_values.as_ref(), u32_indices, None)
}
/// Packs `array` into a dictionary array with key type `K` and values of
/// `dict_value_type`.
///
/// Integer value types go through `pack_numeric_to_dictionary`, `Utf8` through
/// `pack_string_to_dictionary`; every other value type yields
/// `ArrowError::CastError`.
fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
    array: &ArrayRef,
    dict_value_type: &DataType,
    cast_options: &CastOptions,
) -> Result<ArrayRef> {
    use DataType::*;

    // Every numeric arm differs only in the value type parameter.
    macro_rules! pack_numeric {
        ($value_type:ty) => {
            pack_numeric_to_dictionary::<K, $value_type>(
                array,
                dict_value_type,
                cast_options,
            )
        };
    }

    match *dict_value_type {
        Int8 => pack_numeric!(Int8Type),
        Int16 => pack_numeric!(Int16Type),
        Int32 => pack_numeric!(Int32Type),
        Int64 => pack_numeric!(Int64Type),
        UInt8 => pack_numeric!(UInt8Type),
        UInt16 => pack_numeric!(UInt16Type),
        UInt32 => pack_numeric!(UInt32Type),
        UInt64 => pack_numeric!(UInt64Type),
        Utf8 => pack_string_to_dictionary::<K>(array, cast_options),
        _ => Err(ArrowError::CastError(format!(
            "Unsupported output type for dictionary packing: {:?}",
            dict_value_type
        ))),
    }
}
/// Casts `array` to `dict_value_type` and appends every slot to a
/// `PrimitiveDictionaryBuilder<K, V>`.
///
/// # Errors
/// Propagates errors from the cast and from the builder's `append`.
///
/// # Panics
/// Panics if the cast result is not a `PrimitiveArray<V>`.
fn pack_numeric_to_dictionary<K, V>(
    array: &ArrayRef,
    dict_value_type: &DataType,
    cast_options: &CastOptions,
) -> Result<ArrayRef>
where
    K: ArrowDictionaryKeyType,
    V: ArrowNumericType,
{
    let cast_values = cast_with_options(array, dict_value_type, cast_options)?;
    let values = cast_values
        .as_any()
        .downcast_ref::<PrimitiveArray<V>>()
        .unwrap();

    let len = values.len();
    let mut packed = PrimitiveDictionaryBuilder::<K, V>::with_capacity(len, len);
    for slot in values.iter() {
        match slot {
            Some(value) => {
                packed.append(value)?;
            }
            None => packed.append_null(),
        }
    }
    Ok(Arc::new(packed.finish()))
}
/// Casts `array` to `Utf8` and appends every slot to a
/// `StringDictionaryBuilder<K>`.
///
/// # Errors
/// Propagates errors from the cast and from the builder's `append`.
///
/// # Panics
/// Panics if the cast result is not a `StringArray`.
fn pack_string_to_dictionary<K>(
    array: &ArrayRef,
    cast_options: &CastOptions,
) -> Result<ArrayRef>
where
    K: ArrowDictionaryKeyType,
{
    let cast_values = cast_with_options(array, &DataType::Utf8, cast_options)?;
    let values = cast_values.as_any().downcast_ref::<StringArray>().unwrap();

    let mut packed = StringDictionaryBuilder::<K>::with_capacity(values.len(), 1024, 1024);
    for slot in values.iter() {
        match slot {
            Some(text) => {
                packed.append(text)?;
            }
            None => packed.append_null(),
        }
    }
    Ok(Arc::new(packed.finish()))
}
/// Wraps every element of `array` in its own single-element list.
///
/// The elements are first cast to the data type of field `to`. The list
/// offsets are `0, 1, ..., len`, and the list takes the null count and
/// validity bitmap of the cast child.
///
/// # Panics
/// Panics with `"integer"` if an offset cannot be represented as `OffsetSize`.
fn cast_primitive_to_list<OffsetSize: OffsetSizeTrait + NumCast>(
    array: &ArrayRef,
    to: &Field,
    to_type: &DataType,
    cast_options: &CastOptions,
) -> Result<ArrayRef> {
    let child = cast_with_options(array, to.data_type(), cast_options)?;
    let len = array.len();

    // SAFETY: an inclusive `usize` range reports its exact length.
    let offsets = unsafe {
        MutableBuffer::from_trusted_len_iter(
            (0..=len).map(|position| OffsetSize::from(position).expect("integer")),
        )
    };

    let null_count = child.null_count();
    let validity = child
        .data()
        .null_bitmap()
        .cloned()
        .map(|bitmap| bitmap.into_buffer());

    // SAFETY: NOTE(review): presumably sound because the offsets built above
    // index into the `len`-element cast child — confirm.
    let list_data = unsafe {
        ArrayData::new_unchecked(
            to_type.clone(),
            len,
            Some(null_count),
            validity,
            0,
            vec![offsets.into()],
            vec![child.into_data()],
        )
    };
    Ok(Arc::new(GenericListArray::<OffsetSize>::from(list_data)) as ArrayRef)
}
/// Casts the child values of a list array to the data type of field `to`,
/// keeping the list structure as is.
///
/// Length, null count, validity bitmap, offset and offset buffers are taken
/// from the input; only the child data and the outer data type change.
fn cast_list_inner<OffsetSize: OffsetSizeTrait>(
    array: &Arc<dyn Array>,
    to: &Field,
    to_type: &DataType,
    cast_options: &CastOptions,
) -> Result<ArrayRef> {
    let data = array.data_ref();
    let child = make_array(data.child_data()[0].clone());
    let cast_child = cast_with_options(&child, to.data_type(), cast_options)?;

    let validity = data
        .null_bitmap()
        .cloned()
        .map(|bitmap| bitmap.into_buffer());
    // SAFETY: NOTE(review): presumably sound because casting keeps the child
    // length, so the reused offsets still index into it — confirm.
    let list_data = unsafe {
        ArrayData::new_unchecked(
            to_type.clone(),
            array.len(),
            Some(data.null_count()),
            validity,
            array.offset(),
            data.buffers().to_vec(),
            vec![cast_child.into_data()],
        )
    };
    Ok(Arc::new(GenericListArray::<OffsetSize>::from(list_data)) as ArrayRef)
}
/// Converts a string array between offset widths (`OffsetSizeFrom` to
/// `OffsetSizeTo`), reusing the value bytes.
///
/// The output type is `LargeUtf8` when `OffsetSizeTo` is 8 bytes wide and
/// `Utf8` otherwise.
///
/// # Errors
/// `ArrowError::ComputeError` when an offset does not fit in `OffsetSizeTo`.
///
/// # Panics
/// Panics if `array` is not a `GenericStringArray<OffsetSizeFrom>`.
fn cast_str_container<OffsetSizeFrom, OffsetSizeTo>(array: &dyn Array) -> Result<ArrayRef>
where
    OffsetSizeFrom: OffsetSizeTrait + ToPrimitive,
    OffsetSizeTo: OffsetSizeTrait + NumCast + ArrowNativeType,
{
    let strings = array
        .as_any()
        .downcast_ref::<GenericStringArray<OffsetSizeFrom>>()
        .unwrap();
    let source = array.data();
    let value_bytes = strings.value_data();

    // Re-encode every offset in the target width, failing on the first one
    // that does not fit.
    let offsets = source.buffers()[0].typed_data::<OffsetSizeFrom>();
    let mut converted_offsets = BufferBuilder::<OffsetSizeTo>::new(offsets.len());
    for offset in offsets {
        let converted = OffsetSizeTo::from(*offset).ok_or_else(|| {
            ArrowError::ComputeError(
                "large-utf8 array too large to cast to utf8-array".into(),
            )
        })?;
        converted_offsets.append(converted);
    }
    let offset_buffer = converted_offsets.finish();

    let dtype = if std::mem::size_of::<OffsetSizeTo>() == 8 {
        DataType::LargeUtf8
    } else {
        DataType::Utf8
    };

    let builder = ArrayData::builder(dtype)
        .offset(array.offset())
        .len(array.len())
        .add_buffer(offset_buffer)
        .add_buffer(value_bytes)
        .null_bit_buffer(source.null_buffer().cloned());
    // SAFETY: NOTE(review): presumably sound because offsets are value-for-value
    // copies and the value bytes are reused unchanged — confirm.
    let array_data = unsafe { builder.build_unchecked() };
    Ok(Arc::new(GenericStringArray::<OffsetSizeTo>::from(
        array_data,
    )))
}
/// Converts between `List` and `LargeList` by re-encoding the offsets buffer
/// from `OffsetSizeFrom` to `OffsetSizeTo`; the child data is reused as is.
/// `_cast_options` is unused.
///
/// # Errors
/// `ArrowError::ComputeError` when a `LargeList` has more child values than
/// `i32::MAX`.
///
/// # Panics
/// * When the offset type parameters do not match the direction implied by the
///   array's data type (checked with `assert_eq!`).
/// * When the array is neither `List` nor `LargeList` (`unreachable!`).
/// * When an individual offset cannot be converted to `OffsetSizeTo`.
fn cast_list_container<OffsetSizeFrom, OffsetSizeTo>(
    array: &dyn Array,
    _cast_options: &CastOptions,
) -> Result<ArrayRef>
where
    OffsetSizeFrom: OffsetSizeTrait + ToPrimitive,
    OffsetSizeTo: OffsetSizeTrait + NumCast,
{
    let data = array.data_ref();
    let value_data = data.child_data()[0].clone();

    let from_width = std::mem::size_of::<OffsetSizeFrom>();
    let to_width = std::mem::size_of::<OffsetSizeTo>();

    let out_dtype = match array.data_type() {
        DataType::List(value_type) => {
            assert_eq!(from_width, std::mem::size_of::<i32>());
            assert_eq!(to_width, std::mem::size_of::<i64>());
            DataType::LargeList(value_type.clone())
        }
        DataType::LargeList(value_type) => {
            assert_eq!(from_width, std::mem::size_of::<i64>());
            assert_eq!(to_width, std::mem::size_of::<i32>());
            if value_data.len() > i32::MAX as usize {
                return Err(ArrowError::ComputeError(
                    "LargeList too large to cast to List".into(),
                ));
            }
            DataType::List(value_type.clone())
        }
        _ => unreachable!(),
    };

    // Only the aligned middle part of the raw offsets buffer is used.
    // SAFETY: NOTE(review): assumes the buffer holds `OffsetSizeFrom` offsets,
    // as the width assertions above suggest — confirm.
    let (_, offsets, _) =
        unsafe { data.buffers()[0].as_slice().align_to::<OffsetSizeFrom>() };
    let converted = offsets
        .iter()
        .map(|offset| -> OffsetSizeTo { NumCast::from(*offset).unwrap() });
    // SAFETY: a slice iterator reports its exact length, which `map` preserves.
    let offset_buffer = unsafe { Buffer::from_trusted_len_iter(converted) };

    let builder = ArrayData::builder(out_dtype)
        .offset(array.offset())
        .len(array.len())
        .add_buffer(offset_buffer)
        .add_child_data(value_data)
        .null_bit_buffer(data.null_buffer().cloned());
    // SAFETY: NOTE(review): presumably sound because offsets are value-for-value
    // copies and the child data is reused unchanged — confirm.
    let array_data = unsafe { builder.build_unchecked() };
    Ok(make_array(array_data))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::datatypes::TimeUnit;
use crate::util::decimal::{Decimal128, Decimal256};
use crate::{buffer::Buffer, util::display::array_value_to_string};
/// Casts `$INPUT_ARRAY` to `$OUTPUT_TYPE` and checks the result element by element.
///
/// The macro asserts that `can_cast_types` accepts the conversion, that the cast
/// result downcasts to `$OUTPUT_TYPE_ARRAY` with the requested data type and the
/// expected length, and that every slot equals the matching `Some(value)` of
/// `$OUTPUT_VALUES` or is null where the expected entry is `None`.
macro_rules! generate_cast_test_case {
    ($INPUT_ARRAY: expr, $OUTPUT_TYPE_ARRAY: ident, $OUTPUT_TYPE: expr, $OUTPUT_VALUES: expr) => {
        let expected_values = $OUTPUT_VALUES;
        assert!(can_cast_types($INPUT_ARRAY.data_type(), $OUTPUT_TYPE));
        let cast_result = cast($INPUT_ARRAY, $OUTPUT_TYPE).unwrap();
        let typed_result = cast_result
            .as_any()
            .downcast_ref::<$OUTPUT_TYPE_ARRAY>()
            .unwrap();
        assert_eq!($OUTPUT_TYPE, typed_result.data_type());
        assert_eq!(typed_result.len(), expected_values.len());
        for (idx, expected) in expected_values.iter().enumerate() {
            if let Some(expected) = expected {
                assert_eq!(typed_result.value(idx), *expected);
            } else {
                assert!(typed_result.is_null(idx));
            }
        }
    };
}
/// Builds a `Decimal128Array` from optional unscaled `i128` values and then applies
/// the given precision and scale.
fn create_decimal_array(
    array: Vec<Option<i128>>,
    precision: u8,
    scale: u8,
) -> Result<Decimal128Array> {
    let unscaled: Decimal128Array = array.into_iter().collect();
    unscaled.with_precision_and_scale(precision, scale)
}
/// Builds a `Decimal256Array` from optional `BigInt` values and then applies the
/// given precision and scale.
fn create_decimal256_array(
    array: Vec<Option<BigInt>>,
    precision: u8,
    scale: u8,
) -> Result<Decimal256Array> {
    let unscaled: Decimal256Array = array.into_iter().collect();
    unscaled.with_precision_and_scale(precision, scale)
}
/// Decimal128(20, 3) -> Decimal128(20, 4) rescales the stored values by ten, and a
/// cast whose result exceeds the target precision is reported as an error.
#[test]
fn test_cast_decimal128_to_decimal128() {
    let source_type = DataType::Decimal128(20, 3);
    let target_type = DataType::Decimal128(20, 4);
    assert!(can_cast_types(&source_type, &target_type));

    let source_values = vec![Some(1123456), Some(2123456), Some(3123456), None];
    let source: ArrayRef = Arc::new(create_decimal_array(source_values, 20, 3).unwrap());
    generate_cast_test_case!(
        &source,
        Decimal128Array,
        &target_type,
        vec![
            Some(Decimal128::new_from_i128(20, 4, 11234560_i128)),
            Some(Decimal128::new_from_i128(20, 4, 21234560_i128)),
            Some(Decimal128::new_from_i128(20, 4, 31234560_i128)),
            None
        ]
    );

    // The rescaled value does not fit in a precision of 2.
    let too_wide: ArrayRef =
        Arc::new(create_decimal_array(vec![Some(123456), None], 10, 0).unwrap());
    let outcome = cast(&too_wide, &DataType::Decimal128(2, 2));
    assert!(outcome.is_err());
    assert_eq!(
        "Invalid argument error: 12345600 is too large to store in a Decimal128 of precision 2. Max is 99",
        outcome.unwrap_err().to_string()
    );
}
/// Decimal128(20, 3) -> Decimal256(20, 4) rescales the stored values by ten.
#[test]
fn test_cast_decimal128_to_decimal256() {
    let source_type = DataType::Decimal128(20, 3);
    let target_type = DataType::Decimal256(20, 4);
    assert!(can_cast_types(&source_type, &target_type));

    let source_values = vec![Some(1123456), Some(2123456), Some(3123456), None];
    let source: ArrayRef = Arc::new(create_decimal_array(source_values, 20, 3).unwrap());

    // Shorthand for an expected Decimal256(20, 4) value.
    let expected_decimal =
        |unscaled: i128| Decimal256::from_big_int(&BigInt::from(unscaled), 20, 4).unwrap();
    generate_cast_test_case!(
        &source,
        Decimal256Array,
        &target_type,
        vec![
            Some(expected_decimal(11234560_i128)),
            Some(expected_decimal(21234560_i128)),
            Some(expected_decimal(31234560_i128)),
            None
        ]
    );
}
/// Decimal256(20, 3) -> Decimal128(20, 4) rescales the stored values by ten.
#[test]
fn test_cast_decimal256_to_decimal128() {
    let source_type = DataType::Decimal256(20, 3);
    let target_type = DataType::Decimal128(20, 4);
    assert!(can_cast_types(&source_type, &target_type));

    let source_values = vec![
        Some(BigInt::from(1123456)),
        Some(BigInt::from(2123456)),
        Some(BigInt::from(3123456)),
        None,
    ];
    let source: ArrayRef =
        Arc::new(create_decimal256_array(source_values, 20, 3).unwrap());
    generate_cast_test_case!(
        &source,
        Decimal128Array,
        &target_type,
        vec![
            Some(Decimal128::new_from_i128(20, 4, 11234560_i128)),
            Some(Decimal128::new_from_i128(20, 4, 21234560_i128)),
            Some(Decimal128::new_from_i128(20, 4, 31234560_i128)),
            None
        ]
    );
}
/// Decimal256(20, 3) -> Decimal256(20, 4) rescales the stored values by ten.
#[test]
fn test_cast_decimal256_to_decimal256() {
    let source_type = DataType::Decimal256(20, 3);
    let target_type = DataType::Decimal256(20, 4);
    assert!(can_cast_types(&source_type, &target_type));

    let source_values = vec![
        Some(BigInt::from(1123456)),
        Some(BigInt::from(2123456)),
        Some(BigInt::from(3123456)),
        None,
    ];
    let source: ArrayRef =
        Arc::new(create_decimal256_array(source_values, 20, 3).unwrap());

    // Shorthand for an expected Decimal256(20, 4) value.
    let expected_decimal =
        |unscaled: i128| Decimal256::from_big_int(&BigInt::from(unscaled), 20, 4).unwrap();
    generate_cast_test_case!(
        &source,
        Decimal256Array,
        &target_type,
        vec![
            Some(expected_decimal(11234560_i128)),
            Some(expected_decimal(21234560_i128)),
            Some(expected_decimal(31234560_i128)),
            None
        ]
    );
}
/// Casting Decimal128(38, 2) to the signed integer and floating point types.
///
/// Covers: the rejected UInt8 target, each of Int8/Int16/Int32/Int64/Float32/Float64
/// on the same small input, the out-of-range error for Int8, and the expected
/// results for large values when casting to Float32 and Float64.
#[test]
fn test_cast_decimal_to_numeric() {
    let decimal_type = DataType::Decimal128(38, 2);
    // Unsigned targets are not supported for decimals.
    assert!(!can_cast_types(&decimal_type, &DataType::UInt8));
    let value_array: Vec<Option<i128>> =
        vec![Some(125), Some(225), Some(325), None, Some(525)];
    let decimal_array = create_decimal_array(value_array, 38, 2).unwrap();
    let array = Arc::new(decimal_array) as ArrayRef;
    // i8
    generate_cast_test_case!(
        &array,
        Int8Array,
        &DataType::Int8,
        vec![Some(1_i8), Some(2_i8), Some(3_i8), None, Some(5_i8)]
    );
    // i16
    generate_cast_test_case!(
        &array,
        Int16Array,
        &DataType::Int16,
        vec![Some(1_i16), Some(2_i16), Some(3_i16), None, Some(5_i16)]
    );
    // i32
    generate_cast_test_case!(
        &array,
        Int32Array,
        &DataType::Int32,
        vec![Some(1_i32), Some(2_i32), Some(3_i32), None, Some(5_i32)]
    );
    // i64
    generate_cast_test_case!(
        &array,
        Int64Array,
        &DataType::Int64,
        vec![Some(1_i64), Some(2_i64), Some(3_i64), None, Some(5_i64)]
    );
    // f32 (previously a verbatim repeat of the Int64 case)
    generate_cast_test_case!(
        &array,
        Float32Array,
        &DataType::Float32,
        vec![
            Some(1.25_f32),
            Some(2.25_f32),
            Some(3.25_f32),
            None,
            Some(5.25_f32)
        ]
    );
    // f64 (previously a verbatim repeat of the Int64 case)
    generate_cast_test_case!(
        &array,
        Float64Array,
        &DataType::Float64,
        vec![
            Some(1.25_f64),
            Some(2.25_f64),
            Some(3.25_f64),
            None,
            Some(5.25_f64)
        ]
    );

    // A value that does not fit in the target integer type is an error.
    let value_array: Vec<Option<i128>> = vec![Some(24400)];
    let decimal_array = create_decimal_array(value_array, 38, 2).unwrap();
    let array = Arc::new(decimal_array) as ArrayRef;
    let casted_array = cast(&array, &DataType::Int8);
    assert_eq!(
        "Cast error: value of 244 is out of range Int8".to_string(),
        casted_array.unwrap_err().to_string()
    );

    // Float32: the two large inputs are expected to yield the same f32 value.
    let value_array: Vec<Option<i128>> = vec![
        Some(125),
        Some(225),
        Some(325),
        None,
        Some(525),
        Some(112345678),
        Some(112345679),
    ];
    let decimal_array = create_decimal_array(value_array, 38, 2).unwrap();
    let array = Arc::new(decimal_array) as ArrayRef;
    generate_cast_test_case!(
        &array,
        Float32Array,
        &DataType::Float32,
        vec![
            Some(1.25_f32),
            Some(2.25_f32),
            Some(3.25_f32),
            None,
            Some(5.25_f32),
            Some(1_123_456.7_f32),
            Some(1_123_456.7_f32)
        ]
    );

    // Float64: the two large inputs are expected to yield the same f64 value.
    let value_array: Vec<Option<i128>> = vec![
        Some(125),
        Some(225),
        Some(325),
        None,
        Some(525),
        Some(112345678901234568),
        Some(112345678901234560),
    ];
    let decimal_array = create_decimal_array(value_array, 38, 2).unwrap();
    let array = Arc::new(decimal_array) as ArrayRef;
    generate_cast_test_case!(
        &array,
        Float64Array,
        &DataType::Float64,
        vec![
            Some(1.25_f64),
            Some(2.25_f64),
            Some(3.25_f64),
            None,
            Some(5.25_f64),
            Some(1_123_456_789_012_345.6_f64),
            Some(1_123_456_789_012_345.6_f64),
        ]
    );
}
/// Casting signed integers and floats to Decimal128(38, 6).
///
/// Covers: the rejected UInt64 source, Int8/Int16/Int32/Int64 inputs, the error
/// raised when the scaled value exceeds the target precision, and Float32/Float64
/// inputs.
#[test]
fn test_cast_numeric_to_decimal() {
    let target_type = DataType::Decimal128(38, 6);
    // Unsigned sources are not supported for decimals.
    assert!(!can_cast_types(&DataType::UInt64, &target_type));

    let integer_inputs: Vec<ArrayRef> = vec![
        Arc::new(Int8Array::from(vec![
            Some(1),
            Some(2),
            Some(3),
            None,
            Some(5),
        ])),
        Arc::new(Int16Array::from(vec![
            Some(1),
            Some(2),
            Some(3),
            None,
            Some(5),
        ])),
        Arc::new(Int32Array::from(vec![
            Some(1),
            Some(2),
            Some(3),
            None,
            Some(5),
        ])),
        Arc::new(Int64Array::from(vec![
            Some(1),
            Some(2),
            Some(3),
            None,
            Some(5),
        ])),
    ];
    for input in integer_inputs {
        generate_cast_test_case!(
            &input,
            Decimal128Array,
            &target_type,
            vec![
                Some(Decimal128::new_from_i128(38, 6, 1000000_i128)),
                Some(Decimal128::new_from_i128(38, 6, 2000000_i128)),
                Some(Decimal128::new_from_i128(38, 6, 3000000_i128)),
                None,
                Some(Decimal128::new_from_i128(38, 6, 5000000_i128))
            ]
        );
    }

    // 100 scaled by one decimal place needs four digits, which exceeds precision 3.
    let overflowing: ArrayRef = Arc::new(Int8Array::from(vec![1, 2, 3, 4, 100]));
    let outcome = cast(&overflowing, &DataType::Decimal128(3, 1));
    assert!(outcome.is_err());
    assert_eq!(
        "Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999",
        outcome.unwrap_err().to_string()
    );

    let f32_input: ArrayRef = Arc::new(Float32Array::from(vec![
        Some(1.1),
        Some(2.2),
        Some(4.4),
        None,
        Some(1.123_456_7),
        Some(1.123_456_7),
    ]));
    generate_cast_test_case!(
        &f32_input,
        Decimal128Array,
        &target_type,
        vec![
            Some(Decimal128::new_from_i128(38, 6, 1100000_i128)),
            Some(Decimal128::new_from_i128(38, 6, 2200000_i128)),
            Some(Decimal128::new_from_i128(38, 6, 4400000_i128)),
            None,
            Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
            Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
        ]
    );

    let f64_input: ArrayRef = Arc::new(Float64Array::from(vec![
        Some(1.1),
        Some(2.2),
        Some(4.4),
        None,
        Some(1.123_456_789_123_4),
        Some(1.123_456_789_012_345_6),
        Some(1.123_456_789_012_345_6),
    ]));
    generate_cast_test_case!(
        &f64_input,
        Decimal128Array,
        &target_type,
        vec![
            Some(Decimal128::new_from_i128(38, 6, 1100000_i128)),
            Some(Decimal128::new_from_i128(38, 6, 2200000_i128)),
            Some(Decimal128::new_from_i128(38, 6, 4400000_i128)),
            None,
            Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
            Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
            Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
        ]
    );
}
/// Int32 -> Float64 keeps every value.
#[test]
fn test_cast_i32_to_f64() {
    let input: ArrayRef = Arc::new(Int32Array::from(vec![5, 6, 7, 8, 9]));
    let casted = cast(&input, &DataType::Float64).unwrap();
    let floats = casted.as_any().downcast_ref::<Float64Array>().unwrap();
    let expected: [f64; 5] = [5.0, 6.0, 7.0, 8.0, 9.0];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(*want, floats.value(idx));
    }
}
/// Int32 -> UInt8 with the default options: values outside the UInt8 range are
/// expected to become invalid (null) slots.
#[test]
fn test_cast_i32_to_u8() {
    let input: ArrayRef = Arc::new(Int32Array::from(vec![-5, 6, -7, 8, 100000000]));
    let casted = cast(&input, &DataType::UInt8).unwrap();
    let bytes = casted.as_any().downcast_ref::<UInt8Array>().unwrap();
    let expected: [Option<u8>; 5] = [None, Some(6), None, Some(8), None];
    for (idx, want) in expected.iter().enumerate() {
        match want {
            Some(value) => assert_eq!(*value, bytes.value(idx)),
            None => assert!(!bytes.is_valid(idx)),
        }
    }
}
/// With `safe: false`, Int32 -> UInt8 reports the first unrepresentable value as an
/// error; unwrapping that error produces the expected panic message.
#[test]
#[should_panic(expected = "Can't cast value -5 to type UInt8")]
fn test_cast_int32_to_u8_with_error() {
    let input: ArrayRef = Arc::new(Int32Array::from(vec![-5, 6, -7, 8, 100000000]));
    let strict = CastOptions { safe: false };
    let outcome = cast_with_options(&input, &DataType::UInt8, &strict);
    assert!(outcome.is_err());
    outcome.unwrap();
}
/// Int32 -> UInt8 on a sliced input: the result has the slice's length, an offset
/// of zero, and null slots for values outside the UInt8 range.
#[test]
fn test_cast_i32_to_u8_sliced() {
    let full: ArrayRef = Arc::new(Int32Array::from(vec![-5, 6, -7, 8, 100000000]));
    assert_eq!(0, full.offset());
    let sliced = full.slice(2, 3);
    assert_eq!(2, sliced.offset());

    let casted = cast(&sliced, &DataType::UInt8).unwrap();
    assert_eq!(3, casted.len());
    assert_eq!(0, casted.offset());

    let bytes = casted.as_any().downcast_ref::<UInt8Array>().unwrap();
    let expected: [Option<u8>; 3] = [None, Some(8), None];
    for (idx, want) in expected.iter().enumerate() {
        match want {
            Some(value) => assert_eq!(*value, bytes.value(idx)),
            None => assert!(!bytes.is_valid(idx)),
        }
    }
}
/// Int32 -> Int32 returns the same values.
#[test]
fn test_cast_i32_to_i32() {
    let input: ArrayRef = Arc::new(Int32Array::from(vec![5, 6, 7, 8, 9]));
    let casted = cast(&input, &DataType::Int32).unwrap();
    let ints = casted.as_any().downcast_ref::<Int32Array>().unwrap();
    let expected: [i32; 5] = [5, 6, 7, 8, 9];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(*want, ints.value(idx));
    }
}
/// Int32 -> List<Int32> wraps every element in its own single-item list.
#[test]
fn test_cast_i32_to_list_i32() {
    let input: ArrayRef = Arc::new(Int32Array::from(vec![5, 6, 7, 8, 9]));
    let list_type = DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
    let casted = cast(&input, &list_type).unwrap();
    assert_eq!(5, casted.len());

    let list = casted.as_any().downcast_ref::<ListArray>().unwrap();
    assert_eq!(&[0, 1, 2, 3, 4, 5], list.value_offsets());
    for idx in 0..5 {
        assert_eq!(1, list.value_length(idx));
    }

    let values = list.values();
    let ints = values.as_any().downcast_ref::<Int32Array>().unwrap();
    let expected: [i32; 5] = [5, 6, 7, 8, 9];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(*want, ints.value(idx));
    }
}
/// Int32 -> List<Int32> with a null element: the null is counted on both the list
/// array and its child values.
#[test]
fn test_cast_i32_to_list_i32_nullable() {
    let input: ArrayRef = Arc::new(Int32Array::from(vec![
        Some(5),
        None,
        Some(7),
        Some(8),
        Some(9),
    ]));
    let list_type = DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
    let casted = cast(&input, &list_type).unwrap();
    assert_eq!(5, casted.len());
    assert_eq!(1, casted.null_count());

    let list = casted.as_any().downcast_ref::<ListArray>().unwrap();
    assert_eq!(&[0, 1, 2, 3, 4, 5], list.value_offsets());
    for idx in 0..5 {
        assert_eq!(1, list.value_length(idx));
    }

    let values = list.values();
    let ints = values.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(1, ints.null_count());
    let expected: [Option<i32>; 5] = [Some(5), None, Some(7), Some(8), Some(9)];
    for (idx, want) in expected.iter().enumerate() {
        match want {
            Some(value) => assert_eq!(*value, ints.value(idx)),
            None => assert!(!ints.is_valid(idx)),
        }
    }
}
/// Sliced Int32 -> List<Float64>: only the sliced elements appear in the result and
/// the null inside the slice is preserved.
#[test]
fn test_cast_i32_to_list_f64_nullable_sliced() {
    let full: ArrayRef = Arc::new(Int32Array::from(vec![
        Some(5),
        None,
        Some(7),
        Some(8),
        None,
        Some(10),
    ]));
    let sliced = full.slice(2, 4);
    let list_type =
        DataType::List(Box::new(Field::new("item", DataType::Float64, true)));
    let casted = cast(&sliced, &list_type).unwrap();
    assert_eq!(4, casted.len());
    assert_eq!(1, casted.null_count());

    let list = casted.as_any().downcast_ref::<ListArray>().unwrap();
    assert_eq!(&[0, 1, 2, 3, 4], list.value_offsets());
    for idx in 0..4 {
        assert_eq!(1, list.value_length(idx));
    }

    let values = list.values();
    let floats = values.as_any().downcast_ref::<Float64Array>().unwrap();
    assert_eq!(1, floats.null_count());
    let expected: [Option<f64>; 4] = [Some(7.0), Some(8.0), None, Some(10.0)];
    for (idx, want) in expected.iter().enumerate() {
        match want {
            Some(value) => assert_eq!(*value, floats.value(idx)),
            None => assert!(!floats.is_valid(idx)),
        }
    }
}
/// Utf8 -> Int32 with the default options: strings that are not integers are
/// expected to become invalid (null) slots.
#[test]
fn test_cast_utf8_to_i32() {
    let input: ArrayRef = Arc::new(StringArray::from(vec!["5", "6", "seven", "8", "9.1"]));
    let casted = cast(&input, &DataType::Int32).unwrap();
    let ints = casted.as_any().downcast_ref::<Int32Array>().unwrap();
    let expected: [Option<i32>; 5] = [Some(5), Some(6), None, Some(8), None];
    for (idx, want) in expected.iter().enumerate() {
        match want {
            Some(value) => assert_eq!(*value, ints.value(idx)),
            None => assert!(!ints.is_valid(idx)),
        }
    }
}
/// With `safe: false`, Utf8 -> Int32 fails on the first string that cannot be
/// parsed and names that string in the error.
#[test]
fn test_cast_with_options_utf8_to_i32() {
    let input: ArrayRef = Arc::new(StringArray::from(vec!["5", "6", "seven", "8", "9.1"]));
    let strict = CastOptions { safe: false };
    let error = match cast_with_options(&input, &DataType::Int32, &strict) {
        Ok(_) => panic!("expected error"),
        Err(e) => e,
    };
    assert!(
        error
            .to_string()
            .contains("Cast error: Cannot cast string 'seven' to value of Int32 type"),
        "Error: {}",
        error
    );
}
/// Utf8 -> Boolean with the default options: " Y " is expected to be accepted as
/// true, while "invalid" and the empty string are expected to become null.
#[test]
fn test_cast_utf8_to_bool() {
    let input: ArrayRef = Arc::new(StringArray::from(vec![
        "true", "false", "invalid", " Y ", "",
    ]));
    let expected =
        BooleanArray::from(vec![Some(true), Some(false), None, Some(true), None]);
    let casted = cast(&input, &DataType::Boolean).unwrap();
    assert_eq!(*as_boolean_array(&casted), expected);
}
/// With `safe: false`, Utf8 -> Boolean fails on the first string that is not a
/// recognised boolean and names that string in the error.
#[test]
fn test_cast_with_options_utf8_to_bool() {
    let input: ArrayRef = Arc::new(StringArray::from(vec![
        "true", "false", "invalid", " Y ", "",
    ]));
    let strict = CastOptions { safe: false };
    let error = match cast_with_options(&input, &DataType::Boolean, &strict) {
        Ok(_) => panic!("expected error"),
        Err(e) => e,
    };
    assert!(error
        .to_string()
        .contains("Cast error: Cannot cast string 'invalid' to value of Boolean type"));
}
/// Boolean -> Int32 maps true to 1 and false to 0; the null slot stays invalid.
#[test]
fn test_cast_bool_to_i32() {
    let input: ArrayRef =
        Arc::new(BooleanArray::from(vec![Some(true), Some(false), None]));
    let casted = cast(&input, &DataType::Int32).unwrap();
    let ints = casted.as_any().downcast_ref::<Int32Array>().unwrap();
    let expected: [Option<i32>; 3] = [Some(1), Some(0), None];
    for (idx, want) in expected.iter().enumerate() {
        match want {
            Some(value) => assert_eq!(*value, ints.value(idx)),
            None => assert!(!ints.is_valid(idx)),
        }
    }
}
/// Boolean -> Float64 maps true to 1.0 and false to 0.0; the null slot stays invalid.
#[test]
fn test_cast_bool_to_f64() {
    let input: ArrayRef =
        Arc::new(BooleanArray::from(vec![Some(true), Some(false), None]));
    let casted = cast(&input, &DataType::Float64).unwrap();
    let floats = casted.as_any().downcast_ref::<Float64Array>().unwrap();
    let expected: [Option<f64>; 3] = [Some(1.0), Some(0.0), None];
    for (idx, want) in expected.iter().enumerate() {
        match want {
            Some(value) => assert_eq!(*value, floats.value(idx)),
            None => assert!(!floats.is_valid(idx)),
        }
    }
}
/// Int32 -> Timestamp(Microsecond) is rejected; unwrapping the error produces the
/// expected panic message.
#[test]
#[should_panic(
    expected = "Casting from Int32 to Timestamp(Microsecond, None) not supported"
)]
fn test_cast_int32_to_timestamp() {
    let input: ArrayRef = Arc::new(Int32Array::from(vec![Some(2), Some(10), None]));
    let target = DataType::Timestamp(TimeUnit::Microsecond, None);
    cast(&input, &target).unwrap();
}
/// List<Int32> -> List<UInt16>: the list's own buffers are unchanged, while child
/// values outside the UInt16 range become null.
#[test]
fn test_cast_list_i32_to_list_u16() {
    // Three lists: [0, 0, 0], [-1, -2, -1], [2, 100000000].
    let child = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000])
        .data()
        .clone();
    let offsets = Buffer::from_slice_ref(&[0, 3, 6, 8]);
    let source_type = DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
    let source_data = ArrayData::builder(source_type)
        .len(3)
        .add_buffer(offsets)
        .add_child_data(child)
        .build()
        .unwrap();
    let source: ArrayRef = Arc::new(ListArray::from(source_data));

    let target_type = DataType::List(Box::new(Field::new("item", DataType::UInt16, true)));
    let casted = cast(&source, &target_type).unwrap();

    // The list level itself has no nulls and keeps the same buffers.
    assert_eq!(0, casted.null_count());
    assert_eq!(
        source.data().buffers().to_vec(),
        casted.data().buffers().to_vec()
    );

    let list = casted
        .as_ref()
        .as_any()
        .downcast_ref::<ListArray>()
        .unwrap();
    assert_eq!(DataType::UInt16, list.value_type());
    let expected_lengths = [3, 3, 2];
    for (idx, want) in expected_lengths.iter().enumerate() {
        assert_eq!(*want, list.value_length(idx));
    }

    let values = list.values();
    assert_eq!(4, values.null_count());
    let shorts = values.as_any().downcast_ref::<UInt16Array>().unwrap();
    let expected: UInt16Array =
        vec![Some(0), Some(0), Some(0), None, None, None, Some(2), None]
            .into_iter()
            .collect();
    assert_eq!(shorts, &expected);
}
/// List<Int32> -> List<Timestamp(Microsecond)> is rejected because the child cast
/// is unsupported; unwrapping the error produces the expected panic message.
#[test]
#[should_panic(
    expected = "Casting from Int32 to Timestamp(Microsecond, None) not supported"
)]
fn test_cast_list_i32_to_list_timestamp() {
    let child = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 8, 100000000])
        .data()
        .clone();
    let offsets = Buffer::from_slice_ref(&[0, 3, 6, 9]);
    let source_type = DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
    let source_data = ArrayData::builder(source_type)
        .len(3)
        .add_buffer(offsets)
        .add_child_data(child)
        .build()
        .unwrap();
    let source: ArrayRef = Arc::new(ListArray::from(source_data));

    let target_type = DataType::List(Box::new(Field::new(
        "item",
        DataType::Timestamp(TimeUnit::Microsecond, None),
        true,
    )));
    cast(&source, &target_type).unwrap();
}
/// Date32 -> Date64: the expected values are the inputs multiplied by 86_400_000.
#[test]
fn test_cast_date32_to_date64() {
    let input: ArrayRef = Arc::new(Date32Array::from(vec![10000, 17890]));
    let casted = cast(&input, &DataType::Date64).unwrap();
    let dates = casted.as_any().downcast_ref::<Date64Array>().unwrap();
    let expected: [i64; 2] = [864000000000, 1545696000000];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(*want, dates.value(idx));
    }
}
/// Date64 -> Date32: the sub-day remainder is dropped and nulls are preserved.
#[test]
fn test_cast_date64_to_date32() {
    let input: ArrayRef = Arc::new(Date64Array::from(vec![
        Some(864000000005),
        Some(1545696000001),
        None,
    ]));
    let casted = cast(&input, &DataType::Date32).unwrap();
    let dates = casted.as_any().downcast_ref::<Date32Array>().unwrap();
    let expected: [Option<i32>; 3] = [Some(10000), Some(17890), None];
    for (idx, want) in expected.iter().enumerate() {
        match want {
            Some(value) => assert_eq!(*value, dates.value(idx)),
            None => assert!(dates.is_null(idx)),
        }
    }
}
/// Utf8 / LargeUtf8 -> Timestamp(Nanosecond): unparsable strings become null by
/// default and produce an error when `safe` is false.
#[test]
fn test_cast_string_to_timestamp() {
    let inputs = vec![
        Some("2020-09-08T12:00:00+00:00"),
        Some("Not a valid date"),
        None,
    ];
    let small: ArrayRef = Arc::new(StringArray::from(inputs.clone()));
    let large: ArrayRef = Arc::new(LargeStringArray::from(inputs));
    let to_type = DataType::Timestamp(TimeUnit::Nanosecond, None);
    let strict = CastOptions { safe: false };

    for array in &[small, large] {
        let casted = cast(array, &to_type).unwrap();
        let timestamps = casted
            .as_any()
            .downcast_ref::<TimestampNanosecondArray>()
            .unwrap();
        assert_eq!(1599566400000000000, timestamps.value(0));
        assert!(timestamps.is_null(1));
        assert!(timestamps.is_null(2));

        let error = cast_with_options(array, &to_type, &strict).unwrap_err();
        assert_eq!(
            error.to_string(),
            "Cast error: Error parsing 'Not a valid date' as timestamp"
        );
    }
}
/// Utf8 / LargeUtf8 -> Date32: unparsable strings become null by default and
/// produce an error when `safe` is false.
#[test]
fn test_cast_string_to_date32() {
    let inputs = vec![Some("2018-12-25"), Some("Not a valid date"), None];
    let small: ArrayRef = Arc::new(StringArray::from(inputs.clone()));
    let large: ArrayRef = Arc::new(LargeStringArray::from(inputs));
    let to_type = DataType::Date32;
    let strict = CastOptions { safe: false };

    for array in &[small, large] {
        let casted = cast(array, &to_type).unwrap();
        let dates = casted.as_any().downcast_ref::<Date32Array>().unwrap();
        assert_eq!(17890, dates.value(0));
        assert!(dates.is_null(1));
        assert!(dates.is_null(2));

        let error = cast_with_options(array, &to_type, &strict).unwrap_err();
        assert_eq!(
            error.to_string(),
            "Cast error: Cannot cast string 'Not a valid date' to value of Date32 type"
        );
    }
}
/// Utf8 / LargeUtf8 -> Time32(Second): a seconds field of 60 is accepted, 61 and
/// non-time strings become null by default and produce an error when `safe` is
/// false.
#[test]
fn test_cast_string_to_time32second() {
    let inputs = vec![
        Some("08:08:35.091323414"),
        Some("08:08:60.091323414"),
        Some("08:08:61.091323414"),
        Some("Not a valid time"),
        None,
    ];
    let small: ArrayRef = Arc::new(StringArray::from(inputs.clone()));
    let large: ArrayRef = Arc::new(LargeStringArray::from(inputs));
    let to_type = DataType::Time32(TimeUnit::Second);
    let strict = CastOptions { safe: false };

    for array in &[small, large] {
        let casted = cast(array, &to_type).unwrap();
        let times = casted.as_any().downcast_ref::<Time32SecondArray>().unwrap();
        assert_eq!(29315, times.value(0));
        assert_eq!(29340, times.value(1));
        for idx in 2..5 {
            assert!(times.is_null(idx));
        }

        let error = cast_with_options(array, &to_type, &strict).unwrap_err();
        assert_eq!(
            error.to_string(),
            "Cast error: Cannot cast string '08:08:61.091323414' to value of Time32(Second) type"
        );
    }
}
/// Utf8 / LargeUtf8 -> Time32(Millisecond): a seconds field of 60 is accepted, 61
/// and non-time strings become null by default and produce an error when `safe` is
/// false.
#[test]
fn test_cast_string_to_time32millisecond() {
    let inputs = vec![
        Some("08:08:35.091323414"),
        Some("08:08:60.091323414"),
        Some("08:08:61.091323414"),
        Some("Not a valid time"),
        None,
    ];
    let small: ArrayRef = Arc::new(StringArray::from(inputs.clone()));
    let large: ArrayRef = Arc::new(LargeStringArray::from(inputs));
    let to_type = DataType::Time32(TimeUnit::Millisecond);
    let strict = CastOptions { safe: false };

    for array in &[small, large] {
        let casted = cast(array, &to_type).unwrap();
        let times = casted
            .as_any()
            .downcast_ref::<Time32MillisecondArray>()
            .unwrap();
        assert_eq!(29315091, times.value(0));
        assert_eq!(29340091, times.value(1));
        for idx in 2..5 {
            assert!(times.is_null(idx));
        }

        let error = cast_with_options(array, &to_type, &strict).unwrap_err();
        assert_eq!(
            error.to_string(),
            "Cast error: Cannot cast string '08:08:61.091323414' to value of Time32(Millisecond) type"
        );
    }
}
/// Utf8 / LargeUtf8 -> Time64(Microsecond): unparsable strings become null by
/// default and produce an error when `safe` is false.
#[test]
fn test_cast_string_to_time64microsecond() {
    let inputs = vec![
        Some("08:08:35.091323414"),
        Some("Not a valid time"),
        None,
    ];
    let small: ArrayRef = Arc::new(StringArray::from(inputs.clone()));
    let large: ArrayRef = Arc::new(LargeStringArray::from(inputs));
    let to_type = DataType::Time64(TimeUnit::Microsecond);
    let strict = CastOptions { safe: false };

    for array in &[small, large] {
        let casted = cast(array, &to_type).unwrap();
        let times = casted
            .as_any()
            .downcast_ref::<Time64MicrosecondArray>()
            .unwrap();
        assert_eq!(29315091323, times.value(0));
        assert!(times.is_null(1));
        assert!(times.is_null(2));

        let error = cast_with_options(array, &to_type, &strict).unwrap_err();
        assert_eq!(
            error.to_string(),
            "Cast error: Cannot cast string 'Not a valid time' to value of Time64(Microsecond) type"
        );
    }
}
/// Utf8 / LargeUtf8 -> Time64(Nanosecond): unparsable strings become null by
/// default and produce an error when `safe` is false.
#[test]
fn test_cast_string_to_time64nanosecond() {
    let inputs = vec![
        Some("08:08:35.091323414"),
        Some("Not a valid time"),
        None,
    ];
    let small: ArrayRef = Arc::new(StringArray::from(inputs.clone()));
    let large: ArrayRef = Arc::new(LargeStringArray::from(inputs));
    let to_type = DataType::Time64(TimeUnit::Nanosecond);
    let strict = CastOptions { safe: false };

    for array in &[small, large] {
        let casted = cast(array, &to_type).unwrap();
        let times = casted
            .as_any()
            .downcast_ref::<Time64NanosecondArray>()
            .unwrap();
        assert_eq!(29315091323414, times.value(0));
        assert!(times.is_null(1));
        assert!(times.is_null(2));

        let error = cast_with_options(array, &to_type, &strict).unwrap_err();
        assert_eq!(
            error.to_string(),
            "Cast error: Cannot cast string 'Not a valid time' to value of Time64(Nanosecond) type"
        );
    }
}
/// Utf8 / LargeUtf8 -> Date64: unparsable strings become null by default and
/// produce an error when `safe` is false.
#[test]
fn test_cast_string_to_date64() {
    let inputs = vec![
        Some("2020-09-08T12:00:00"),
        Some("Not a valid date"),
        None,
    ];
    let small: ArrayRef = Arc::new(StringArray::from(inputs.clone()));
    let large: ArrayRef = Arc::new(LargeStringArray::from(inputs));
    let to_type = DataType::Date64;
    let strict = CastOptions { safe: false };

    for array in &[small, large] {
        let casted = cast(array, &to_type).unwrap();
        let dates = casted.as_any().downcast_ref::<Date64Array>().unwrap();
        assert_eq!(1599566400000, dates.value(0));
        assert!(dates.is_null(1));
        assert!(dates.is_null(2));

        let error = cast_with_options(array, &to_type, &strict).unwrap_err();
        assert_eq!(
            error.to_string(),
            "Cast error: Cannot cast string 'Not a valid date' to value of Date64 type"
        );
    }
}
/// Utf8 -> Binary and LargeUtf8 -> LargeBinary expose the strings' bytes and keep
/// nulls.
#[test]
fn test_cast_string_to_binary() {
    let first = "Hi";
    let second = "Hello";
    let strings = vec![Some(first), Some(second), None];
    let small: ArrayRef = Arc::new(StringArray::from(strings.clone()));
    let large: ArrayRef = Arc::new(LargeStringArray::from(strings));

    let casted = cast(&small, &DataType::Binary).unwrap();
    let binary = casted.as_any().downcast_ref::<BinaryArray>().unwrap();
    assert_eq!(first.as_bytes(), binary.value(0));
    assert_eq!(second.as_bytes(), binary.value(1));
    assert!(binary.is_null(2));

    let casted = cast(&large, &DataType::LargeBinary).unwrap();
    let binary = casted
        .as_any()
        .downcast_ref::<LargeBinaryArray>()
        .unwrap();
    assert_eq!(first.as_bytes(), binary.value(0));
    assert_eq!(second.as_bytes(), binary.value(1));
    assert!(binary.is_null(2));
}
/// Date32 -> Int32 keeps the underlying values.
#[test]
fn test_cast_date32_to_int32() {
    let input: ArrayRef = Arc::new(Date32Array::from(vec![10000, 17890]));
    let casted = cast(&input, &DataType::Int32).unwrap();
    let ints = casted.as_any().downcast_ref::<Int32Array>().unwrap();
    let expected: [i32; 2] = [10000, 17890];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(*want, ints.value(idx));
    }
}
/// Int32 -> Date32 keeps the underlying values.
#[test]
fn test_cast_int32_to_date32() {
    let input: ArrayRef = Arc::new(Int32Array::from(vec![10000, 17890]));
    let casted = cast(&input, &DataType::Date32).unwrap();
    let dates = casted.as_any().downcast_ref::<Date32Array>().unwrap();
    let expected: [i32; 2] = [10000, 17890];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(*want, dates.value(idx));
    }
}
/// Timestamp(Millisecond, "UTC") -> Date32 yields whole days and keeps nulls.
#[test]
fn test_cast_timestamp_to_date32() {
    let millis = vec![Some(864000000005), Some(1545696000001), None];
    let timestamps =
        TimestampMillisecondArray::from_opt_vec(millis, Some(String::from("UTC")));
    let input: ArrayRef = Arc::new(timestamps);
    let casted = cast(&input, &DataType::Date32).unwrap();
    let dates = casted.as_any().downcast_ref::<Date32Array>().unwrap();
    let expected: [Option<i32>; 3] = [Some(10000), Some(17890), None];
    for (idx, want) in expected.iter().enumerate() {
        match want {
            Some(value) => assert_eq!(*value, dates.value(idx)),
            None => assert!(dates.is_null(idx)),
        }
    }
}
/// Timestamp(Millisecond) -> Date64 keeps the millisecond values and nulls.
#[test]
fn test_cast_timestamp_to_date64() {
    let millis = vec![Some(864000000005), Some(1545696000001), None];
    let timestamps = TimestampMillisecondArray::from_opt_vec(millis, None);
    let input: ArrayRef = Arc::new(timestamps);
    let casted = cast(&input, &DataType::Date64).unwrap();
    let dates = casted.as_any().downcast_ref::<Date64Array>().unwrap();
    let expected: [Option<i64>; 3] = [Some(864000000005), Some(1545696000001), None];
    for (idx, want) in expected.iter().enumerate() {
        match want {
            Some(value) => assert_eq!(*value, dates.value(idx)),
            None => assert!(dates.is_null(idx)),
        }
    }
}
/// Timestamp(Millisecond, "UTC") -> Int64 keeps the raw values and nulls.
#[test]
fn test_cast_timestamp_to_i64() {
    let millis = vec![Some(864000000005), Some(1545696000001), None];
    let timestamps =
        TimestampMillisecondArray::from_opt_vec(millis, Some("UTC".to_string()));
    let input: ArrayRef = Arc::new(timestamps);
    let casted = cast(&input, &DataType::Int64).unwrap();
    let ints = casted.as_any().downcast_ref::<Int64Array>().unwrap();
    assert_eq!(&DataType::Int64, ints.data_type());
    let expected: [Option<i64>; 3] = [Some(864000000005), Some(1545696000001), None];
    for (idx, want) in expected.iter().enumerate() {
        match want {
            Some(value) => assert_eq!(*value, ints.value(idx)),
            None => assert!(ints.is_null(idx)),
        }
    }
}
/// Timestamp(Millisecond, "UTC") -> Utf8 renders each value as
/// `YYYY-MM-DD HH:MM:SS.mmm` and keeps nulls.
///
/// Only compiled when the `chrono-tz` feature is enabled.
#[test]
#[cfg(feature = "chrono-tz")]
fn test_cast_timestamp_to_string() {
    let a = TimestampMillisecondArray::from_opt_vec(
        vec![Some(864000000005), Some(1545696000001), None],
        Some("UTC".to_string()),
    );
    let array = Arc::new(a) as ArrayRef;
    // A leftover `dbg!(&array)` used to dump the input array to stderr here.
    let b = cast(&array, &DataType::Utf8).unwrap();
    let c = b.as_any().downcast_ref::<StringArray>().unwrap();
    assert_eq!(&DataType::Utf8, c.data_type());
    assert_eq!("1997-05-19 00:00:00.005", c.value(0));
    assert_eq!("2018-12-25 00:00:00.001", c.value(1));
    assert!(c.is_null(2));
}
/// Date32 -> Utf8 renders each value as `YYYY-MM-DD`.
#[test]
fn test_cast_date32_to_string() {
    let input: ArrayRef = Arc::new(Date32Array::from(vec![10000, 17890]));
    let casted = cast(&input, &DataType::Utf8).unwrap();
    let strings = casted.as_any().downcast_ref::<StringArray>().unwrap();
    assert_eq!(&DataType::Utf8, strings.data_type());
    let expected = ["1997-05-19", "2018-12-25"];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(*want, strings.value(idx));
    }
}
/// Date64 -> Utf8 renders each value as `YYYY-MM-DD HH:MM:SS`.
#[test]
fn test_cast_date64_to_string() {
    let input: ArrayRef =
        Arc::new(Date64Array::from(vec![10000 * 86400000, 17890 * 86400000]));
    let casted = cast(&input, &DataType::Utf8).unwrap();
    let strings = casted.as_any().downcast_ref::<StringArray>().unwrap();
    assert_eq!(&DataType::Utf8, strings.data_type());
    let expected = ["1997-05-19 00:00:00", "2018-12-25 00:00:00"];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(*want, strings.value(idx));
    }
}
/// Timestamp(Millisecond) -> Timestamp(Second) drops the sub-second part and keeps
/// nulls.
#[test]
fn test_cast_between_timestamps() {
    let millis = vec![Some(864000003005), Some(1545696002001), None];
    let timestamps = TimestampMillisecondArray::from_opt_vec(millis, None);
    let input: ArrayRef = Arc::new(timestamps);
    let target = DataType::Timestamp(TimeUnit::Second, None);
    let casted = cast(&input, &target).unwrap();
    let seconds = casted
        .as_any()
        .downcast_ref::<TimestampSecondArray>()
        .unwrap();
    let expected: [Option<i64>; 3] = [Some(864000003), Some(1545696002), None];
    for (idx, want) in expected.iter().enumerate() {
        match want {
            Some(value) => assert_eq!(*value, seconds.value(idx)),
            None => assert!(seconds.is_null(idx)),
        }
    }
}
/// Every Duration unit casts to Int64 with the raw values unchanged.
#[test]
fn test_cast_duration_to_i64() {
    let raw = vec![5, 6, 7, 8, 100000000];
    let inputs: Vec<ArrayRef> = vec![
        Arc::new(DurationNanosecondArray::from(raw.clone())),
        Arc::new(DurationMicrosecondArray::from(raw.clone())),
        Arc::new(DurationMillisecondArray::from(raw.clone())),
        Arc::new(DurationSecondArray::from(raw.clone())),
    ];
    for input in inputs {
        assert!(can_cast_types(input.data_type(), &DataType::Int64));
        let casted = cast(&input, &DataType::Int64).unwrap();
        let ints = casted.as_any().downcast_ref::<Int64Array>().unwrap();
        assert_eq!(raw.as_slice(), ints.values());
    }
}
/// IntervalDayTime and IntervalYearMonth arrays cast to Int64 with the raw values
/// unchanged.
#[test]
fn test_cast_interval_to_i64() {
    let raw = vec![5, 6, 7, 8];
    let year_months = raw.iter().map(|&value| value as i32).collect::<Vec<i32>>();
    let inputs: Vec<ArrayRef> = vec![
        Arc::new(IntervalDayTimeArray::from(raw.clone())),
        Arc::new(IntervalYearMonthArray::from(year_months)),
    ];
    for input in inputs {
        assert!(can_cast_types(input.data_type(), &DataType::Int64));
        let casted = cast(&input, &DataType::Int64).unwrap();
        let ints = casted.as_any().downcast_ref::<Int64Array>().unwrap();
        assert_eq!(raw.as_slice(), ints.values());
    }
}
/// Int32 -> Utf8 and Int32 -> LargeUtf8 both render the decimal digits.
#[test]
fn test_cast_to_strings() {
    let input: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));

    let casted = cast(&input, &DataType::Utf8).unwrap();
    let small = casted
        .as_any()
        .downcast_ref::<StringArray>()
        .unwrap()
        .into_iter()
        .collect::<Vec<_>>();
    assert_eq!(small, vec![Some("1"), Some("2"), Some("3")]);

    let casted = cast(&input, &DataType::LargeUtf8).unwrap();
    let large = casted
        .as_any()
        .downcast_ref::<LargeStringArray>()
        .unwrap()
        .into_iter()
        .collect::<Vec<_>>();
    assert_eq!(large, vec![Some("1"), Some("2"), Some("3")]);
}
/// LargeUtf8 -> Utf8 and Utf8 -> LargeUtf8 preserve every element, including nulls.
#[test]
fn test_str_to_str_casts() {
    for data in vec![
        vec![Some("foo"), Some("bar"), Some("ham")],
        vec![Some("foo"), None, Some("bar")],
    ] {
        // LargeUtf8 -> Utf8
        let large_input: ArrayRef = Arc::new(LargeStringArray::from(data.clone()));
        let casted = cast(&large_input, &DataType::Utf8).unwrap();
        let before = large_input
            .as_any()
            .downcast_ref::<LargeStringArray>()
            .unwrap()
            .into_iter()
            .collect::<Vec<_>>();
        let after = casted
            .as_any()
            .downcast_ref::<StringArray>()
            .unwrap()
            .into_iter()
            .collect::<Vec<_>>();
        assert_eq!(before, after);

        // Utf8 -> LargeUtf8
        let small_input: ArrayRef = Arc::new(StringArray::from(data));
        let casted = cast(&small_input, &DataType::LargeUtf8).unwrap();
        let before = small_input
            .as_any()
            .downcast_ref::<StringArray>()
            .unwrap()
            .into_iter()
            .collect::<Vec<_>>();
        let after = casted
            .as_any()
            .downcast_ref::<LargeStringArray>()
            .unwrap()
            .into_iter()
            .collect::<Vec<_>>();
        assert_eq!(before, after);
    }
}
/// Casts an `f64` array holding integer-type boundary values to every numeric
/// type and checks the rendered output; slots the test expects not to be
/// representable in the target type are expected to be null.
#[test]
fn test_cast_from_f64() {
    let inputs: Vec<f64> = vec![
        i64::MIN as f64,
        i32::MIN as f64,
        i16::MIN as f64,
        i8::MIN as f64,
        0_f64,
        u8::MAX as f64,
        u16::MAX as f64,
        u32::MAX as f64,
        u64::MAX as f64,
    ];
    let f64_array: ArrayRef = Arc::new(Float64Array::from(inputs));

    // Float targets are compared numerically, so parse the rendered strings back.
    let want_f64: Vec<f64> = vec![
        -9223372036854776000.0,
        -2147483648.0,
        -32768.0,
        -128.0,
        0.0,
        255.0,
        65535.0,
        4294967295.0,
        18446744073709552000.0,
    ];
    let got_f64: Vec<f64> =
        get_cast_values::<Float64Type>(&f64_array, &DataType::Float64)
            .iter()
            .map(|text| text.parse::<f64>().unwrap())
            .collect();
    assert_eq!(want_f64, got_f64);

    let want_f32: Vec<f32> = vec![
        -9223372000000000000.0,
        -2147483600.0,
        -32768.0,
        -128.0,
        0.0,
        255.0,
        65535.0,
        4294967300.0,
        18446744000000000000.0,
    ];
    let got_f32: Vec<f32> =
        get_cast_values::<Float32Type>(&f64_array, &DataType::Float32)
            .iter()
            .map(|text| text.parse::<f32>().unwrap())
            .collect();
    assert_eq!(want_f32, got_f32);

    // Integer targets are compared on their rendered strings.
    macro_rules! assert_cast {
        ($arrow_ty:ty, $variant:ident, $expected:expr) => {
            assert_eq!(
                $expected,
                get_cast_values::<$arrow_ty>(&f64_array, &DataType::$variant)
            )
        };
    }

    assert_cast!(
        Int64Type,
        Int64,
        vec![
            "-9223372036854775808",
            "-2147483648",
            "-32768",
            "-128",
            "0",
            "255",
            "65535",
            "4294967295",
            "null",
        ]
    );
    assert_cast!(
        Int32Type,
        Int32,
        vec![
            "null",
            "-2147483648",
            "-32768",
            "-128",
            "0",
            "255",
            "65535",
            "null",
            "null",
        ]
    );
    assert_cast!(
        Int16Type,
        Int16,
        vec!["null", "null", "-32768", "-128", "0", "255", "null", "null", "null"]
    );
    assert_cast!(
        Int8Type,
        Int8,
        vec!["null", "null", "null", "-128", "0", "null", "null", "null", "null"]
    );
    assert_cast!(
        UInt64Type,
        UInt64,
        vec![
            "null",
            "null",
            "null",
            "null",
            "0",
            "255",
            "65535",
            "4294967295",
            "null",
        ]
    );
    assert_cast!(
        UInt32Type,
        UInt32,
        vec![
            "null",
            "null",
            "null",
            "null",
            "0",
            "255",
            "65535",
            "4294967295",
            "null",
        ]
    );
    assert_cast!(
        UInt16Type,
        UInt16,
        vec!["null", "null", "null", "null", "0", "255", "65535", "null", "null"]
    );
    assert_cast!(
        UInt8Type,
        UInt8,
        vec!["null", "null", "null", "null", "0", "255", "null", "null", "null"]
    );
}
/// Casts an `f32` array holding integer-type boundary values to every numeric
/// type and checks the rendered output; slots the test expects not to be
/// representable in the target type are expected to be null.
#[test]
fn test_cast_from_f32() {
    let inputs: Vec<f32> = vec![
        i32::MIN as f32,
        i32::MIN as f32,
        i16::MIN as f32,
        i8::MIN as f32,
        0_f32,
        u8::MAX as f32,
        u16::MAX as f32,
        u32::MAX as f32,
        u32::MAX as f32,
    ];
    let f32_array: ArrayRef = Arc::new(Float32Array::from(inputs));

    // Compares the rendered cast output for one target type with the expected strings.
    macro_rules! assert_cast {
        ($arrow_ty:ty, $variant:ident, $expected:expr) => {
            assert_eq!(
                $expected,
                get_cast_values::<$arrow_ty>(&f32_array, &DataType::$variant)
            )
        };
    }

    assert_cast!(
        Float64Type,
        Float64,
        vec![
            "-2147483648.0",
            "-2147483648.0",
            "-32768.0",
            "-128.0",
            "0.0",
            "255.0",
            "65535.0",
            "4294967296.0",
            "4294967296.0",
        ]
    );
    assert_cast!(
        Float32Type,
        Float32,
        vec![
            "-2147483600.0",
            "-2147483600.0",
            "-32768.0",
            "-128.0",
            "0.0",
            "255.0",
            "65535.0",
            "4294967300.0",
            "4294967300.0",
        ]
    );
    assert_cast!(
        Int64Type,
        Int64,
        vec![
            "-2147483648",
            "-2147483648",
            "-32768",
            "-128",
            "0",
            "255",
            "65535",
            "4294967296",
            "4294967296",
        ]
    );
    assert_cast!(
        Int32Type,
        Int32,
        vec![
            "-2147483648",
            "-2147483648",
            "-32768",
            "-128",
            "0",
            "255",
            "65535",
            "null",
            "null",
        ]
    );
    assert_cast!(
        Int16Type,
        Int16,
        vec!["null", "null", "-32768", "-128", "0", "255", "null", "null", "null"]
    );
    assert_cast!(
        Int8Type,
        Int8,
        vec!["null", "null", "null", "-128", "0", "null", "null", "null", "null"]
    );
    assert_cast!(
        UInt64Type,
        UInt64,
        vec![
            "null",
            "null",
            "null",
            "null",
            "0",
            "255",
            "65535",
            "4294967296",
            "4294967296",
        ]
    );
    assert_cast!(
        UInt32Type,
        UInt32,
        vec!["null", "null", "null", "null", "0", "255", "65535", "null", "null"]
    );
    assert_cast!(
        UInt16Type,
        UInt16,
        vec!["null", "null", "null", "null", "0", "255", "65535", "null", "null"]
    );
    assert_cast!(
        UInt8Type,
        UInt8,
        vec!["null", "null", "null", "null", "0", "255", "null", "null", "null"]
    );
}
/// Casts a `UInt64` array holding unsigned boundary values to every numeric
/// type; slots the test expects not to fit the target are expected to be null.
#[test]
fn test_cast_from_uint64() {
    let inputs: Vec<u64> = vec![
        0,
        u8::MAX as u64,
        u16::MAX as u64,
        u32::MAX as u64,
        u64::MAX,
    ];
    let u64_array: ArrayRef = Arc::new(UInt64Array::from(inputs));

    // Float targets are compared numerically, so parse the rendered strings back.
    let want_f64: Vec<f64> =
        vec![0.0, 255.0, 65535.0, 4294967295.0, 18446744073709552000.0];
    let got_f64: Vec<f64> =
        get_cast_values::<Float64Type>(&u64_array, &DataType::Float64)
            .iter()
            .map(|text| text.parse::<f64>().unwrap())
            .collect();
    assert_eq!(want_f64, got_f64);

    let want_f32: Vec<f32> =
        vec![0.0, 255.0, 65535.0, 4294967300.0, 18446744000000000000.0];
    let got_f32: Vec<f32> =
        get_cast_values::<Float32Type>(&u64_array, &DataType::Float32)
            .iter()
            .map(|text| text.parse::<f32>().unwrap())
            .collect();
    assert_eq!(want_f32, got_f32);

    // Integer targets are compared on their rendered strings.
    macro_rules! assert_cast {
        ($arrow_ty:ty, $variant:ident, $expected:expr) => {
            assert_eq!(
                $expected,
                get_cast_values::<$arrow_ty>(&u64_array, &DataType::$variant)
            )
        };
    }

    assert_cast!(Int64Type, Int64, vec!["0", "255", "65535", "4294967295", "null"]);
    assert_cast!(Int32Type, Int32, vec!["0", "255", "65535", "null", "null"]);
    assert_cast!(Int16Type, Int16, vec!["0", "255", "null", "null", "null"]);
    assert_cast!(Int8Type, Int8, vec!["0", "null", "null", "null", "null"]);
    assert_cast!(
        UInt64Type,
        UInt64,
        vec!["0", "255", "65535", "4294967295", "18446744073709551615"]
    );
    assert_cast!(UInt32Type, UInt32, vec!["0", "255", "65535", "4294967295", "null"]);
    assert_cast!(UInt16Type, UInt16, vec!["0", "255", "65535", "null", "null"]);
    assert_cast!(UInt8Type, UInt8, vec!["0", "255", "null", "null", "null"]);
}
/// Casts a `UInt32` array holding unsigned boundary values to every numeric
/// type; slots the test expects not to fit the target are expected to be null.
#[test]
fn test_cast_from_uint32() {
    let inputs: Vec<u32> = vec![0, u8::MAX as u32, u16::MAX as u32, u32::MAX];
    let u32_array: ArrayRef = Arc::new(UInt32Array::from(inputs));

    // Compares the rendered cast output for one target type with the expected strings.
    macro_rules! assert_cast {
        ($arrow_ty:ty, $variant:ident, $expected:expr) => {
            assert_eq!(
                $expected,
                get_cast_values::<$arrow_ty>(&u32_array, &DataType::$variant)
            )
        };
    }

    assert_cast!(Float64Type, Float64, vec!["0.0", "255.0", "65535.0", "4294967295.0"]);
    assert_cast!(Float32Type, Float32, vec!["0.0", "255.0", "65535.0", "4294967300.0"]);
    assert_cast!(Int64Type, Int64, vec!["0", "255", "65535", "4294967295"]);
    assert_cast!(Int32Type, Int32, vec!["0", "255", "65535", "null"]);
    assert_cast!(Int16Type, Int16, vec!["0", "255", "null", "null"]);
    assert_cast!(Int8Type, Int8, vec!["0", "null", "null", "null"]);
    assert_cast!(UInt64Type, UInt64, vec!["0", "255", "65535", "4294967295"]);
    assert_cast!(UInt32Type, UInt32, vec!["0", "255", "65535", "4294967295"]);
    assert_cast!(UInt16Type, UInt16, vec!["0", "255", "65535", "null"]);
    assert_cast!(UInt8Type, UInt8, vec!["0", "255", "null", "null"]);
}
/// Casts a `UInt16` array holding unsigned boundary values to every numeric
/// type; slots the test expects not to fit the target are expected to be null.
#[test]
fn test_cast_from_uint16() {
    let inputs: Vec<u16> = vec![0, u8::MAX as u16, u16::MAX];
    let u16_array: ArrayRef = Arc::new(UInt16Array::from(inputs));

    // Compares the rendered cast output for one target type with the expected strings.
    macro_rules! assert_cast {
        ($arrow_ty:ty, $variant:ident, $expected:expr) => {
            assert_eq!(
                $expected,
                get_cast_values::<$arrow_ty>(&u16_array, &DataType::$variant)
            )
        };
    }

    assert_cast!(Float64Type, Float64, vec!["0.0", "255.0", "65535.0"]);
    assert_cast!(Float32Type, Float32, vec!["0.0", "255.0", "65535.0"]);
    assert_cast!(Int64Type, Int64, vec!["0", "255", "65535"]);
    assert_cast!(Int32Type, Int32, vec!["0", "255", "65535"]);
    assert_cast!(Int16Type, Int16, vec!["0", "255", "null"]);
    assert_cast!(Int8Type, Int8, vec!["0", "null", "null"]);
    assert_cast!(UInt64Type, UInt64, vec!["0", "255", "65535"]);
    assert_cast!(UInt32Type, UInt32, vec!["0", "255", "65535"]);
    assert_cast!(UInt16Type, UInt16, vec!["0", "255", "65535"]);
    assert_cast!(UInt8Type, UInt8, vec!["0", "255", "null"]);
}
/// Casts a `UInt8` array holding `0` and `u8::MAX` to every numeric type;
/// only the `Int8` target is expected to produce a null (for 255).
#[test]
fn test_cast_from_uint8() {
    let inputs: Vec<u8> = vec![0, u8::MAX];
    let u8_array: ArrayRef = Arc::new(UInt8Array::from(inputs));

    // Compares the rendered cast output for one target type with the expected strings.
    macro_rules! assert_cast {
        ($arrow_ty:ty, $variant:ident, $expected:expr) => {
            assert_eq!(
                $expected,
                get_cast_values::<$arrow_ty>(&u8_array, &DataType::$variant)
            )
        };
    }

    assert_cast!(Float64Type, Float64, vec!["0.0", "255.0"]);
    assert_cast!(Float32Type, Float32, vec!["0.0", "255.0"]);
    assert_cast!(Int64Type, Int64, vec!["0", "255"]);
    assert_cast!(Int32Type, Int32, vec!["0", "255"]);
    assert_cast!(Int16Type, Int16, vec!["0", "255"]);
    assert_cast!(Int8Type, Int8, vec!["0", "null"]);
    assert_cast!(UInt64Type, UInt64, vec!["0", "255"]);
    assert_cast!(UInt32Type, UInt32, vec!["0", "255"]);
    assert_cast!(UInt16Type, UInt16, vec!["0", "255"]);
    assert_cast!(UInt8Type, UInt8, vec!["0", "255"]);
}
/// Casts an `Int64` array holding signed boundary values to every numeric type
/// (and to `Date32`); slots the test expects not to fit the target are
/// expected to be null.
#[test]
fn test_cast_from_int64() {
    let inputs: Vec<i64> = vec![
        i64::MIN,
        i32::MIN as i64,
        i16::MIN as i64,
        i8::MIN as i64,
        0,
        i8::MAX as i64,
        i16::MAX as i64,
        i32::MAX as i64,
        i64::MAX,
    ];
    let i64_array: ArrayRef = Arc::new(Int64Array::from(inputs));

    // Float targets are compared numerically, so parse the rendered strings back.
    let want_f64: Vec<f64> = vec![
        -9223372036854776000.0,
        -2147483648.0,
        -32768.0,
        -128.0,
        0.0,
        127.0,
        32767.0,
        2147483647.0,
        9223372036854776000.0,
    ];
    let got_f64: Vec<f64> =
        get_cast_values::<Float64Type>(&i64_array, &DataType::Float64)
            .iter()
            .map(|text| text.parse::<f64>().unwrap())
            .collect();
    assert_eq!(want_f64, got_f64);

    let want_f32: Vec<f32> = vec![
        -9223372000000000000.0,
        -2147483600.0,
        -32768.0,
        -128.0,
        0.0,
        127.0,
        32767.0,
        2147483600.0,
        9223372000000000000.0,
    ];
    let got_f32: Vec<f32> =
        get_cast_values::<Float32Type>(&i64_array, &DataType::Float32)
            .iter()
            .map(|text| text.parse::<f32>().unwrap())
            .collect();
    assert_eq!(want_f32, got_f32);

    // Integer (and date) targets are compared on their rendered strings.
    macro_rules! assert_cast {
        ($arrow_ty:ty, $variant:ident, $expected:expr) => {
            assert_eq!(
                $expected,
                get_cast_values::<$arrow_ty>(&i64_array, &DataType::$variant)
            )
        };
    }

    assert_cast!(
        Int64Type,
        Int64,
        vec![
            "-9223372036854775808",
            "-2147483648",
            "-32768",
            "-128",
            "0",
            "127",
            "32767",
            "2147483647",
            "9223372036854775807",
        ]
    );

    // Int32 and Date32 share the same expected output.
    let want_32_bit = vec![
        "null",
        "-2147483648",
        "-32768",
        "-128",
        "0",
        "127",
        "32767",
        "2147483647",
        "null",
    ];
    assert_cast!(Int32Type, Int32, want_32_bit);
    assert_cast!(Date32Type, Date32, want_32_bit);

    assert_cast!(
        Int16Type,
        Int16,
        vec!["null", "null", "-32768", "-128", "0", "127", "32767", "null", "null"]
    );
    assert_cast!(
        Int8Type,
        Int8,
        vec!["null", "null", "null", "-128", "0", "127", "null", "null", "null"]
    );
    assert_cast!(
        UInt64Type,
        UInt64,
        vec![
            "null",
            "null",
            "null",
            "null",
            "0",
            "127",
            "32767",
            "2147483647",
            "9223372036854775807",
        ]
    );
    assert_cast!(
        UInt32Type,
        UInt32,
        vec![
            "null",
            "null",
            "null",
            "null",
            "0",
            "127",
            "32767",
            "2147483647",
            "null",
        ]
    );
    assert_cast!(
        UInt16Type,
        UInt16,
        vec!["null", "null", "null", "null", "0", "127", "32767", "null", "null"]
    );
    assert_cast!(
        UInt8Type,
        UInt8,
        vec!["null", "null", "null", "null", "0", "127", "null", "null", "null"]
    );
}
/// Casts an `Int32` array holding signed boundary values to the other numeric
/// types and to `Date64`; slots the test expects not to fit the target are
/// expected to be null.
#[test]
fn test_cast_from_int32() {
    let inputs: Vec<i32> = vec![
        i32::MIN,
        i16::MIN as i32,
        i8::MIN as i32,
        0,
        i8::MAX as i32,
        i16::MAX as i32,
        i32::MAX,
    ];
    let i32_array: ArrayRef = Arc::new(Int32Array::from(inputs));

    // Compares the rendered cast output for one target type with the expected strings.
    macro_rules! assert_cast {
        ($arrow_ty:ty, $variant:ident, $expected:expr) => {
            assert_eq!(
                $expected,
                get_cast_values::<$arrow_ty>(&i32_array, &DataType::$variant)
            )
        };
    }

    assert_cast!(
        Float64Type,
        Float64,
        vec![
            "-2147483648.0",
            "-32768.0",
            "-128.0",
            "0.0",
            "127.0",
            "32767.0",
            "2147483647.0",
        ]
    );
    assert_cast!(
        Float32Type,
        Float32,
        vec![
            "-2147483600.0",
            "-32768.0",
            "-128.0",
            "0.0",
            "127.0",
            "32767.0",
            "2147483600.0",
        ]
    );
    assert_cast!(
        Int16Type,
        Int16,
        vec!["null", "-32768", "-128", "0", "127", "32767", "null"]
    );
    assert_cast!(
        Int8Type,
        Int8,
        vec!["null", "null", "-128", "0", "127", "null", "null"]
    );
    assert_cast!(
        UInt64Type,
        UInt64,
        vec!["null", "null", "null", "0", "127", "32767", "2147483647"]
    );
    assert_cast!(
        UInt32Type,
        UInt32,
        vec!["null", "null", "null", "0", "127", "32767", "2147483647"]
    );
    assert_cast!(
        UInt16Type,
        UInt16,
        vec!["null", "null", "null", "0", "127", "32767", "null"]
    );
    assert_cast!(
        UInt8Type,
        UInt8,
        vec!["null", "null", "null", "0", "127", "null", "null"]
    );
    // Expected Date64 output for the same Int32 inputs.
    assert_cast!(
        Date64Type,
        Date64,
        vec![
            "-185542587187200000",
            "-2831155200000",
            "-11059200000",
            "0",
            "10972800000",
            "2831068800000",
            "185542587100800000",
        ]
    );
}
/// Casts an `Int16` array holding signed boundary values to every numeric
/// type; slots the test expects not to fit the target are expected to be null.
#[test]
fn test_cast_from_int16() {
    let inputs: Vec<i16> = vec![i16::MIN, i8::MIN as i16, 0, i8::MAX as i16, i16::MAX];
    let i16_array: ArrayRef = Arc::new(Int16Array::from(inputs));

    // Compares the rendered cast output for one target type with the expected strings.
    macro_rules! assert_cast {
        ($arrow_ty:ty, $variant:ident, $expected:expr) => {
            assert_eq!(
                $expected,
                get_cast_values::<$arrow_ty>(&i16_array, &DataType::$variant)
            )
        };
    }

    assert_cast!(
        Float64Type,
        Float64,
        vec!["-32768.0", "-128.0", "0.0", "127.0", "32767.0"]
    );
    assert_cast!(
        Float32Type,
        Float32,
        vec!["-32768.0", "-128.0", "0.0", "127.0", "32767.0"]
    );
    assert_cast!(Int64Type, Int64, vec!["-32768", "-128", "0", "127", "32767"]);
    assert_cast!(Int32Type, Int32, vec!["-32768", "-128", "0", "127", "32767"]);
    assert_cast!(Int16Type, Int16, vec!["-32768", "-128", "0", "127", "32767"]);
    assert_cast!(Int8Type, Int8, vec!["null", "-128", "0", "127", "null"]);
    assert_cast!(UInt64Type, UInt64, vec!["null", "null", "0", "127", "32767"]);
    assert_cast!(UInt32Type, UInt32, vec!["null", "null", "0", "127", "32767"]);
    assert_cast!(UInt16Type, UInt16, vec!["null", "null", "0", "127", "32767"]);
    assert_cast!(UInt8Type, UInt8, vec!["null", "null", "0", "127", "null"]);
}
/// Casting `Date32` to `Int64` is expected to expose the stored values
/// unchanged, including the `i32` extremes.
#[test]
fn test_cast_from_date32() {
    let raw_days: Vec<i32> = vec![
        i32::MIN,
        i16::MIN as i32,
        i8::MIN as i32,
        0,
        i8::MAX as i32,
        i16::MAX as i32,
        i32::MAX,
    ];
    let date32_array: ArrayRef = Arc::new(Date32Array::from(raw_days));

    let want_i64 = vec![
        "-2147483648",
        "-32768",
        "-128",
        "0",
        "127",
        "32767",
        "2147483647",
    ];
    let got_i64 = get_cast_values::<Int64Type>(&date32_array, &DataType::Int64);
    assert_eq!(want_i64, got_i64);
}
/// Casts an `Int8` array holding `i8::MIN`, `0` and `i8::MAX` to every numeric
/// type; the negative value is expected to be null for unsigned targets.
#[test]
fn test_cast_from_int8() {
    let inputs: Vec<i8> = vec![i8::MIN, 0, i8::MAX];
    let i8_array: ArrayRef = Arc::new(Int8Array::from(inputs));

    // Compares the rendered cast output for one target type with the expected strings.
    macro_rules! assert_cast {
        ($arrow_ty:ty, $variant:ident, $expected:expr) => {
            assert_eq!(
                $expected,
                get_cast_values::<$arrow_ty>(&i8_array, &DataType::$variant)
            )
        };
    }

    assert_cast!(Float64Type, Float64, vec!["-128.0", "0.0", "127.0"]);
    assert_cast!(Float32Type, Float32, vec!["-128.0", "0.0", "127.0"]);
    assert_cast!(Int64Type, Int64, vec!["-128", "0", "127"]);
    assert_cast!(Int32Type, Int32, vec!["-128", "0", "127"]);
    assert_cast!(Int16Type, Int16, vec!["-128", "0", "127"]);
    assert_cast!(Int8Type, Int8, vec!["-128", "0", "127"]);
    assert_cast!(UInt64Type, UInt64, vec!["null", "0", "127"]);
    assert_cast!(UInt32Type, UInt32, vec!["null", "0", "127"]);
    assert_cast!(UInt16Type, UInt16, vec!["null", "0", "127"]);
    assert_cast!(UInt8Type, UInt8, vec!["null", "0", "127"]);
}
/// Casts `array` to `dt`, downcasts the result to `PrimitiveArray<T>` and
/// renders one string per slot of the input: `"null"` for null slots,
/// otherwise the `Debug` formatting of the value.
///
/// Panics if the cast fails or the result is not a `PrimitiveArray<T>`.
fn get_cast_values<T>(array: &ArrayRef, dt: &DataType) -> Vec<String>
where
    T: ArrowNumericType,
{
    let casted = cast(array, dt).unwrap();
    let primitive = casted
        .as_any()
        .downcast_ref::<PrimitiveArray<T>>()
        .unwrap();
    (0..array.len())
        .map(|slot| {
            if primitive.is_null(slot) {
                "null".to_string()
            } else {
                format!("{:?}", primitive.value(slot))
            }
        })
        .collect()
}
/// A string dictionary with `Int8` keys must cast to a plain `Utf8` array and
/// to string dictionaries keyed by each of the other integer types listed
/// below, keeping the same values (including the null slot).
#[test]
fn test_cast_utf8_dict() {
    use DataType::*;

    let mut dict_builder = StringDictionaryBuilder::<Int8Type>::new();
    dict_builder.append("one").unwrap();
    dict_builder.append_null();
    dict_builder.append("three").unwrap();
    let source: ArrayRef = Arc::new(dict_builder.finish());
    let expected = vec!["one", "null", "three"];

    // Dictionary -> plain Utf8.
    let target = Utf8;
    let unpacked = cast(&source, &target).expect("cast to UTF-8 failed");
    assert_eq!(unpacked.data_type(), &target);
    assert_eq!(array_to_strings(&unpacked), expected);

    // Dictionary -> dictionary with a different key type.
    for key_type in vec![Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64] {
        let target = Dictionary(Box::new(key_type), Box::new(Utf8));
        let rekeyed = cast(&source, &target).expect("cast failed");
        assert_eq!(rekeyed.data_type(), &target);
        assert_eq!(array_to_strings(&rekeyed), expected);
    }
}
/// Casting a primitive dictionary built from 200 distinct values with `Int32`
/// keys to a dictionary with `Int8` keys must fail. The expected error reports
/// 72 unconvertible indexes (200 keys minus the 128 non-negative values an
/// `Int8` can hold).
#[test]
fn test_cast_dict_to_dict_bad_index_value_primitive() {
    use DataType::*;
    let mut builder = PrimitiveDictionaryBuilder::<Int32Type, Int64Type>::new();
    // 200 distinct values, so the dictionary needs more keys than `Int8` can index.
    for i in 0..200 {
        builder.append(i).unwrap();
    }
    let array: ArrayRef = Arc::new(builder.finish());
    let cast_type = Dictionary(Box::new(Int8), Box::new(Utf8));
    let res = cast(&array, &cast_type);
    assert!(res.is_err());
    let actual_error = format!("{:?}", res);
    let expected_error = "Could not convert 72 dictionary indexes from Int32 to Int8";
    // The format arguments follow the order of the placeholders: expected, then actual.
    assert!(
        actual_error.contains(expected_error),
        "did not find expected error '{}' in actual error '{}'",
        expected_error,
        actual_error
    );
}
/// Casting a string dictionary built from 200 distinct values with `Int32`
/// keys to a dictionary with `Int8` keys must fail. The expected error reports
/// 72 unconvertible indexes (200 keys minus the 128 non-negative values an
/// `Int8` can hold).
#[test]
fn test_cast_dict_to_dict_bad_index_value_utf8() {
    use DataType::*;
    let mut builder = StringDictionaryBuilder::<Int32Type>::new();
    // 200 distinct strings, so the dictionary needs more keys than `Int8` can index.
    for i in 0..200 {
        let val = format!("val{}", i);
        builder.append(&val).unwrap();
    }
    let array: ArrayRef = Arc::new(builder.finish());
    let cast_type = Dictionary(Box::new(Int8), Box::new(Utf8));
    let res = cast(&array, &cast_type);
    assert!(res.is_err());
    let actual_error = format!("{:?}", res);
    let expected_error = "Could not convert 72 dictionary indexes from Int32 to Int8";
    // The format arguments follow the order of the placeholders: expected, then actual.
    assert!(
        actual_error.contains(expected_error),
        "did not find expected error '{}' in actual error '{}'",
        expected_error,
        actual_error
    );
}
/// An `Int32`-valued dictionary with `Int8` keys must unpack to both `Utf8`
/// and `Int64`, keeping its values and its null slot.
#[test]
fn test_cast_primitive_dict() {
    use DataType::*;

    let mut dict_builder = PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::new();
    dict_builder.append(1).unwrap();
    dict_builder.append_null();
    dict_builder.append(3).unwrap();
    let source: ArrayRef = Arc::new(dict_builder.finish());
    let expected = vec!["1", "null", "3"];

    // Dictionary -> Utf8
    let as_utf8 = cast(&source, &Utf8).expect("cast to UTF-8 failed");
    assert_eq!(array_to_strings(&as_utf8), expected);
    assert_eq!(as_utf8.data_type(), &Utf8);

    // Dictionary -> Int64
    let as_int64 = cast(&source, &Int64).expect("cast to int64 failed");
    assert_eq!(array_to_strings(&as_int64), expected);
    assert_eq!(as_int64.data_type(), &Int64);
}
/// A plain `Int32` array must cast to `UInt8`-keyed dictionaries whose values
/// are `Int32` or `Int8`, keeping its values and its null slot.
#[test]
fn test_cast_primitive_array_to_dict() {
    use DataType::*;

    let mut values = PrimitiveBuilder::<Int32Type>::new();
    values.append_value(1);
    values.append_null();
    values.append_value(3);
    let source: ArrayRef = Arc::new(values.finish());
    let expected = vec!["1", "null", "3"];

    for value_type in vec![Int32, Int8] {
        let target = Dictionary(Box::new(UInt8), Box::new(value_type));
        let packed = cast(&source, &target).expect("cast failed");
        assert_eq!(packed.data_type(), &target);
        assert_eq!(array_to_strings(&packed), expected);
    }
}
/// A plain `Utf8` array must cast to a `UInt8`-keyed string dictionary,
/// keeping its values and its null slot.
#[test]
fn test_cast_string_array_to_dict() {
    use DataType::*;

    let strings = StringArray::from(vec![Some("one"), None, Some("three")]);
    let source = Arc::new(strings) as ArrayRef;
    let expected = vec!["one", "null", "three"];

    let target = Dictionary(Box::new(UInt8), Box::new(Utf8));
    let packed = cast(&source, &target).expect("cast failed");
    assert_eq!(packed.data_type(), &target);
    assert_eq!(array_to_strings(&packed), expected);
}
/// A `Null` array must cast to `Decimal128(12, 4)` and an all-null decimal
/// array must cast back to `Null`; every slot stays null in both directions.
#[test]
fn test_cast_null_array_to_from_decimal_array() {
    let decimal_type = DataType::Decimal128(12, 4);

    // Null -> Decimal128
    let nulls = new_null_array(&DataType::Null, 4);
    assert_eq!(nulls.data_type(), &DataType::Null);
    let as_decimal = cast(&nulls, &decimal_type).expect("cast failed");
    assert_eq!(as_decimal.data_type(), &decimal_type);
    (0..4).for_each(|slot| assert!(as_decimal.is_null(slot)));

    // Decimal128 -> Null
    let null_decimals = new_null_array(&decimal_type, 4);
    assert_eq!(null_decimals.data_type(), &decimal_type);
    let as_null = cast(&null_decimals, &DataType::Null).expect("cast failed");
    assert_eq!(as_null.data_type(), &DataType::Null);
    (0..4).for_each(|slot| assert!(as_null.is_null(slot)));
}
/// Casting a `NullArray` to each primitive type listed below must produce an
/// array of that type that equals an all-`None` array of the same length.
#[test]
fn test_cast_null_array_from_and_to_primitive_array() {
    // Arguments: concrete array alias, `DataType` variant, Arrow primitive type.
    macro_rules! check_null_cast {
        ($array_ty:ident, $variant:ident, $arrow_ty:tt) => {{
            let nulls = Arc::new(NullArray::new(6)) as ArrayRef;
            let all_none = $array_ty::from(vec![None; 6]);
            let target = DataType::$variant;
            let casted = cast(&nulls, &target).expect("cast failed");
            let casted = as_primitive_array::<$arrow_ty>(&casted);
            assert_eq!(casted.data_type(), &target);
            assert_eq!(casted, &all_none);
        }};
    }

    check_null_cast!(Int16Array, Int16, Int16Type);
    check_null_cast!(Int32Array, Int32, Int32Type);
    check_null_cast!(Int64Array, Int64, Int64Type);

    check_null_cast!(UInt16Array, UInt16, UInt16Type);
    check_null_cast!(UInt32Array, UInt32, UInt32Type);
    check_null_cast!(UInt64Array, UInt64, UInt64Type);

    check_null_cast!(Float32Array, Float32, Float32Type);
    check_null_cast!(Float64Array, Float64, Float64Type);

    check_null_cast!(Date32Array, Date32, Date32Type);
    check_null_cast!(Date64Array, Date64, Date64Type);
}
/// Casts a four-slot `Null` array to `data_type` and asserts that the result
/// has that type and that all four slots are null.
///
/// Panics (failing the calling test) if the cast fails or an assertion does not hold.
fn cast_from_null_to_other(data_type: &DataType) {
    let nulls = new_null_array(&DataType::Null, 4);
    assert_eq!(nulls.data_type(), &DataType::Null);

    let converted = cast(&nulls, data_type).expect("cast failed");
    assert_eq!(converted.data_type(), data_type);
    (0..4).for_each(|slot| assert!(converted.is_null(slot)));
}
/// A `Null` array must cast to each of the string and binary types.
#[test]
fn test_cast_null_from_and_to_variable_sized() {
    let targets = vec![
        DataType::Utf8,
        DataType::LargeUtf8,
        DataType::Binary,
        DataType::LargeBinary,
    ];
    for target in &targets {
        cast_from_null_to_other(target);
    }
}
/// A `Null` array must cast to map, list, large list, fixed-size list,
/// dictionary and struct types.
#[test]
fn test_cast_null_from_and_to_nested_type() {
    // Null -> Map
    let entry_struct = DataType::Struct(vec![
        Field::new("key", DataType::Utf8, false),
        Field::new("value", DataType::Int32, true),
    ]);
    let map_type =
        DataType::Map(Box::new(Field::new("entry", entry_struct, false)), false);
    cast_from_null_to_other(&map_type);

    // Null -> List
    let list_type = DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
    cast_from_null_to_other(&list_type);

    // Null -> LargeList
    let large_list_type =
        DataType::LargeList(Box::new(Field::new("item", DataType::Int32, true)));
    cast_from_null_to_other(&large_list_type);

    // Null -> FixedSizeList
    let fixed_list_type =
        DataType::FixedSizeList(Box::new(Field::new("item", DataType::Int32, true)), 4);
    cast_from_null_to_other(&fixed_list_type);

    // Null -> Dictionary; the target type is taken from a dictionary built out of nulls.
    let no_strings: Vec<Option<&str>> = vec![None, None, None, None];
    let dictionary: DictionaryArray<Int8Type> = no_strings.into_iter().collect();
    let dictionary = Arc::new(dictionary) as ArrayRef;
    let dictionary_type = dictionary.data_type().to_owned();
    cast_from_null_to_other(&dictionary_type);

    // Null -> Struct
    let struct_type = DataType::Struct(vec![Field::new("data", DataType::Int64, false)]);
    cast_from_null_to_other(&struct_type);
}
/// Renders every slot of `array` as a string: `"null"` for null slots,
/// otherwise whatever `array_value_to_string` produces for that slot.
///
/// Panics if `array_value_to_string` returns an error.
fn array_to_strings(array: &ArrayRef) -> Vec<String> {
    let mut rendered = Vec::with_capacity(array.len());
    for row in 0..array.len() {
        let text = if array.is_null(row) {
            "null".to_string()
        } else {
            array_value_to_string(array, row).expect("Convert array to String")
        };
        rendered.push(text);
    }
    rendered
}
/// Casting strings to `Date32`: the first two inputs are expected to parse to
/// the day count since 1970-01-01, the remaining three are expected to be null.
#[test]
fn test_cast_utf8_to_date32() {
    use chrono::NaiveDate;

    // Number of days between 1970-01-01 and the given calendar date.
    let days_since_epoch = |year: i32, month: u32, day: u32| -> i32 {
        NaiveDate::from_ymd(year, month, day)
            .signed_duration_since(NaiveDate::from_ymd(1970, 1, 1))
            .num_days() as i32
    };

    let strings = StringArray::from(vec![
        "2000-01-01",          // expected valid
        "2000-2-2",            // expected valid
        "2000-00-00",          // expected null
        "2000-01-01T12:00:00", // expected null
        "2000",                // expected null
    ]);
    let source = Arc::new(strings) as ArrayRef;
    let casted = cast(&source, &DataType::Date32).unwrap();
    let dates = casted.as_any().downcast_ref::<Date32Array>().unwrap();

    assert!(dates.is_valid(0));
    assert_eq!(days_since_epoch(2000, 1, 1), dates.value(0));

    assert!(dates.is_valid(1));
    assert_eq!(days_since_epoch(2000, 2, 2), dates.value(1));

    assert!(!dates.is_valid(2));
    assert!(!dates.is_valid(3));
    assert!(!dates.is_valid(4));
}
/// Casting strings to `Date64`: the first three inputs are expected to parse
/// to the asserted values, the remaining three are expected to be null.
#[test]
fn test_cast_utf8_to_date64() {
    let strings = StringArray::from(vec![
        "2000-01-01T12:00:00", // expected valid
        "2020-12-15T12:34:56", // expected valid
        "2020-2-2T12:34:56",   // expected valid
        "2000-00-00T12:00:00", // expected null
        "2000-01-01 12:00:00", // expected null
        "2000-01-01",          // expected null
    ]);
    let source = Arc::new(strings) as ArrayRef;
    let casted = cast(&source, &DataType::Date64).unwrap();
    let dates = casted.as_any().downcast_ref::<Date64Array>().unwrap();

    assert!(dates.is_valid(0));
    assert_eq!(946728000000, dates.value(0));

    assert!(dates.is_valid(1));
    assert_eq!(1608035696000, dates.value(1));

    assert!(dates.is_valid(2));
    assert_eq!(1580646896000, dates.value(2));

    assert!(!dates.is_valid(3));
    assert!(!dates.is_valid(4));
    assert!(!dates.is_valid(5));
}
/// Cross-checks `can_cast_types` against `cast`: for every sample array and
/// every target type, the cast must succeed exactly when `can_cast_types`
/// reports that it is supported.
///
/// Only compiled with the `chrono-tz` feature, and ignored under Miri.
#[test]
#[cfg_attr(miri, ignore)]
#[cfg(feature = "chrono-tz")]
fn test_can_cast_types() {
    let target_types = get_all_types();

    for source in get_arrays_of_all_types() {
        for target in &target_types {
            println!("Test casting {:?} --> {:?}", source.data_type(), target);
            let outcome = cast(&source, target);
            let claimed_castable = can_cast_types(source.data_type(), target);

            match outcome {
                // The cast worked although it was reported as unsupported.
                Ok(_) if !claimed_castable => {
                    panic!("Was able to cast array {:?} from {:?} to {:?} but can_cast_types reported false",
                           source, source.data_type(), target)
                }
                // The cast failed although it was reported as supported.
                Err(e) if claimed_castable => {
                    panic!("Was not able to cast array {:?} from {:?} to {:?} but can_cast_types reported true. \
                            Error was {:?}",
                           source, source.data_type(), target, e)
                }
                _ => {}
            }
        }
    }
}
/// Casting between `LargeList` and `List` (in both directions) must keep the
/// first three list elements equal to those of the source array.
#[test]
fn test_cast_list_containers() {
    // LargeList -> List
    let large_input = Arc::new(make_large_list_array()) as ArrayRef;
    let list_type = DataType::List(Box::new(Field::new("", DataType::Int32, false)));
    let narrowed = cast(&large_input, &list_type).unwrap();
    let actual = narrowed.as_any().downcast_ref::<ListArray>().unwrap();
    let expected = large_input
        .as_any()
        .downcast_ref::<LargeListArray>()
        .unwrap();
    for row in 0..3 {
        assert_eq!(&expected.value(row), &actual.value(row));
    }

    // List -> LargeList
    let small_input = Arc::new(make_list_array()) as ArrayRef;
    let large_list_type =
        DataType::LargeList(Box::new(Field::new("", DataType::Int32, false)));
    let widened = cast(&small_input, &large_list_type).unwrap();
    let actual = widened.as_any().downcast_ref::<LargeListArray>().unwrap();
    let expected = small_input.as_any().downcast_ref::<ListArray>().unwrap();
    for row in 0..3 {
        assert_eq!(&expected.value(row), &actual.value(row));
    }
}
/// Returns the collection of sample arrays that `test_can_cast_types` tries
/// to cast to every candidate target type.
///
/// Timestamp arrays are included for each time unit, both without a timezone
/// and tagged with the `America/New_York` timezone.
#[cfg(feature = "chrono-tz")]
fn get_arrays_of_all_types() -> Vec<ArrayRef> {
    let zone = String::from("America/New_York");
    let raw_bytes: Vec<&[u8]> = vec![b"foo", b"bar"];
    // Raw values shared by every timestamp sample below.
    let ticks: Vec<i64> = vec![1000, 2000];

    // Two-column struct sample: a boolean column and an Int32 column.
    let struct_sample = StructArray::from(vec![
        (
            Field::new("a", DataType::Boolean, false),
            Arc::new(BooleanArray::from(vec![false, false, true, true])) as ArrayRef,
        ),
        (
            Field::new("b", DataType::Int32, false),
            Arc::new(Int32Array::from(vec![42, 28, 19, 31])) as ArrayRef,
        ),
    ]);

    // Decimal sample with precision 38 and scale 0, including one null slot.
    let decimal_sample =
        create_decimal_array(vec![Some(1), Some(2), Some(3), None], 38, 0).unwrap();

    vec![
        // Binary
        Arc::new(BinaryArray::from(raw_bytes.clone())),
        Arc::new(LargeBinaryArray::from(raw_bytes)),
        // Dictionaries with primitive values, one per key type
        make_dictionary_primitive::<Int8Type>(),
        make_dictionary_primitive::<Int16Type>(),
        make_dictionary_primitive::<Int32Type>(),
        make_dictionary_primitive::<Int64Type>(),
        make_dictionary_primitive::<UInt8Type>(),
        make_dictionary_primitive::<UInt16Type>(),
        make_dictionary_primitive::<UInt32Type>(),
        make_dictionary_primitive::<UInt64Type>(),
        // Dictionaries with string values, one per key type
        make_dictionary_utf8::<Int8Type>(),
        make_dictionary_utf8::<Int16Type>(),
        make_dictionary_utf8::<Int32Type>(),
        make_dictionary_utf8::<Int64Type>(),
        make_dictionary_utf8::<UInt8Type>(),
        make_dictionary_utf8::<UInt16Type>(),
        make_dictionary_utf8::<UInt32Type>(),
        make_dictionary_utf8::<UInt64Type>(),
        // Nested and fixed-size layouts
        Arc::new(make_list_array()),
        Arc::new(make_large_list_array()),
        Arc::new(make_fixed_size_list_array()),
        Arc::new(make_fixed_size_binary_array()),
        Arc::new(struct_sample),
        Arc::new(make_union_array()),
        // Null, strings and booleans
        Arc::new(NullArray::new(10)),
        Arc::new(StringArray::from(vec!["foo", "bar"])),
        Arc::new(LargeStringArray::from(vec!["foo", "bar"])),
        Arc::new(BooleanArray::from(vec![true, false])),
        // Integers and floats
        Arc::new(Int8Array::from(vec![1, 2])),
        Arc::new(Int16Array::from(vec![1, 2])),
        Arc::new(Int32Array::from(vec![1, 2])),
        Arc::new(Int64Array::from(vec![1, 2])),
        Arc::new(UInt8Array::from(vec![1, 2])),
        Arc::new(UInt16Array::from(vec![1, 2])),
        Arc::new(UInt32Array::from(vec![1, 2])),
        Arc::new(UInt64Array::from(vec![1, 2])),
        Arc::new(Float32Array::from(vec![1.0, 2.0])),
        Arc::new(Float64Array::from(vec![1.0, 2.0])),
        // Timestamps without a timezone
        Arc::new(TimestampSecondArray::from_vec(ticks.clone(), None)),
        Arc::new(TimestampMillisecondArray::from_vec(ticks.clone(), None)),
        Arc::new(TimestampMicrosecondArray::from_vec(ticks.clone(), None)),
        Arc::new(TimestampNanosecondArray::from_vec(ticks.clone(), None)),
        // Timestamps with a timezone
        Arc::new(TimestampSecondArray::from_vec(
            ticks.clone(),
            Some(zone.clone()),
        )),
        Arc::new(TimestampMillisecondArray::from_vec(
            ticks.clone(),
            Some(zone.clone()),
        )),
        Arc::new(TimestampMicrosecondArray::from_vec(
            ticks.clone(),
            Some(zone.clone()),
        )),
        Arc::new(TimestampNanosecondArray::from_vec(ticks, Some(zone))),
        // Dates and times of day
        Arc::new(Date32Array::from(vec![1000, 2000])),
        Arc::new(Date64Array::from(vec![1000, 2000])),
        Arc::new(Time32SecondArray::from(vec![1000, 2000])),
        Arc::new(Time32MillisecondArray::from(vec![1000, 2000])),
        Arc::new(Time64MicrosecondArray::from(vec![1000, 2000])),
        Arc::new(Time64NanosecondArray::from(vec![1000, 2000])),
        // Intervals and durations
        Arc::new(IntervalYearMonthArray::from(vec![1000, 2000])),
        Arc::new(IntervalDayTimeArray::from(vec![1000, 2000])),
        Arc::new(IntervalMonthDayNanoArray::from(vec![1000, 2000])),
        Arc::new(DurationSecondArray::from(vec![1000, 2000])),
        Arc::new(DurationMillisecondArray::from(vec![1000, 2000])),
        Arc::new(DurationMicrosecondArray::from(vec![1000, 2000])),
        Arc::new(DurationNanosecondArray::from(vec![1000, 2000])),
        // Decimal
        Arc::new(decimal_sample),
    ]
}
/// Builds a `ListArray` of `Int32` with three slots over the child values
/// `0..=7`; the offsets `[0, 3, 6, 8]` split them as
/// `[[0, 1, 2], [3, 4, 5], [6, 7]]`.
fn make_list_array() -> ListArray {
    // Flat child array holding every list element back to back.
    let child = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
        .build()
        .unwrap();

    let item_field = Field::new("item", DataType::Int32, true);
    let offsets = Buffer::from_slice_ref(&[0, 3, 6, 8]);

    ListArray::from(
        ArrayData::builder(DataType::List(Box::new(item_field)))
            .len(3)
            .add_buffer(offsets)
            .add_child_data(child)
            .build()
            .unwrap(),
    )
}
/// Builds a `LargeListArray` of `Int32` with three slots over the child
/// values `0..=7`; the `i64` offsets `[0, 3, 6, 8]` split them as
/// `[[0, 1, 2], [3, 4, 5], [6, 7]]`.
fn make_large_list_array() -> LargeListArray {
    // Flat child array holding every list element back to back.
    let child = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
        .build()
        .unwrap();

    let item_field = Field::new("item", DataType::Int32, true);
    // The offsets are written as 64-bit integers for the large list layout.
    let offsets = Buffer::from_slice_ref(&[0i64, 3, 6, 8]);

    LargeListArray::from(
        ArrayData::builder(DataType::LargeList(Box::new(item_field)))
            .len(3)
            .add_buffer(offsets)
            .add_child_data(child)
            .build()
            .unwrap(),
    )
}
/// Builds a `FixedSizeListArray` with five slots of two `Int32` values each,
/// backed by the child values `0..=9`.
#[cfg(feature = "chrono-tz")]
fn make_fixed_size_list_array() -> FixedSizeListArray {
    // Flat child array: 5 slots * 2 values per slot = 10 values.
    let child = ArrayData::builder(DataType::Int32)
        .len(10)
        .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
        .build()
        .unwrap();

    let item_field = Field::new("item", DataType::Int32, true);
    let fixed_list_type = DataType::FixedSizeList(Box::new(item_field), 2);

    FixedSizeListArray::from(
        ArrayData::builder(fixed_list_type)
            .len(5)
            .add_child_data(child)
            .build()
            .unwrap(),
    )
}
/// Builds a `FixedSizeBinaryArray` of width 5 with three slots, backed by the
/// fifteen bytes of `"hellotherearrow"`.
#[cfg(feature = "chrono-tz")]
fn make_fixed_size_binary_array() -> FixedSizeBinaryArray {
    // 3 slots * 5 bytes per slot = 15 bytes.
    let bytes: &[u8] = &b"hellotherearrow"[..];
    FixedSizeBinaryArray::from(
        ArrayData::builder(DataType::FixedSizeBinary(5))
            .len(3)
            .add_buffer(Buffer::from(bytes))
            .build()
            .unwrap(),
    )
}
/// Builds a dense `UnionArray` holding two entries: an `Int32` value under
/// field `"a"` and an `Int64` value under field `"b"`.
#[cfg(feature = "chrono-tz")]
fn make_union_array() -> UnionArray {
    // The builder is created with capacity 7; only two values are appended.
    let mut union_builder = UnionBuilder::with_capacity_dense(7);
    union_builder.append::<Int32Type>("a", 1).unwrap();
    union_builder.append::<Int64Type>("b", 2).unwrap();
    union_builder.build().unwrap()
}
/// Builds a dictionary array keyed by `K` whose `Int32` values `1` and `2`
/// are appended in that order.
#[cfg(feature = "chrono-tz")]
fn make_dictionary_primitive<K: ArrowDictionaryKeyType>() -> ArrayRef {
    let mut dict_builder = PrimitiveDictionaryBuilder::new(
        PrimitiveBuilder::<K>::new(),
        PrimitiveBuilder::<Int32Type>::new(),
    );
    for value in [1, 2] {
        dict_builder.append(value).unwrap();
    }
    Arc::new(dict_builder.finish())
}
/// Builds a dictionary array keyed by `K` whose string values `"foo"` and
/// `"bar"` are appended in that order.
#[cfg(feature = "chrono-tz")]
fn make_dictionary_utf8<K: ArrowDictionaryKeyType>() -> ArrayRef {
    let mut dict_builder =
        StringDictionaryBuilder::new(PrimitiveBuilder::<K>::new(), StringBuilder::new());
    for value in ["foo", "bar"] {
        dict_builder.append(value).unwrap();
    }
    Arc::new(dict_builder.finish())
}
/// Returns the list of target `DataType`s that `test_can_cast_types` tries to
/// cast every sample array to.
///
/// Timestamps are listed for each time unit, both without a timezone and with
/// the `America/New_York` timezone.
#[cfg(feature = "chrono-tz")]
fn get_all_types() -> Vec<DataType> {
    use DataType::*;
    let tz_name = String::from("America/New_York");
    vec![
        Null,
        Boolean,
        // Signed integers. `Int64` was previously missing here (a second
        // `UInt64` stood in its place), so it was never checked as a target.
        Int8,
        Int16,
        Int32,
        Int64,
        // Unsigned integers
        UInt8,
        UInt16,
        UInt32,
        UInt64,
        // Floating point
        Float16,
        Float32,
        Float64,
        // Timestamps without a timezone
        Timestamp(TimeUnit::Second, None),
        Timestamp(TimeUnit::Millisecond, None),
        Timestamp(TimeUnit::Microsecond, None),
        Timestamp(TimeUnit::Nanosecond, None),
        // Timestamps with a timezone
        Timestamp(TimeUnit::Second, Some(tz_name.clone())),
        Timestamp(TimeUnit::Millisecond, Some(tz_name.clone())),
        Timestamp(TimeUnit::Microsecond, Some(tz_name.clone())),
        Timestamp(TimeUnit::Nanosecond, Some(tz_name)),
        // Dates and times of day
        Date32,
        Date64,
        Time32(TimeUnit::Second),
        Time32(TimeUnit::Millisecond),
        Time64(TimeUnit::Microsecond),
        Time64(TimeUnit::Nanosecond),
        // Durations and intervals
        Duration(TimeUnit::Second),
        Duration(TimeUnit::Millisecond),
        Duration(TimeUnit::Microsecond),
        Duration(TimeUnit::Nanosecond),
        Interval(IntervalUnit::YearMonth),
        Interval(IntervalUnit::DayTime),
        Interval(IntervalUnit::MonthDayNano),
        // Binary and string types
        Binary,
        FixedSizeBinary(10),
        LargeBinary,
        Utf8,
        LargeUtf8,
        // List-like types
        List(Box::new(Field::new("item", DataType::Int8, true))),
        List(Box::new(Field::new("item", DataType::Utf8, true))),
        FixedSizeList(Box::new(Field::new("item", DataType::Int8, true)), 10),
        FixedSizeList(Box::new(Field::new("item", DataType::Utf8, false)), 10),
        LargeList(Box::new(Field::new("item", DataType::Int8, true))),
        LargeList(Box::new(Field::new("item", DataType::Utf8, false))),
        // Struct and union
        Struct(vec![
            Field::new("f1", DataType::Int32, false),
            Field::new("f2", DataType::Utf8, true),
        ]),
        Union(
            vec![
                Field::new("f1", DataType::Int32, false),
                Field::new("f2", DataType::Utf8, true),
            ],
            vec![0, 1],
            UnionMode::Dense,
        ),
        // Dictionaries
        Dictionary(Box::new(DataType::Int8), Box::new(DataType::Int32)),
        Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8)),
        Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)),
        // Decimal
        Decimal128(38, 0),
    ]
}
/// Casting a sliced `Utf8` array to `LargeUtf8` must yield only the values
/// inside the slice.
#[test]
fn test_utf8_cast_offsets() {
    // Slice off the first element so the array carries a non-zero offset.
    let sliced = StringArray::from(vec!["a", "b", "c"]).slice(1, 2);
    let casted = cast(&sliced, &DataType::LargeUtf8).unwrap();
    let large = casted.as_any().downcast_ref::<LargeStringArray>().unwrap();
    let strs: Vec<_> = large.into_iter().flatten().collect();
    assert_eq!(strs, &["b", "c"]);
}
/// Casting a sliced list array must give the same result as casting the full
/// array and slicing afterwards.
#[test]
fn test_list_cast_offsets() {
    let target = DataType::LargeList(Box::new(Field::new("item", DataType::Int32, true)));
    let sliced_input = make_list_array().slice(1, 2);
    let full_input: ArrayRef = Arc::new(make_list_array());

    let from_sliced = cast(&sliced_input, &target).unwrap();
    let from_full = cast(&full_input, &target).unwrap();

    assert_eq!(&from_sliced, &from_full.slice(1, 2));
}
/// Casting microsecond timestamps to `Utf8` must render the expected strings,
/// both for an array without a timezone and for one tagged
/// `Australia/Sydney`; null slots stay null.
#[test]
#[cfg(feature = "chrono-tz")]
fn test_timestamp_cast_utf8() {
    // Same raw input for both scenarios.
    let micros = || -> PrimitiveArray<TimestampMicrosecondType> {
        vec![Some(37800000000), None, Some(86339000000)].into()
    };
    // Casts `input` to Utf8 and compares the result with `expected`.
    let check = |input: PrimitiveArray<TimestampMicrosecondType>, expected: StringArray| {
        let out = cast(&(Arc::new(input) as ArrayRef), &DataType::Utf8).unwrap();
        assert_eq!(
            out.as_any().downcast_ref::<StringArray>().unwrap(),
            &expected
        );
    };

    // Without a timezone.
    check(
        micros(),
        StringArray::from(vec![
            Some("1970-01-01 10:30:00"),
            None,
            Some("1970-01-01 23:58:59"),
        ]),
    );

    // With a timezone attached to the array.
    check(
        micros().with_timezone("Australia/Sydney".to_string()),
        StringArray::from(vec![
            Some("1970-01-01 20:30:00"),
            None,
            Some("1970-01-02 09:58:59"),
        ]),
    );
}
}