use arrow::array::{
Array, ArrayRef, BooleanArray, Date32Array, Float32Array, Float64Array, Int16Array, Int32Array,
Int64Array, Int8Array, LargeStringArray, StringArray, Time64NanosecondArray,
TimestampMicrosecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
};
use arrow::compute;
use arrow::datatypes::DataType;
use std::sync::Arc;
use crate::{GgsqlError, Result};
macro_rules! downcast_fn {
($fn_name:ident, $arrow_type:ty, $type_name:expr) => {
pub fn $fn_name(array: &ArrayRef) -> Result<&$arrow_type> {
array.as_any().downcast_ref::<$arrow_type>().ok_or_else(|| {
GgsqlError::InternalError(format!(
"Expected {} array, got {:?}",
$type_name,
array.data_type()
))
})
}
};
}
downcast_fn!(as_f64, Float64Array, "Float64");
downcast_fn!(as_f32, Float32Array, "Float32");
downcast_fn!(as_i64, Int64Array, "Int64");
downcast_fn!(as_i32, Int32Array, "Int32");
downcast_fn!(as_i16, Int16Array, "Int16");
downcast_fn!(as_i8, Int8Array, "Int8");
downcast_fn!(as_u64, UInt64Array, "UInt64");
downcast_fn!(as_u32, UInt32Array, "UInt32");
downcast_fn!(as_u16, UInt16Array, "UInt16");
downcast_fn!(as_u8, UInt8Array, "UInt8");
downcast_fn!(as_str, StringArray, "String");
downcast_fn!(as_bool, BooleanArray, "Boolean");
downcast_fn!(as_date32, Date32Array, "Date32");
downcast_fn!(
as_timestamp_us,
TimestampMicrosecondArray,
"Timestamp(Microsecond)"
);
downcast_fn!(as_time64_ns, Time64NanosecondArray, "Time64(Nanosecond)");
pub fn cast_array(array: &ArrayRef, to: &DataType) -> Result<ArrayRef> {
let from = array.data_type();
let do_cast = |arr: &ArrayRef, dt: &DataType| -> Result<ArrayRef> {
compute::cast(arr, dt)
.map_err(|e| GgsqlError::InternalError(format!("Failed to cast to {:?}: {}", dt, e)))
};
let bridge = match (from, to) {
(DataType::Date32, DataType::Float32 | DataType::Float64) => Some(DataType::Int32),
(
DataType::Timestamp(_, _) | DataType::Time64(_),
DataType::Float32 | DataType::Float64,
) => Some(DataType::Int64),
(DataType::Float32 | DataType::Float64, DataType::Date32) => Some(DataType::Int32),
(
DataType::Float32 | DataType::Float64,
DataType::Timestamp(_, _) | DataType::Time64(_),
) => Some(DataType::Int64),
_ => None,
};
match bridge {
Some(mid) => {
let intermediate = do_cast(array, &mid)?;
do_cast(&intermediate, to)
}
None => do_cast(array, to),
}
}
pub fn new_f64_array(values: Vec<Option<f64>>) -> ArrayRef {
Arc::new(Float64Array::from(values))
}
pub fn new_f64_array_non_null(values: Vec<f64>) -> ArrayRef {
Arc::new(Float64Array::from(values))
}
pub fn new_i32_array(values: Vec<Option<i32>>) -> ArrayRef {
Arc::new(Int32Array::from(values))
}
pub fn new_i64_array(values: Vec<Option<i64>>) -> ArrayRef {
Arc::new(Int64Array::from(values))
}
pub fn new_str_array(values: Vec<Option<&str>>) -> ArrayRef {
Arc::new(StringArray::from(values))
}
pub fn new_string_array(values: Vec<String>) -> ArrayRef {
let refs: Vec<&str> = values.iter().map(|s| s.as_str()).collect();
Arc::new(StringArray::from(refs))
}
pub fn new_bool_array(values: Vec<Option<bool>>) -> ArrayRef {
Arc::new(BooleanArray::from(values))
}
pub fn new_constant_f64(value: f64, len: usize) -> ArrayRef {
Arc::new(Float64Array::from(vec![value; len]))
}
pub fn new_constant_str(value: &str, len: usize) -> ArrayRef {
Arc::new(StringArray::from(vec![value; len]))
}
pub fn new_constant_bool(value: bool, len: usize) -> ArrayRef {
Arc::new(BooleanArray::from(vec![value; len]))
}
pub fn fill_null_f64(array: &Float64Array, fill: f64) -> Float64Array {
let mut builder = arrow::array::Float64Builder::with_capacity(array.len());
for v in array.iter() {
builder.append_value(v.unwrap_or(fill));
}
builder.finish()
}
pub fn value_to_string(array: &ArrayRef, idx: usize) -> String {
if array.is_null(idx) {
return "null".to_string();
}
match array.data_type() {
DataType::Int8 => as_i8(array).unwrap().value(idx).to_string(),
DataType::Int16 => as_i16(array).unwrap().value(idx).to_string(),
DataType::Int32 => as_i32(array).unwrap().value(idx).to_string(),
DataType::Int64 => as_i64(array).unwrap().value(idx).to_string(),
DataType::UInt8 => as_u8(array).unwrap().value(idx).to_string(),
DataType::UInt16 => as_u16(array).unwrap().value(idx).to_string(),
DataType::UInt32 => as_u32(array).unwrap().value(idx).to_string(),
DataType::UInt64 => as_u64(array).unwrap().value(idx).to_string(),
DataType::Float32 => as_f32(array).unwrap().value(idx).to_string(),
DataType::Float64 => as_f64(array).unwrap().value(idx).to_string(),
DataType::Utf8 => as_str(array).unwrap().value(idx).to_string(),
DataType::LargeUtf8 => array
.as_any()
.downcast_ref::<LargeStringArray>()
.unwrap()
.value(idx)
.to_string(),
DataType::Boolean => as_bool(array).unwrap().value(idx).to_string(),
DataType::Date32 => {
let days = as_date32(array).unwrap().value(idx);
format!("{}", days)
}
DataType::Date64 => {
let ms = array
.as_any()
.downcast_ref::<arrow::array::Date64Array>()
.unwrap()
.value(idx);
format!("{}", ms)
}
_ => arrow::util::display::ArrayFormatter::try_new(array.as_ref(), &Default::default())
.map(|f| f.value(idx).to_string())
.unwrap_or_else(|_| format!("{:?}", array.data_type())),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_downcast_f64() {
let arr: ArrayRef = Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0]));
let f64_arr = as_f64(&arr).unwrap();
assert_eq!(f64_arr.value(0), 1.0);
assert_eq!(f64_arr.value(2), 3.0);
}
#[test]
fn test_downcast_wrong_type() {
let arr: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
assert!(as_f64(&arr).is_err());
}
#[test]
fn test_cast_array() {
let arr: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
let casted = cast_array(&arr, &DataType::Float64).unwrap();
let f64_arr = as_f64(&casted).unwrap();
assert_eq!(f64_arr.value(0), 1.0);
}
#[test]
fn test_cast_date32_to_float64() {
let arr: ArrayRef = Arc::new(Date32Array::from(vec![19723, 19875]));
let casted = cast_array(&arr, &DataType::Float64).unwrap();
let f64_arr = as_f64(&casted).unwrap();
assert_eq!(f64_arr.value(0), 19723.0);
assert_eq!(f64_arr.value(1), 19875.0);
}
#[test]
fn test_cast_float64_to_date32() {
let arr: ArrayRef = Arc::new(Float64Array::from(vec![19723.0, 19875.0]));
let casted = cast_array(&arr, &DataType::Date32).unwrap();
assert_eq!(casted.data_type(), &DataType::Date32);
}
#[test]
fn test_cast_timestamp_to_float64() {
use arrow::datatypes::TimeUnit;
let arr: ArrayRef = Arc::new(TimestampMicrosecondArray::from(vec![
1_000_000_i64,
2_000_000,
]));
let casted = cast_array(&arr, &DataType::Float64).unwrap();
let f64_arr = as_f64(&casted).unwrap();
assert_eq!(f64_arr.value(0), 1_000_000.0);
let back = cast_array(&casted, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap();
assert!(matches!(back.data_type(), DataType::Timestamp(_, _)));
}
#[test]
fn test_fill_null_f64() {
let arr = Float64Array::from(vec![Some(1.0), None, Some(3.0)]);
let filled = fill_null_f64(&arr, 0.0);
assert_eq!(filled.value(0), 1.0);
assert_eq!(filled.value(1), 0.0);
assert_eq!(filled.value(2), 3.0);
assert!(!filled.is_null(1));
}
#[test]
fn test_new_constant_f64() {
let arr = new_constant_f64(42.0, 3);
let f64_arr = as_f64(&arr).unwrap();
assert_eq!(f64_arr.len(), 3);
assert_eq!(f64_arr.value(0), 42.0);
assert_eq!(f64_arr.value(2), 42.0);
}
#[test]
fn test_value_to_string() {
let arr: ArrayRef = Arc::new(StringArray::from(vec!["hello", "world"]));
assert_eq!(value_to_string(&arr, 0), "hello");
let arr: ArrayRef = Arc::new(Float64Array::from(vec![3.24]));
assert_eq!(value_to_string(&arr, 0), "3.24");
}
}