use std::sync::Arc;
use datafusion::arrow::array::MapFieldNames;
use datafusion::arrow::datatypes::{DataType, Field, Fields};
use datafusion::common::ScalarValue;
use hamelin_lib::types::struct_type::Struct;
use hamelin_lib::types::Type;
use crate::udf::{variant_data_type, variant_fields};
/// Returns the field names used when building Arrow map arrays.
///
/// The names (`"entries"`, `"key"`, `"value"`) are the same ones
/// `map_data_type` uses for the entries struct and its two children.
pub fn map_field_names() -> MapFieldNames {
    let entry = String::from("entries");
    let key = String::from("key");
    let value = String::from("value");
    MapFieldNames { entry, key, value }
}
/// Builds the Arrow `Map` data type for the given key and value types.
///
/// The map is backed by a non-nullable `"entries"` struct holding a
/// non-nullable `"key"` field and a nullable `"value"` field.
pub fn map_data_type(key_type: DataType, value_type: DataType) -> DataType {
    let key_field = Field::new("key", key_type, false);
    let value_field = Field::new("value", value_type, true);
    let entry_struct = DataType::Struct(Fields::from(vec![key_field, value_field]));
    let entries_field = Field::new("entries", entry_struct, false);

    // The trailing `false` is Arrow's "keys sorted" flag.
    DataType::Map(Arc::new(entries_field), false)
}
/// Returns a null [`ScalarValue`] whose Arrow type corresponds to `hamelin_type`.
///
/// Primitive types map to the matching `ScalarValue` variant holding `None`.
/// Struct-shaped types (`Struct`, `Tuple`, `Variant`, `Range`) map to a
/// one-row, all-null struct scalar. `Map` goes through
/// `ScalarValue::try_new_null` on the Arrow type computed by
/// [`hamelin_type_to_arrow`], falling back to the untyped `ScalarValue::Null`
/// if that call fails.
///
/// `Range` uses the struct layout that [`hamelin_type_to_arrow`] defines for
/// ranges, so the null scalar's type agrees with the converted Arrow type
/// instead of degrading to an untyped null.
///
/// `Unknown`, `Rows` and `Function` map to the untyped `ScalarValue::Null`,
/// in line with [`hamelin_type_to_arrow`] mapping them to `DataType::Null`.
pub fn typed_null_scalar(hamelin_type: &Type) -> ScalarValue {
    // Wraps a one-row, all-null struct array with the given fields as a scalar.
    let null_struct = |fields: Fields| {
        ScalarValue::Struct(Arc::new(
            datafusion::arrow::array::StructArray::new_null(fields, 1),
        ))
    };

    match hamelin_type {
        Type::Int => ScalarValue::Int64(None),
        Type::Double => ScalarValue::Float64(None),
        Type::String => ScalarValue::Utf8(None),
        Type::Boolean => ScalarValue::Boolean(None),
        Type::Binary => ScalarValue::Binary(None),
        Type::Timestamp => ScalarValue::TimestampMicrosecond(None, Some("+00:00".into())),
        Type::Interval => ScalarValue::IntervalDayTime(None),
        Type::CalendarInterval => ScalarValue::IntervalYearMonth(None),
        // NOTE(review): `as` silently truncates if precision/scale do not fit in
        // u8/i8 -- presumably validated upstream; confirm.
        Type::Decimal(d) => ScalarValue::Decimal128(None, d.precision as u8, d.scale as i8),
        Type::Array(arr) => {
            let element_type = hamelin_type_to_arrow(&arr.element_type);
            ScalarValue::new_null_list(element_type, true, 1)
        }
        Type::Struct(s) => null_struct(struct_to_arrow_fields(s)),
        Type::Map(_) => {
            // `hamelin_type` already is the map type; no need to clone and rewrap it.
            let map_data_type = hamelin_type_to_arrow(hamelin_type);
            ScalarValue::try_new_null(&map_data_type).unwrap_or(ScalarValue::Null)
        }
        Type::Tuple(t) => {
            // Tuple elements become positional fields named c0, c1, ...
            let fields: Fields = t
                .elements
                .iter()
                .enumerate()
                .map(|(i, elem_type)| {
                    Field::new(format!("c{}", i), hamelin_type_to_arrow(elem_type), true)
                })
                .collect();
            null_struct(fields)
        }
        Type::Variant => null_struct(variant_fields()),
        // Derive the range layout from `hamelin_type_to_arrow` so the two
        // functions cannot drift apart.
        Type::Range(_) => match hamelin_type_to_arrow(hamelin_type) {
            DataType::Struct(fields) => null_struct(fields),
            // Not expected: ranges are converted to a struct type.
            _ => ScalarValue::Null,
        },
        Type::Unknown | Type::Rows | Type::Function(_) => ScalarValue::Null,
    }
}
pub fn hamelin_type_to_arrow(hamelin_type: &Type) -> DataType {
match hamelin_type {
Type::Int => DataType::Int64,
Type::Double => DataType::Float64,
Type::String => DataType::Utf8,
Type::Boolean => DataType::Boolean,
Type::Binary => DataType::Binary,
Type::Timestamp => DataType::Timestamp(
datafusion::arrow::datatypes::TimeUnit::Microsecond,
Some("+00:00".into()),
),
Type::Interval => DataType::Interval(datafusion::arrow::datatypes::IntervalUnit::DayTime),
Type::CalendarInterval => {
DataType::Interval(datafusion::arrow::datatypes::IntervalUnit::YearMonth)
}
Type::Decimal(d) => DataType::Decimal128(d.precision as u8, d.scale as i8),
Type::Array(arr) => {
let element_type = hamelin_type_to_arrow(&arr.element_type);
DataType::List(Arc::new(Field::new("item", element_type, true)))
}
Type::Struct(s) => DataType::Struct(struct_to_arrow_fields(s)),
Type::Map(m) => {
let key_type = hamelin_type_to_arrow(&m.key_type);
let value_type = hamelin_type_to_arrow(&m.value_type);
map_data_type(key_type, value_type)
}
Type::Tuple(t) => {
let fields: Vec<Field> = t
.elements
.iter()
.enumerate()
.map(|(i, elem_type)| {
Field::new(format!("c{}", i), hamelin_type_to_arrow(elem_type), true)
})
.collect();
DataType::Struct(Fields::from(fields))
}
Type::Variant => variant_data_type(),
Type::Range(r) => {
let inner = hamelin_type_to_arrow(&r.of);
DataType::Struct(Fields::from(vec![
Field::new("begin", inner.clone(), true),
Field::new("end", inner, true),
]))
}
Type::Unknown | Type::Rows | Type::Function(_) => DataType::Null,
}
}
/// Converts each field of a Hamelin struct into a nullable Arrow field.
///
/// Fields are emitted in the order `s.iter()` yields them, with each field's
/// type converted by `hamelin_type_to_arrow`.
fn struct_to_arrow_fields(s: &Struct) -> Fields {
    let mut arrow_fields: Vec<Field> = Vec::new();
    for (name, field_type) in s.iter() {
        let field_name = name.name();
        let arrow_type = hamelin_type_to_arrow(field_type);
        arrow_fields.push(Field::new(field_name, arrow_type, true));
    }
    Fields::from(arrow_fields)
}