hamelin_datafusion 0.7.8

Translate Hamelin TypedAST to DataFusion LogicalPlans
Documentation
//! Type conversion utilities for Hamelin types to Arrow/DataFusion types.

use std::sync::Arc;

use datafusion::arrow::array::MapFieldNames;
use datafusion::arrow::datatypes::{DataType, Field, Fields};
use datafusion::common::ScalarValue;
use hamelin_lib::types::struct_type::Struct;
use hamelin_lib::types::Type;

use crate::udf::{variant_data_type, variant_fields};

/// Build the Arrow `MapFieldNames` used for map arrays in this crate.
///
/// The names follow the convention of DataFusion's built-in `map()` function:
/// the entries struct is called `"entries"` and its two children are `"key"`
/// and `"value"`.
pub fn map_field_names() -> MapFieldNames {
    let entry = String::from("entries");
    let key = String::from("key");
    let value = String::from("value");
    MapFieldNames { entry, key, value }
}

/// Build an Arrow `DataType::Map` for the given key and value types.
///
/// The layout is `Map(entries: Struct { key, value })` using the field names
/// "entries", "key" and "value", which are the ones DataFusion's built-in
/// `map()` function produces. The names are metadata only — all map access is
/// by position.
///
/// Nullability: the `key` field and the `entries` field are non-nullable; the
/// `value` field is nullable.
pub fn map_data_type(key_type: DataType, value_type: DataType) -> DataType {
    let key_field = Field::new("key", key_type, false);
    let value_field = Field::new("value", value_type, true);
    let entry_struct = DataType::Struct(Fields::from(vec![key_field, value_field]));
    let entries_field = Field::new("entries", entry_struct, false);

    // The second component of `DataType::Map` is Arrow's `keys_sorted` flag.
    DataType::Map(Arc::new(entries_field), false)
}

/// Create a typed NULL ScalarValue for a Hamelin type
pub fn typed_null_scalar(hamelin_type: &Type) -> ScalarValue {
    match hamelin_type {
        Type::Int => ScalarValue::Int64(None),
        Type::Double => ScalarValue::Float64(None),
        Type::String => ScalarValue::Utf8(None),
        Type::Boolean => ScalarValue::Boolean(None),
        Type::Binary => ScalarValue::Binary(None),
        Type::Timestamp => ScalarValue::TimestampMicrosecond(None, Some("+00:00".into())),
        Type::Interval => ScalarValue::IntervalDayTime(None),
        Type::CalendarInterval => ScalarValue::IntervalYearMonth(None),
        Type::Decimal(d) => ScalarValue::Decimal128(None, d.precision as u8, d.scale as i8),
        Type::Array(arr) => {
            let element_type = hamelin_type_to_arrow(&arr.element_type);
            ScalarValue::new_null_list(element_type, true, 1)
        }
        Type::Struct(s) => {
            let fields = struct_to_arrow_fields(s);
            ScalarValue::Struct(Arc::new(datafusion::arrow::array::StructArray::new_null(
                fields, 1,
            )))
        }
        Type::Map(m) => {
            let map_data_type = hamelin_type_to_arrow(&Type::Map(m.clone()));
            ScalarValue::try_new_null(&map_data_type).unwrap_or(ScalarValue::Null)
        }
        Type::Tuple(t) => {
            let fields: Fields = t
                .elements
                .iter()
                .enumerate()
                .map(|(i, elem_type)| {
                    Field::new(format!("f{i}"), hamelin_type_to_arrow(elem_type), true)
                })
                .collect::<Vec<_>>()
                .into();
            ScalarValue::Struct(Arc::new(datafusion::arrow::array::StructArray::new_null(
                fields, 1,
            )))
        }
        Type::Variant => ScalarValue::Struct(Arc::new(
            datafusion::arrow::array::StructArray::new_null(variant_fields(), 1),
        )),
        Type::Unknown | Type::Range(_) | Type::Rows | Type::Function(_) => ScalarValue::Null,
    }
}

/// Convert Hamelin type to Arrow DataType
pub fn hamelin_type_to_arrow(hamelin_type: &Type) -> DataType {
    match hamelin_type {
        Type::Int => DataType::Int64,
        Type::Double => DataType::Float64,
        Type::String => DataType::Utf8,
        Type::Boolean => DataType::Boolean,
        Type::Binary => DataType::Binary,
        Type::Timestamp => DataType::Timestamp(
            datafusion::arrow::datatypes::TimeUnit::Microsecond,
            Some("+00:00".into()),
        ),
        Type::Interval => DataType::Interval(datafusion::arrow::datatypes::IntervalUnit::DayTime),
        Type::CalendarInterval => {
            DataType::Interval(datafusion::arrow::datatypes::IntervalUnit::YearMonth)
        }
        Type::Decimal(d) => DataType::Decimal128(d.precision as u8, d.scale as i8),
        Type::Array(arr) => {
            let element_type = hamelin_type_to_arrow(&arr.element_type);
            DataType::List(Arc::new(Field::new("item", element_type, true)))
        }
        Type::Struct(s) => DataType::Struct(struct_to_arrow_fields(s)),
        Type::Map(m) => {
            let key_type = hamelin_type_to_arrow(&m.key_type);
            let value_type = hamelin_type_to_arrow(&m.value_type);
            map_data_type(key_type, value_type)
        }
        Type::Tuple(t) => {
            let fields: Vec<Field> = t
                .elements
                .iter()
                .enumerate()
                .map(|(i, elem_type)| {
                    Field::new(format!("f{i}"), hamelin_type_to_arrow(elem_type), true)
                })
                .collect();
            DataType::Struct(Fields::from(fields))
        }
        Type::Variant => variant_data_type(),
        Type::Range(r) => {
            let inner = hamelin_type_to_arrow(&r.of);
            DataType::Struct(Fields::from(vec![
                Field::new("begin", inner.clone(), true),
                Field::new("end", inner, true),
            ]))
        }
        Type::Unknown | Type::Rows | Type::Function(_) => DataType::Null,
    }
}

/// Convert the fields of a Hamelin `Struct` into Arrow `Fields`.
///
/// Each Hamelin field keeps its name, has its type converted with
/// [`hamelin_type_to_arrow`], and is marked nullable. Fields are emitted in
/// the order `s.iter()` yields them.
fn struct_to_arrow_fields(s: &Struct) -> Fields {
    let mut arrow_fields = Vec::new();
    for (name, field_type) in s.iter() {
        arrow_fields.push(Field::new(
            name.name(),
            hamelin_type_to_arrow(field_type),
            true,
        ));
    }
    Fields::from(arrow_fields)
}