hamelin_datafusion 0.6.12

Translate Hamelin TypedAST to DataFusion LogicalPlans
Documentation
//! DataFusion translations for map functions.

use datafusion::logical_expr::expr::Case as DFCase;
use datafusion::logical_expr::{lit, Expr as DFExpr};
use datafusion_functions::core::expr_ext::FieldAccessor;
use datafusion_functions_nested::expr_fn as nested_fn;
use datafusion_functions_nested::map::map_udf;

use hamelin_lib::func::defs::{
    GetMap, MapEmpty, MapFromArrays, MapFromKeyValue, MapFromPairs, MapKeys, MapValues,
};

use super::DataFusionTranslationRegistry;

pub fn register(registry: &mut DataFusionTranslationRegistry) {
    // map() -> map_udf().call([])
    // Empty map - call the UDF with empty key/value arrays
    registry.register::<MapEmpty>(|_params| {
        // Create empty arrays for keys and values
        let empty_keys = datafusion_functions_nested::expr_fn::make_array(vec![]);
        let empty_values = datafusion_functions_nested::expr_fn::make_array(vec![]);
        Ok(map_udf().call(vec![empty_keys, empty_values]))
    });

    // map(keys_array, values_array) -> CASE WHEN keys IS NULL OR values IS NULL THEN NULL ELSE map(keys, values) END
    // The null guard prevents a scalar-vs-array mismatch in DataFusion's built-in map() when
    // the optimizer constant-folds one side to a scalar null but not the other.
    registry.register::<MapFromArrays>(|mut params| {
        let keys = params.take()?.expr;
        let values = params.take()?.expr;
        let condition = keys.clone().is_not_null().and(values.clone().is_not_null());
        let map_expr = map_udf().call(vec![keys, values]);
        Ok(DFExpr::Case(DFCase {
            expr: None,
            when_then_expr: vec![(Box::new(condition), Box::new(map_expr))],
            else_expr: Some(Box::new(lit(datafusion::common::ScalarValue::Null))),
        }))
    });

    // map(array_of_pairs) -> hamelin_map_from_entries(array_of_pairs)
    registry.register::<MapFromPairs>(|mut params| {
        let pairs_array = params.take()?.expr;
        Ok(crate::udf::map_from_entries_udf().call(vec![pairs_array]))
    });

    // map(pair1, pair2, ...) -> map([k1, k2, ...], [v1, v2, ...])
    // Each pair is a tuple/row - need to build key and value arrays
    registry.register::<MapFromKeyValue>(|mut params| {
        // Collect all pairs and build separate key/value arrays
        let mut keys = vec![];
        let mut values = vec![];

        while let Ok(pair) = params.take() {
            // Extract first and second elements from the row/tuple
            // In DataFusion, struct field access uses get_field
            let pair_expr = pair.expr;
            keys.push(pair_expr.clone().field("c0"));
            values.push(pair_expr.field("c1"));
        }

        let keys_array = datafusion_functions_nested::expr_fn::make_array(keys);
        let values_array = datafusion_functions_nested::expr_fn::make_array(values);
        Ok(map_udf().call(vec![keys_array, values_array]))
    });

    // get(map, key) -> map_extract(map, key)[1]
    registry.register::<GetMap>(|mut params| {
        let map_expr = params.take()?.expr;
        let key = params.take()?.expr;
        // map_extract returns an array, we want the first element
        let extracted = nested_fn::map_extract(map_expr, key);
        // Get first element (index 1 in 1-based DataFusion)
        Ok(nested_fn::array_element(
            extracted,
            datafusion::logical_expr::lit(1i64),
        ))
    });

    // map_keys(map) -> map_keys(map)
    registry.register::<MapKeys>(|mut params| {
        let map_expr = params.take()?.expr;
        Ok(nested_fn::map_keys(map_expr))
    });

    // map_values(map) -> map_values(map)
    registry.register::<MapValues>(|mut params| {
        let map_expr = params.take()?.expr;
        Ok(nested_fn::map_values(map_expr))
    });
}