hamelin_translation 0.9.9

//! Pipeline pass: Transform lowering.
//!
//! Lowers `transform(arr, x -> body)` calls. Each transform takes one of three
//! lowerings:
//!
//! 1. **Direct vectorized fast path** — body matches a known shape (struct
//!    field, tuple element, variant field, array-element cast, identity, the
//!    `coalesce(x, default)` overload, or `to_json_string(x)` over a variant
//!    array). The transform is replaced with a single vectorized expression
//!    that backends translate to a UDF / vectorized op.
//!
//! 2. **Recursive un-fuse** — body is a chain of operations, each link of
//!    which is *itself* fast-path-eligible once decomposed. The chain is
//!    peeled off one link at a time: the inner sub-body becomes its own
//!    `transform`, the outer link becomes a transform over the inner
//!    replacement, and so on. Un-fuse only fires when every link succeeds —
//!    otherwise splitting just multiplies the explode cost.
//!
//! 3. **Explode/aggregate fallback** — generic lowering for everything else.
//!    Generates a `__row_id` / `__indices` / `EXPLODE` / `AGG` / `DROP`
//!    sequence. Handles arbitrary lambda bodies at the cost of a row-shape
//!    detour.
//!
//! The choice is made per transform; sibling transforms over the same array
//! pick independently.
//!
//! **Transformation pattern (explode/aggregate path):**
//! ```text
//! -- Before (in a SET/SELECT command)
//! result = transform(arr, x -> x * 2)
//!
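//! -- After (multiple commands)
//! SET __row_id_0 = uuid()
//! SET __indices_0 = sequence(0, len(arr) - 1, 1)
//! SET __is_empty_0 = coalesce(len(__indices_0) = 0, true)
//! EXPLODE __elem_0 = if(__is_empty_0, [null] AS <type of arr>, arr),
//!         __idx_0 = if(__is_empty_0, [0], __indices_0)
//! SET __body_0 = __elem_0 * 2
//! AGG __result_0 = array_agg(__body_0),
//!     __is_empty_0 = any_value(__is_empty_0),
//!     <col> = any_value(<col>) -- for every other input column
//!     BY __row_id_0 SORT __idx_0
//! DROP __row_id_0, __indices_0, __elem_0, __idx_0, __body_0
//! SET result = if(is_null(arr), null,
//!                 if(__is_empty_0, [] AS <result type>, __result_0))
//! DROP __result_0, __is_empty_0 -- deferred to the end of the pipeline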
//! ```
//!
//! The pass handles:
//! - Multiple transform calls in the same command (each gets unique temp names)
//! - Nested transforms (inner transforms are lowered first)
//! - Transform calls in any expression position (SET, SELECT, WHERE, etc.)

use std::sync::Arc;

use hamelin_lib::{
    err::TranslationError,
    func::defs::{Coalesce, ToJsonString, Transform},
    tree::{
        ast::{
            clause::{SortExpression, SortOrder},
            command::Command,
            expression::Expression,
            identifier::SimpleIdentifier,
            node::Span,
        },
        builder::{
            agg_command, array, call, cast, drop_command, eq, explode_command, field, field_ref,
            is_null, null, pipeline, set_command, subtract, ExpressionBuilder,
        },
        typed_ast::{
            command::TypedCommand,
            context::StatementTranslationContext,
            environment::TypeEnvironment,
            expression::{
                FieldAccess, MapExpressionAlgebra, TypedApply, TypedExpression, TypedExpressionKind,
            },
            pipeline::TypedPipeline,
        },
    },
    types::{array::Array, Type},
};

use crate::unique::UniqueNameGenerator;

// ---------------------------------------------------------------------------
// Transform replacement
// ---------------------------------------------------------------------------

/// How a `transform()` call should be replaced in the command AST.
/// Describes what a single `transform()` call site is rewritten to once its
/// lowering has been chosen.
enum TransformReplacement {
    /// Replace with a direct expression — no extra commands needed.
    /// Used by the un-fuse path (and direct fast paths) which emit a
    /// vectorized expression.
    Vectorized(Arc<Expression>),

    /// Replace via the explode/reaggregate pipeline. The transform call becomes
    /// `if(is_null(arr), null, if(__is_empty, [] AS result_type, __result))`.
    ExplodeAggregate {
        /// Column that receives the re-aggregated array (`array_agg` of the
        /// per-element body values).
        result_name: SimpleIdentifier,
        /// Boolean column flagging rows whose input array was empty (or null);
        /// selects the empty-array branch of the replacement expression.
        is_empty_name: SimpleIdentifier,
        /// AST of the original array argument, reused for the outer
        /// `is_null(arr)` guard.
        array_ast: Arc<Expression>,
        /// Resolved type of the whole `transform()` call; the empty-array
        /// literal is cast to this type.
        result_type: Arc<Type>,
    },
}

/// Collection of unique name generators for transform lowering.
/// Collection of unique name generators for transform lowering.
///
/// One generator per kind of temporary column, so every lowered transform in
/// a pass gets its own set of non-colliding names.
struct TransformNameGenerators {
    /// Per-row identifier column (`SET __row_id = uuid()`); the AGG groups by it.
    row_id: UniqueNameGenerator,
    /// Column holding the index sequence for the array.
    indices: UniqueNameGenerator,
    /// Exploded array element column.
    elem: UniqueNameGenerator,
    /// Exploded element index column; the AGG sorts by it.
    idx: UniqueNameGenerator,
    /// Column holding the lambda body evaluated for one element.
    body: UniqueNameGenerator,
    /// Column holding the re-aggregated result array.
    result: UniqueNameGenerator,
    /// Flag column marking rows that were padded because the array was empty.
    is_empty: UniqueNameGenerator,
    /// Lambda variables synthesized when un-fuse chains nested transforms.
    /// Lives on `TransformNameGenerators` so siblings inside the same command
    /// can't collide.
    unfuse_param: UniqueNameGenerator,
}

impl TransformNameGenerators {
    /// Creates a fresh set of generators, one per temporary-column prefix.
    fn new() -> Self {
        let row_id = UniqueNameGenerator::new("__row_id");
        let indices = UniqueNameGenerator::new("__indices");
        let elem = UniqueNameGenerator::new("__elem");
        let idx = UniqueNameGenerator::new("__idx");
        let body = UniqueNameGenerator::new("__body");
        let result = UniqueNameGenerator::new("__result");
        let is_empty = UniqueNameGenerator::new("__is_empty");
        let unfuse_param = UniqueNameGenerator::new("__unfuse");

        Self {
            row_id,
            indices,
            elem,
            idx,
            body,
            result,
            is_empty,
            unfuse_param,
        }
    }
}

// ---------------------------------------------------------------------------
// Public Entry Point
// ---------------------------------------------------------------------------

/// Lower every `transform()` call in the pipeline, either to a vectorized
/// expression or to the EXPLODE + AGG pattern.
///
/// The pipeline is rewritten and re-typechecked repeatedly until no
/// `transform()` call remains, which is how nested transforms get peeled.
/// DROP commands for the leftover result columns are collected along the way
/// and spliced in at the end, ahead of the first DML command if there is one.
///
/// Contract: `(Arc<TypedPipeline>, &mut ctx) -> Result<Arc<TypedPipeline>, ...>`
pub fn lower_transform(
    pipeline: Arc<TypedPipeline>,
    ctx: &mut StatementTranslationContext,
) -> Result<Arc<TypedPipeline>, Arc<TranslationError>> {
    use hamelin_lib::tree::ast::command::CommandClass;

    let mut lowered = pipeline;
    let mut pending_drops: Vec<Arc<Command>> = Vec::new();

    // Each round rewrites the pipeline and re-typechecks it with the same
    // context; we stop once a round finds nothing left to lower.
    loop {
        if !pipeline_has_transform(&lowered)? {
            break;
        }
        let rewritten = transform_pipeline(&lowered, ctx, &mut pending_drops)?;
        lowered = Arc::new(TypedPipeline::from_ast_with_context(
            Arc::new(rewritten),
            ctx,
        ));
    }

    if pending_drops.is_empty() {
        return Ok(lowered);
    }

    let mut final_ast = (*lowered.ast).clone();
    // Deferred drops go before any DML command (e.g. APPEND) so they never
    // land after the sink; without one they are appended at the end.
    let mut slot = final_ast
        .commands
        .iter()
        .position(|c| c.kind.command_class() == CommandClass::Dml)
        .unwrap_or(final_ast.commands.len());
    for drop_cmd in pending_drops {
        final_ast.commands.insert(slot, drop_cmd);
        slot += 1;
    }

    Ok(Arc::new(TypedPipeline::from_ast_with_context(
        Arc::new(final_ast),
        ctx,
    )))
}

// ---------------------------------------------------------------------------
// Detection
// ---------------------------------------------------------------------------

/// Check if a pipeline has any transform() calls.
fn pipeline_has_transform(pipeline: &TypedPipeline) -> Result<bool, Arc<TranslationError>> {
    let valid = pipeline.valid_ref()?;
    Ok(valid.commands.iter().any(command_has_transform))
}

/// Reports whether at least one expression in the command is a `transform()`
/// call.
fn command_has_transform(cmd: &Arc<TypedCommand>) -> bool {
    let first_hit = cmd.find_expression(&mut |candidate| is_transform_call(candidate));
    first_hit.is_some()
}

/// Returns `true` when the expression is a function application whose
/// function definition is `Transform`.
fn is_transform_call(expr: &TypedExpression) -> bool {
    match &expr.kind {
        TypedExpressionKind::Apply(apply) => {
            apply.function_def.type_id() == std::any::TypeId::of::<Transform>()
        }
        _ => false,
    }
}

// ---------------------------------------------------------------------------
// Transform Pipeline
// ---------------------------------------------------------------------------

/// Rewrite one pipeline: every command is run through [`transform_command`]
/// and the commands it yields are appended, in order, to a new pipeline AST
/// carrying the original pipeline's span.
///
/// A fresh set of name generators is created per call and shared by all
/// commands of the pipeline.
fn transform_pipeline(
    in_pipeline: &TypedPipeline,
    ctx: &mut StatementTranslationContext,
    deferred_drops: &mut Vec<Arc<Command>>,
) -> Result<hamelin_lib::tree::ast::pipeline::Pipeline, Arc<TranslationError>> {
    let valid = in_pipeline.valid_ref()?;
    let mut name_gens = TransformNameGenerators::new();
    let mut out = pipeline().at(in_pipeline.ast.span.clone());

    for cmd in valid.commands.iter() {
        let emitted = transform_command(cmd, &mut name_gens, ctx, deferred_drops)?;
        out = emitted
            .into_iter()
            .fold(out, |acc, lowered_cmd| acc.command(lowered_cmd));
    }

    Ok(out.build())
}

/// Transform a single command - may expand to multiple commands if it contains transform() calls.
fn transform_command(
    cmd: &Arc<TypedCommand>,
    name_gens: &mut TransformNameGenerators,
    ctx: &mut StatementTranslationContext,
    deferred_drops: &mut Vec<Arc<Command>>,
) -> Result<Vec<Arc<Command>>, Arc<TranslationError>> {
    // Find all transform() calls in this command
    let mut transforms = collect_transforms(cmd);

    if transforms.is_empty() {
        // No transforms - pass through unchanged
        return Ok(vec![cmd.ast.clone()]);
    }

    // Process inner (nested) transforms before outer ones. find_expression uses
    // pre-order traversal, so outer transforms appear first in the vec. Reversing
    // ensures inner transforms are lowered first. This matters because the outer
    // transform's generated commands (SET __is_empty, padded EXPLODE) embed the
    // array expression AST, and if that AST still contains an un-lowered
    // transform() call, the pipeline ends up with dangling references after
    // re-typechecking.
    transforms.reverse();

    // For each transform, we need to:
    // 1. Extract it to a temp column name
    // 2. Generate the lowering commands (SET uuid, SET sequence, EXPLODE, SET body, AGG, DROP)
    // 3. Replace the transform call in the original command with a column reference

    let mut result: Vec<Arc<Command>> = Vec::new();
    // Maps each transform's AST Arc (for identity matching) to its replacement.
    let mut replacements: Vec<(Arc<Expression>, TransformReplacement)> = Vec::new();
    // Columns from prior transforms that must be preserved through subsequent AGGs
    let mut prior_result_columns: Vec<SimpleIdentifier> = Vec::new();

    for transform_expr in transforms {
        let TypedExpressionKind::Apply(apply) = &transform_expr.kind else {
            continue;
        };

        // Extract array and lambda from the transform call
        let array_expr = apply.parameter_binding.get_by_name("array").map_err(|e| {
            ctx.error("transform() missing 'array' parameter")
                .at(&*transform_expr.ast)
                .with_source_boxed(e.into())
                .emit()
        })?;
        let lambda_expr = apply.parameter_binding.get_by_name("lambda").map_err(|e| {
            ctx.error("transform() missing 'lambda' parameter")
                .at(&*transform_expr.ast)
                .with_source_boxed(e.into())
                .emit()
        })?;

        let TypedExpressionKind::Lambda(lambda) = &lambda_expr.kind else {
            return Err(ctx
                .error("transform() second argument must be a lambda")
                .at(&*transform_expr.ast)
                .emit());
        };

        // Get the lambda parameter name to substitute
        let lambda_param = lambda.parameters[0].name.clone();

        // Try the recursive un-fuse before falling back to explode/reaggregate.
        // Runs before name generation to avoid advancing the unique-name
        // counters unnecessarily.
        if let Some(replacement_ast) = attempt_vectorize(
            array_expr,
            &lambda.body,
            &lambda_param,
            &cmd.input_schema,
            name_gens,
            ctx,
        ) {
            replacements.push((
                transform_expr.ast.clone(),
                TransformReplacement::Vectorized(replacement_ast),
            ));
            continue;
        }

        // Generate unique names for this transform (only needed for explode/reaggregate)
        let row_id_name = name_gens.row_id.next(&cmd.input_schema);
        let indices_name = name_gens.indices.next(&cmd.input_schema);
        let elem_name = name_gens.elem.next(&cmd.input_schema);
        let idx_name = name_gens.idx.next(&cmd.input_schema);
        let body_name = name_gens.body.next(&cmd.input_schema);
        let result_name = name_gens.result.next(&cmd.input_schema);
        let is_empty_name = name_gens.is_empty.next(&cmd.input_schema);

        let array_ast = array_expr.ast.as_ref().clone();

        // 1. SET __row_id = uuid()
        let set_row_id = set_command()
            .named_field(row_id_name.clone(), call("uuid"))
            .at(cmd.ast.span)
            .build();
        result.push(Arc::new(set_row_id));

        // 2. SET __indices = sequence(0, len(arr) - 1, 1)
        let len_expr = call("len").arg(array_expr.ast.clone());
        let indices_expr = call("sequence").arg(0).arg(subtract(len_expr, 1)).arg(1);
        let set_indices = set_command()
            .named_field(indices_name.clone(), indices_expr)
            .at(cmd.ast.span)
            .build();
        result.push(Arc::new(set_indices));

        // 3. SET __is_empty = coalesce(len(__indices) = 0, true)
        // Computed from __indices rather than from arr directly, because arr may
        // contain nested transform() calls that haven't been lowered yet. Using
        // __indices (a column ref) avoids re-introducing transform() calls into
        // generated commands. coalesce(..., true) handles the null-indices case
        // (from null arrays) — the replacement expression's outer is_null(arr)
        // check handles null arrays separately.
        let set_is_empty = set_command()
            .named_field(
                is_empty_name.clone(),
                call("coalesce")
                    .arg(eq(call("len").arg(field_ref(indices_name.clone())), 0))
                    .arg(true),
            )
            .at(cmd.ast.span)
            .build();
        result.push(Arc::new(set_is_empty));

        // 4. EXPLODE __elem = if(__is_empty, [null] AS array_type, arr),
        //           __idx  = if(__is_empty, [0], __indices)
        // Pad empty arrays with a sentinel so EXPLODE doesn't drop the row.
        // The __is_empty flag lets us restore the correct result afterwards.
        // The sentinel [null] must be cast to the array's type so that backends
        // (e.g. DataFusion) don't coerce nested array types incorrectly when
        // unifying the if-branches.
        let sentinel = cast(
            array().element(null()),
            array_expr.resolved_type.as_ref().clone(),
        );

        let padded_array = call("if")
            .arg(field_ref(is_empty_name.clone()))
            .arg(sentinel)
            .arg(Arc::new(array_ast));
        let padded_indices = call("if")
            .arg(field_ref(is_empty_name.clone()))
            .arg(array().element(0))
            .arg(field_ref(indices_name.clone()));
        let explode = explode_command()
            .named_field(elem_name.clone(), padded_array)
            .named_field(idx_name.clone(), padded_indices)
            .at(cmd.ast.span)
            .build();
        result.push(Arc::new(explode));

        // 5. SET __body = body[lambda_param/__elem]
        // Substitute the lambda parameter with __elem in the body
        let body_with_substitution =
            substitute_lambda_param(&lambda.body, &lambda_param, &elem_name);
        let set_body = set_command()
            .named_field(body_name.clone(), body_with_substitution)
            .at(cmd.ast.span)
            .build();
        result.push(Arc::new(set_body));

        // 6. AGG __result = array_agg(__body), __is_empty = any_value(__is_empty)
        //    BY __row_id, ...other_cols SORT __idx
        // We need to group by the row_id to reconstruct the original rows
        // Also include all other columns from the input schema
        let agg = build_agg_command(
            &result_name,
            &body_name,
            &row_id_name,
            &idx_name,
            &is_empty_name,
            &cmd.input_schema,
            &[
                row_id_name.clone(),
                indices_name.clone(),
                elem_name.clone(),
                idx_name.clone(),
                body_name.clone(),
                is_empty_name.clone(),
            ],
            &prior_result_columns,
            cmd.ast.span,
        );
        result.push(Arc::new(agg));

        // 7. DROP __row_id, __indices, __elem, __idx, __body
        // Note: __is_empty is NOT dropped here — it's used in the replacement expression
        // and dropped afterwards along with __result.
        let drop = drop_command()
            .field(row_id_name)
            .field(indices_name)
            .field(elem_name)
            .field(idx_name)
            .field(body_name)
            .at(cmd.ast.span)
            .build();
        result.push(Arc::new(drop));

        // Track the replacement (use transform's AST Arc for identity comparison)
        // Also record these columns so subsequent AGGs preserve them
        prior_result_columns.push(result_name.clone());
        prior_result_columns.push(is_empty_name.clone());
        replacements.push((
            transform_expr.ast.clone(),
            TransformReplacement::ExplodeAggregate {
                result_name,
                is_empty_name,
                array_ast: array_expr.ast.clone(),
                result_type: transform_expr.resolved_type.clone(),
            },
        ));
    }

    // Now transform the original command, replacing transform() calls
    let transformed_cmd = replace_transforms_in_command(cmd, &replacements);
    result.push(transformed_cmd);

    // 8. DROP all __result and __is_empty columns (after the assignment)
    let drop_fields: Vec<_> = replacements
        .iter()
        .filter_map(|(_, r)| match r {
            TransformReplacement::ExplodeAggregate {
                result_name,
                is_empty_name,
                ..
            } => Some((result_name.clone(), is_empty_name.clone())),
            TransformReplacement::Vectorized(_) => None,
        })
        .collect();
    if !drop_fields.is_empty() {
        let mut drop_results = drop_command().at(cmd.ast.span);
        for (result_name, is_empty_name) in &drop_fields {
            drop_results = drop_results.field(result_name.clone());
            drop_results = drop_results.field(is_empty_name.clone());
        }
        deferred_drops.push(Arc::new(drop_results.build()));
    }

    Ok(result)
}

/// Gather the "leaf" `transform()` calls of a command, i.e. those whose own
/// arguments contain no further `transform()` call.
///
/// Restricting to leaves means the outer `while` loop in `lower_transform`
/// peels one nesting layer per iteration, re-typechecking in between.
///
/// The command is searched repeatedly; calls already returned are remembered
/// by the identity (`Arc::ptr_eq`) of their `ast` field, which stays reliable
/// for synthetic expressions that lack unique source spans.
fn collect_transforms(cmd: &Arc<TypedCommand>) -> Vec<Arc<TypedExpression>> {
    let mut seen: Vec<Arc<Expression>> = Vec::new();
    let mut leaves = Vec::new();

    loop {
        let next_leaf = cmd.find_expression(&mut |candidate| {
            is_transform_call(candidate)
                && !seen.iter().any(|known| Arc::ptr_eq(known, &candidate.ast))
                && !has_nested_transform(candidate)
        });
        let Some(leaf) = next_leaf else {
            break;
        };
        seen.push(leaf.ast.clone());
        leaves.push(Arc::new(leaf.clone()));
    }

    leaves
}

/// Reports whether any argument of a function application contains a
/// `transform()` call. Expressions that are not applications yield `false`.
fn has_nested_transform(expr: &TypedExpression) -> bool {
    match &expr.kind {
        TypedExpressionKind::Apply(apply) => {
            for arg in apply.parameter_binding.iter() {
                if arg.find(&mut |inner| is_transform_call(inner)).is_some() {
                    return true;
                }
            }
            false
        }
        _ => false,
    }
}

/// Rebuild the AST of `body`, swapping every field reference named
/// `param_name` for a reference to `replacement_name`. All other nodes are
/// left to the algebra's default handling.
fn substitute_lambda_param(
    body: &TypedExpression,
    param_name: &SimpleIdentifier,
    replacement_name: &SimpleIdentifier,
) -> Arc<Expression> {
    /// Expression algebra carrying the "from" and "to" names of the rename.
    struct RenameParam<'a> {
        param_name: &'a SimpleIdentifier,
        replacement_name: &'a SimpleIdentifier,
    }

    impl MapExpressionAlgebra for RenameParam<'_> {
        fn field_reference(
            &mut self,
            node: &hamelin_lib::tree::typed_ast::expression::TypedFieldReference,
            expr: &TypedExpression,
        ) -> Arc<Expression> {
            // A reference whose name cannot be resolved counts as "not the
            // lambda parameter".
            let is_param = node
                .field_name
                .valid_ref()
                .map_or(false, |name| name == self.param_name);

            if !is_param {
                // Unrelated reference: reuse the existing AST node.
                return expr.ast.clone();
            }
            Arc::new(field_ref(self.replacement_name.clone()).build())
        }
    }

    body.cata(&mut RenameParam {
        param_name,
        replacement_name,
    })
}

/// Build the AGG command that folds exploded elements back into one array
/// per original row.
///
/// The emitted command:
/// - aggregates `body_name` into `result_name` with `array_agg`;
/// - carries `is_empty_name`, every `input_schema` column not listed in
///   `temp_columns`, and every column in `prior_result_columns` through with
///   `any_value()`;
/// - groups by `row_id_name` and sorts ascending by `idx_name`.
///
/// `prior_result_columns` contains `__result_N` and `__is_empty_N` columns from
/// earlier sibling transforms in the same command that must survive this AGG.
fn build_agg_command(
    result_name: &SimpleIdentifier,
    body_name: &SimpleIdentifier,
    row_id_name: &SimpleIdentifier,
    idx_name: &SimpleIdentifier,
    is_empty_name: &SimpleIdentifier,
    input_schema: &TypeEnvironment,
    temp_columns: &[SimpleIdentifier],
    prior_result_columns: &[SimpleIdentifier],
    span: Span,
) -> Command {
    // Result array, the preserved __is_empty flag, and the grouping key.
    // __row_id comes from uuid() and is unique per original row, so it alone
    // identifies a group.
    let mut builder = agg_command()
        .named_aggregate(
            result_name.clone(),
            call("array_agg").arg(field_ref(body_name.clone())),
        )
        .at(span)
        .named_aggregate(
            is_empty_name.clone(),
            call("any_value").arg(field_ref(is_empty_name.clone())),
        )
        .group_by(row_id_name.clone());

    // Original input columns were duplicated by EXPLODE, so they are identical
    // within a group and any_value() restores them. Temp columns are not part
    // of the original schema and are left out.
    for (column, _) in input_schema.as_struct().iter() {
        let is_temp = temp_columns
            .iter()
            .any(|temp| temp.as_str() == column.name());
        if !is_temp {
            let carried = column.clone();
            builder = builder.named_aggregate(
                carried.clone(),
                call("any_value").arg(field_ref(carried)),
            );
        }
    }

    // Results of earlier sibling transforms ride along the same way.
    for prior in prior_result_columns {
        builder = builder.named_aggregate(
            prior.clone(),
            call("any_value").arg(field_ref(prior.clone())),
        );
    }

    // Ordering by __idx keeps the elements in their original array order.
    let by_index = SortExpression {
        span: Span::NONE,
        expression: Arc::new(field_ref(idx_name.clone()).build()),
        order: Some(SortOrder::Asc),
    };

    builder.sort_expr(by_index).build()
}

/// Rewrite a command's expressions, swapping each recorded `transform()` call
/// for its replacement.
///
/// A call is matched to its replacement by pointer identity of its AST node.
/// - `Vectorized`: the replacement expression is used as-is.
/// - `ExplodeAggregate`: the call becomes a guarded column reference,
///   `if(is_null(arr), null, if(__is_empty, [] AS result_type, __result))`.
///
/// Any other application is rebuilt from its already-rewritten children.
fn replace_transforms_in_command(
    cmd: &Arc<TypedCommand>,
    replacements: &[(Arc<Expression>, TransformReplacement)],
) -> Arc<Command> {
    struct SwapTransforms<'a> {
        replacements: &'a [(Arc<Expression>, TransformReplacement)],
    }

    impl MapExpressionAlgebra for SwapTransforms<'_> {
        fn apply(
            &mut self,
            node: &TypedApply,
            expr: &TypedExpression,
            children: hamelin_lib::func::def::ParameterBinding<Arc<Expression>>,
        ) -> Arc<Expression> {
            let is_transform =
                node.function_def.type_id() == std::any::TypeId::of::<Transform>();
            // Only transform() calls are looked up; the first entry whose AST
            // is this very node wins.
            let hit = if is_transform {
                self.replacements
                    .iter()
                    .find(|(target, _)| Arc::ptr_eq(&expr.ast, target))
            } else {
                None
            };

            match hit {
                Some((_, TransformReplacement::Vectorized(ast))) => ast.clone(),
                Some((
                    _,
                    TransformReplacement::ExplodeAggregate {
                        result_name,
                        is_empty_name,
                        array_ast,
                        result_type,
                    },
                )) => {
                    // Inner guard: padded (empty) arrays yield a typed empty
                    // array instead of the aggregated sentinel.
                    let empty_result = cast(array(), result_type.as_ref().clone());
                    let when_not_null = call("if")
                        .arg(field_ref(is_empty_name.clone()))
                        .arg(empty_result)
                        .arg(field_ref(result_name.clone()));
                    // Outer guard: a null input array yields null.
                    let guarded = call("if")
                        .arg(is_null(array_ast.clone()))
                        .arg(null())
                        .arg(when_not_null)
                        .build();
                    Arc::new(guarded)
                }
                None => node.replace_children_ast(expr, children),
            }
        }
    }

    cmd.cata_expressions(&mut SwapTransforms { replacements })
}

// ---------------------------------------------------------------------------
// Recursive un-fuse / fast paths
// ---------------------------------------------------------------------------

/// Signature of a fast-path matcher: inspects a `transform(arr, x -> body)`
/// shape and returns the vectorized replacement AST when it recognizes the
/// shape, `None` otherwise.
///
/// Arguments, in order: the typed array expression (`arr`), the typed lambda
/// body (`body`), and the lambda parameter name (`x`).
type FastPathMatcher =
    fn(&TypedExpression, &TypedExpression, &SimpleIdentifier) -> Option<Arc<Expression>>;

/// Table of fast-path matchers, in priority order. The first matcher that
/// returns `Some` wins.
///
/// Each matcher inspects the typed body of `transform(arr, x -> body)` and
/// returns a vectorized AST replacement that backends translate to a
/// vectorized op.
///
/// The structural matchers (struct/tuple/variant field, array-element cast)
/// are needed even though the typechecker emits broadcast field access /
/// `ArrayElementCast` structurally on direct user input — un-fuse synthesizes
/// nested `transform(arr, x -> x.field)` shapes whose innermost link must hit
/// a matcher to terminate the recursion.
///
/// Entry order is significant: `match_fast_path` walks the slice front to
/// back and stops at the first hit.
const TRANSFORM_FAST_PATHS: &[FastPathMatcher] = &[
    vectorize_identity,
    vectorize_struct_field_access,
    vectorize_tuple_element_access,
    vectorize_variant_field_access,
    vectorize_array_element_cast,
    vectorize_to_json_string_variant,
    vectorize_coalesce_with_default,
];

/// Try to lower `transform(arr, x -> body)` to a vectorized expression,
/// recursing through the body when it is a chain of fast-path-eligible links.
///
/// Strategy:
/// 1. Offer `(arr, body, x)` unchanged to every matcher in
///    `TRANSFORM_FAST_PATHS`; a hit is returned immediately.
/// 2. Otherwise split `body` into one outer "link" and one inner sub-body via
///    `decompose_link` (the link is the operation directly wrapping the
///    x-bearing slot) and recurse on `transform(arr, x -> inner)`.
/// 3. Re-typecheck the inner replacement, bind a freshly named lambda variable
///    to its element type, rebuild the outer link around that variable, and
///    require the outer link to match a fast path **directly**.
///
/// The outer step deliberately never decomposes again. If it did, shapes for
/// which `decompose_link` always succeeds would recurse forever (e.g.
/// `upper(x)` decomposes to identity inside `upper(_)`, yet `upper` has no
/// fast path). It also encodes the rule that un-fuse only fires when *every*
/// link of the chain matches a fast path.
///
/// Returns `None` whenever any step fails, leaving the caller to fall back to
/// the explode/aggregate lowering.
///
/// Soundness of the rewrite is **not** verified here. If a fast-path matcher
/// emits a malformed AST, the post-normalization re-typecheck in
/// `normalize_statement` will catch it and render a Stage::Normalization
/// diagnostic against the synthetic source — the existing safety net for
/// "normalization produced a bad AST".
fn attempt_vectorize(
    array_expr: &TypedExpression,
    body: &TypedExpression,
    lambda_param: &SimpleIdentifier,
    bindings: &Arc<TypeEnvironment>,
    name_gens: &mut TransformNameGenerators,
    ctx: &mut StatementTranslationContext,
) -> Option<Arc<Expression>> {
    // Step 1: the body as a whole may already be a known shape.
    if let direct @ Some(_) = match_fast_path(array_expr, body, lambda_param) {
        return direct;
    }

    // Step 2: peel one outer link off the body and vectorize what is left.
    let (inner_body, fill_outer_slot) = decompose_link(body, lambda_param)?;
    let inner_ast = attempt_vectorize(
        array_expr,
        inner_body,
        lambda_param,
        bindings,
        name_gens,
        ctx,
    )?;

    // Step 3: the outer matchers need a typed array, so re-typecheck the inner
    // replacement. It has to be an array for the outer transform to make
    // sense; its element type is what the fresh variable gets bound to.
    let inner_typed = type_check_in_env(&inner_ast, bindings, ctx);
    let element_type = match inner_typed.resolved_type.as_ref() {
        Type::Array(inner_array) => inner_array.element_type.as_ref().clone(),
        _ => return None,
    };

    // Fresh lambda variable, chosen so the input schema does not shadow it.
    let fresh_param = name_gens.unfuse_param.next(bindings);

    // Rebuild the outer link around the fresh variable and type-check it in
    // an environment where that variable has the element type.
    let slot_ref = Arc::new(field_ref(fresh_param.clone()).build());
    let outer_ast = fill_outer_slot(slot_ref);
    let scoped_env =
        TypeEnvironment::clone(bindings).with(fresh_param.clone().into(), element_type);
    let outer_env = Arc::new(scoped_env);
    let outer_typed = type_check_in_env(&outer_ast, &outer_env, ctx);

    // Direct match only — intentionally no recursion for the outer link.
    match_fast_path(&inner_typed, &outer_typed, &fresh_param)
}

/// Run the matchers of [`TRANSFORM_FAST_PATHS`] front to back and return the
/// replacement produced by the first one that recognizes the shape, or
/// `None` when none does.
fn match_fast_path(
    array_expr: &TypedExpression,
    body: &TypedExpression,
    lambda_param: &SimpleIdentifier,
) -> Option<Arc<Expression>> {
    TRANSFORM_FAST_PATHS
        .iter()
        .find_map(|matcher| matcher(array_expr, body, lambda_param))
}

/// Build a typed expression from `ast`, using an expression context derived
/// from `ctx` for the given `bindings`.
fn type_check_in_env(
    ast: &Arc<Expression>,
    bindings: &Arc<TypeEnvironment>,
    ctx: &mut StatementTranslationContext,
) -> TypedExpression {
    let shared_ast = Arc::clone(ast);
    let mut scoped_ctx = ctx.default_expression_context(bindings);
    TypedExpression::from_ast_with_context(shared_ast, &mut scoped_ctx)
}

/// Identify the body's outer operation when there's exactly one x-bearing
/// argument slot, and produce a closure that rebuilds the outer link's AST
/// when the slot is filled with a fresh expression.
///
/// The link must:
/// - Have *exactly one* argument position that contains a reference to
///   `lambda_param`.
/// - Have all other argument positions reference only outer-scope columns or
///   literals — no occurrence of `lambda_param` in any other arg.
///
/// Recognized link kinds:
/// - `Apply(f, args)`: any function call where one arg contains the lambda
///   var and the others don't.
/// - `FieldLookup(value, field)`: value is the x slot, field is fixed.
/// - `Cast(value, target)`: value is the x slot, target is fixed.
fn decompose_link<'a>(
    body: &'a TypedExpression,
    lambda_param: &SimpleIdentifier,
) -> Option<(
    &'a TypedExpression,
    Box<dyn FnOnce(Arc<Expression>) -> Arc<Expression> + 'a>,
)> {
    match &body.kind {
        TypedExpressionKind::FieldLookup(fl) => {
            // The value subtree must reference lambda_param.
            if !subtree_references_lambda_param(&fl.value, lambda_param) {
                return None;
            }
            // FieldLookup has exactly one value child; nothing else to check.
            let field_id = match &body.ast.kind {
                hamelin_lib::tree::ast::expression::ExpressionKind::FieldLookup(fl_ast) => {
                    fl_ast.field_identifier.clone()
                }
                _ => return None,
            };
            let span = body.ast.span.clone();
            let rebuild: Box<dyn FnOnce(Arc<Expression>) -> Arc<Expression>> =
                Box::new(move |slot| {
                    Arc::new(Expression {
                        span,
                        kind: hamelin_lib::tree::ast::expression::FieldLookup {
                            value: slot,
                            field_identifier: field_id,
                        }
                        .into(),
                    })
                });
            Some((fl.value.as_ref(), rebuild))
        }
        TypedExpressionKind::Cast(cast_node) => {
            if !subtree_references_lambda_param(&cast_node.value, lambda_param) {
                return None;
            }
            let target_type = cast_node.target_type.clone();
            let span = body.ast.span.clone();
            let rebuild: Box<dyn FnOnce(Arc<Expression>) -> Arc<Expression>> =
                Box::new(move |slot| {
                    Arc::new(Expression {
                        span,
                        kind: hamelin_lib::tree::ast::expression::Cast {
                            expression: slot,
                            target_type: Arc::new(target_type),
                        }
                        .into(),
                    })
                });
            Some((cast_node.value.as_ref(), rebuild))
        }
        TypedExpressionKind::Apply(apply) => {
            // Collect indices of args that mention lambda_param. Exactly one
            // must do so.
            let mut x_slot_index: Option<usize> = None;
            for (i, arg) in apply.parameter_binding.iter().enumerate() {
                if subtree_references_lambda_param(arg, lambda_param) {
                    if x_slot_index.is_some() {
                        // More than one x-bearing slot — bail.
                        return None;
                    }
                    x_slot_index = Some(i);
                }
            }
            let x_slot_index = x_slot_index?;
            let inner_sub_body = apply.parameter_binding.get_by_index(x_slot_index).ok()?;

            // Build a rebuild closure that constructs an AST for this Apply
            // with the x slot replaced by a new expression. We rebuild as a
            // FunctionCall so the typechecker re-resolves the function based
            // on the new arg type — important for cases like `coalesce(x, lit)`
            // where the chosen overload depends on whether the first arg is a
            // scalar or an array.
            let function_name = apply.function_def.name().to_string();
            // Pre-snapshot the non-x arg ASTs, in their existing positional
            // order. The slot index will be filled in at build time.
            let arg_asts: Vec<Arc<Expression>> = apply
                .parameter_binding
                .iter()
                .map(|a| a.ast.clone())
                .collect();
            let span = body.ast.span.clone();
            let rebuild: Box<dyn FnOnce(Arc<Expression>) -> Arc<Expression>> =
                Box::new(move |slot| {
                    let mut positional = arg_asts;
                    positional[x_slot_index] = slot;
                    Arc::new(Expression {
                        span,
                        kind: hamelin_lib::tree::ast::expression::FunctionCall {
                            name: SimpleIdentifier::new(function_name).into(),
                            positional_args: positional,
                            named_args: Default::default(),
                        }
                        .into(),
                    })
                });
            Some((inner_sub_body.as_ref(), rebuild))
        }
        _ => None,
    }
}

/// Reports whether `expr`, or any expression nested inside it, is a field
/// reference whose name is valid and equal to `param_name`.
fn subtree_references_lambda_param(expr: &TypedExpression, param_name: &SimpleIdentifier) -> bool {
    expr.find(&mut |node| match &node.kind {
        // A reference whose name failed validation never counts as a match.
        TypedExpressionKind::FieldReference(reference) => reference
            .field_name
            .valid_ref()
            .map_or(false, |name| name == param_name),
        _ => false,
    })
    .is_some()
}

/// Reports whether `expr` itself (not a descendant) is a field reference
/// whose name is valid and equal to `param_name`.
fn is_lambda_param_ref(expr: &TypedExpression, param_name: &SimpleIdentifier) -> bool {
    match &expr.kind {
        // A reference whose name failed validation never counts as a match.
        TypedExpressionKind::FieldReference(reference) => reference
            .field_name
            .valid_ref()
            .map_or(false, |name| name == param_name),
        _ => false,
    }
}

// ---------------------------------------------------------------------------
// Fast-path matchers
// ---------------------------------------------------------------------------

/// `transform(arr, x -> x)` → `arr`.
///
/// When the body is nothing but the lambda parameter, the transform is a
/// no-op and the array expression's own AST is the replacement. This is the
/// base case for un-fused chains whose innermost link is identity.
fn vectorize_identity(
    array_expr: &TypedExpression,
    body: &TypedExpression,
    lambda_param: &SimpleIdentifier,
) -> Option<Arc<Expression>> {
    is_lambda_param_ref(body, lambda_param).then(|| Arc::clone(&array_expr.ast))
}

/// `transform(arr<struct{f}>, x -> x.f)` → `arr.f`.
///
/// Matches a struct-field lookup applied directly to the lambda parameter
/// and rewrites it as the same field lookup on the array expression. The
/// typechecker turns the result into a `BroadcastStructField` field access
/// without a transform/explode detour.
fn vectorize_struct_field_access(
    array_expr: &TypedExpression,
    body: &TypedExpression,
    lambda_param: &SimpleIdentifier,
) -> Option<Arc<Expression>> {
    let TypedExpressionKind::FieldLookup(lookup) = &body.kind else {
        return None;
    };
    match &lookup.access {
        FieldAccess::StructField(field_id) if is_lambda_param_ref(&lookup.value, lambda_param) => {
            // An invalid field identifier cannot be re-emitted; decline.
            let name = field_id.valid_ref().ok()?;
            let replacement = field(array_expr.ast.clone(), name.as_str()).build();
            Some(Arc::new(replacement))
        }
        _ => None,
    }
}

/// `transform(arr<tuple>, x -> x.fN)` → `arr.fN`.
///
/// Matches a tuple-element lookup applied directly to the lambda parameter.
/// The replacement looks up the field named `f` followed by the element
/// index on the array expression.
fn vectorize_tuple_element_access(
    array_expr: &TypedExpression,
    body: &TypedExpression,
    lambda_param: &SimpleIdentifier,
) -> Option<Arc<Expression>> {
    let TypedExpressionKind::FieldLookup(lookup) = &body.kind else {
        return None;
    };
    match &lookup.access {
        FieldAccess::TupleElement(idx) if is_lambda_param_ref(&lookup.value, lambda_param) => {
            let element_name = format!("f{idx}");
            let replacement = field(array_expr.ast.clone(), element_name.as_str()).build();
            Some(Arc::new(replacement))
        }
        _ => None,
    }
}

/// `transform(arr<variant>, x -> x.field)` → `arr.field`.
///
/// Matches a variant-field lookup applied directly to the lambda parameter
/// and rewrites it as the same lookup on the array expression. The
/// typechecker turns the result into a `BroadcastVariantField` field access.
fn vectorize_variant_field_access(
    array_expr: &TypedExpression,
    body: &TypedExpression,
    lambda_param: &SimpleIdentifier,
) -> Option<Arc<Expression>> {
    let TypedExpressionKind::FieldLookup(lookup) = &body.kind else {
        return None;
    };
    match &lookup.access {
        FieldAccess::VariantField(field_id) if is_lambda_param_ref(&lookup.value, lambda_param) => {
            // An invalid field identifier cannot be re-emitted; decline.
            let name = field_id.valid_ref().ok()?;
            let replacement = field(array_expr.ast.clone(), name.as_str()).build();
            Some(Arc::new(replacement))
        }
        _ => None,
    }
}

/// `transform(arr, x -> x AS T)` → `cast(arr, array(T))`.
///
/// Matches a cast applied directly to the lambda parameter and rewrites it
/// as a cast of the whole array to an array of the target type. The
/// typechecker turns the result into an `ArrayElementCast`.
fn vectorize_array_element_cast(
    array_expr: &TypedExpression,
    body: &TypedExpression,
    lambda_param: &SimpleIdentifier,
) -> Option<Arc<Expression>> {
    match &body.kind {
        TypedExpressionKind::Cast(cast_node)
            if is_lambda_param_ref(&cast_node.value, lambda_param) =>
        {
            // Wrap the per-element target type in an array type.
            let element_target = cast_node.target_type.clone();
            let array_target = Array::new(element_target).into();
            let replacement = cast(array_expr.ast.clone(), array_target).build();
            Some(Arc::new(replacement))
        }
        _ => None,
    }
}

/// `transform(arr<variant>, x -> to_json_string(x))` →
/// `array_variant_to_json(arr)`.
///
/// The body must be a call to the scalar `ToJsonString` function with
/// exactly one argument, and that argument must be the lambda parameter
/// itself. The broadcast resolver rewrites `to_json_string(arr)` into
/// `transform(arr, x -> to_json_string(x))` with `x: variant`, so the scalar
/// overload is the only one that can appear here; the internal
/// `ArrayVariantToJson` shows up only as this matcher's *output* and would
/// not typecheck inside a lambda whose parameter is `variant`.
fn vectorize_to_json_string_variant(
    array_expr: &TypedExpression,
    body: &TypedExpression,
    lambda_param: &SimpleIdentifier,
) -> Option<Arc<Expression>> {
    let TypedExpressionKind::Apply(apply) = &body.kind else {
        return None;
    };
    let is_scalar_to_json =
        apply.function_def.type_id() == std::any::TypeId::of::<ToJsonString>();
    if !is_scalar_to_json || apply.parameter_binding.len() != 1 {
        return None;
    }
    let sole_arg = apply.parameter_binding.get_by_index(0).ok()?;
    is_lambda_param_ref(sole_arg, lambda_param).then(|| {
        Arc::new(
            call("array_variant_to_json")
                .arg(array_expr.ast.clone())
                .build(),
        )
    })
}

/// `transform(arr, x -> coalesce(x, default))` → `array_coalesce(arr, default)`.
///
/// The body must be a two-argument call to the scalar `Coalesce` function
/// whose first argument is the lambda parameter itself and whose second
/// argument does not mention the lambda parameter anywhere.
///
/// `ArrayCoalesce` is deliberately excluded: it would mean the body is
/// already vectorized over an `array<T>` element, and rewriting that to
/// `array_coalesce(outer_arr, default)` would feed `array<array<T>>` into a
/// function that requires `array<T>` with scalar `T`.
fn vectorize_coalesce_with_default(
    array_expr: &TypedExpression,
    body: &TypedExpression,
    lambda_param: &SimpleIdentifier,
) -> Option<Arc<Expression>> {
    let TypedExpressionKind::Apply(apply) = &body.kind else {
        return None;
    };
    let is_scalar_coalesce = apply.function_def.type_id() == std::any::TypeId::of::<Coalesce>();
    if !is_scalar_coalesce || apply.parameter_binding.len() != 2 {
        return None;
    }
    let subject = apply.parameter_binding.get_by_index(0).ok()?;
    let default_value = apply.parameter_binding.get_by_index(1).ok()?;

    // The default is hoisted out of the lambda, so it must not depend on
    // the per-element variable.
    let eligible = is_lambda_param_ref(subject, lambda_param)
        && !subtree_references_lambda_param(default_value, lambda_param);
    eligible.then(|| {
        Arc::new(
            call("array_coalesce")
                .arg(array_expr.ast.clone())
                .arg(default_value.ast.clone())
                .build(),
        )
    })
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
//
// **Scope:** these unit tests assert *that* each fast-path matcher fires —
// i.e. the resulting pipeline contains no EXPLODE/AGG commands. They are
// performance-regression guards.
//
// **Out of scope:** correctness. Whether the rewritten expression produces
// the right values is `hamelin_it`'s job (run against real backends end-to-
// end). Resist the temptation to grow these tests into correctness checks —
// asserting exact AST shapes, output schemas, or expected SQL is brittle and
// duplicates IT.
//
// Pattern: build a small typed pipeline that should hit a specific matcher,
// run `lower_transform`, and call `assert_vectorized` (no slow path emitted)
// or `assert_falls_back` (slow path required).
#[cfg(test)]
mod tests {
    use super::*;
    use hamelin_lib::tree::{
        ast::pipeline::Pipeline,
        builder::{
            array, call, cast, field, field_ref, lambda1, pipeline, set_command, struct_literal,
            tuple,
        },
    };
    use hamelin_lib::type_check;
    use std::sync::Arc;

    /// Type-check a builder-constructed pipeline and push it through
    /// `lower_transform` with a default translation context.
    fn run_lower_transform(input: Pipeline) -> Arc<TypedPipeline> {
        let typed_input = type_check(input).output;
        let mut ctx = StatementTranslationContext::default();
        lower_transform(Arc::new(typed_input), &mut ctx).unwrap()
    }

    /// Number of EXPLODE plus AGG commands in `pipeline`. The tests treat
    /// any such command as evidence of the slow path, so 0 ⇒ vectorized.
    fn count_explode_agg(pipeline: &TypedPipeline) -> usize {
        use hamelin_lib::tree::typed_ast::command::TypedCommandKind;
        let valid = pipeline.valid_ref().unwrap();
        let mut slow_path_commands = 0;
        for cmd in valid.commands.iter() {
            if matches!(
                cmd.kind,
                TypedCommandKind::Explode(_) | TypedCommandKind::Agg(_)
            ) {
                slow_path_commands += 1;
            }
        }
        slow_path_commands
    }

    /// Lower `input` and require that **no** EXPLODE/AGG command was
    /// emitted, i.e. every transform in it took a fast path.
    #[track_caller]
    fn assert_vectorized(input: Pipeline) {
        let lowered = run_lower_transform(input);
        let slow_path_commands = count_explode_agg(&lowered);
        assert!(
            slow_path_commands == 0,
            "expected fully vectorized lowering, but slow path emitted {} EXPLODE/AGG commands.\n\
             pipeline:\n{:#?}",
            slow_path_commands,
            lowered.ast
        );
    }

    /// Lower `input` and require that at least two EXPLODE/AGG commands
    /// were emitted, i.e. the slow path was needed.
    #[track_caller]
    fn assert_falls_back(input: Pipeline) {
        let lowered = run_lower_transform(input);
        let slow_path_commands = count_explode_agg(&lowered);
        assert!(
            slow_path_commands >= 2,
            "expected fallback to EXPLODE + AGG, got {} EXPLODE/AGG commands.\n\
             pipeline:\n{:#?}",
            slow_path_commands,
            lowered.ast.commands
        );
    }

    /// Covers `vectorize_array_element_cast`: casting each element of an
    /// integer sequence to string must not need the slow path.
    #[test]
    fn unfuse_array_element_cast() {
        use hamelin_lib::types::STRING;

        let define_arr = set_command()
            .named_field("arr", call("sequence").arg(1).arg(3).arg(1))
            .build();

        let cast_each = call("transform")
            .arg(field_ref("arr"))
            .arg(lambda1("x").body(cast(field_ref("x"), STRING)));
        let apply_transform = set_command().named_field("casted", cast_each).build();

        assert_vectorized(
            pipeline()
                .command(define_arr)
                .command(apply_transform)
                .build(),
        );
    }

    /// Covers `vectorize_struct_field_access` twice in one SET. Two sibling
    /// transforms over the same array must each be fast-pathed on their own,
    /// without triggering a shared EXPLODE.
    #[test]
    fn unfuse_two_sibling_transforms_same_array() {
        let rows = array()
            .element(struct_literal().field("k", "a").field("v", 1))
            .element(struct_literal().field("k", "b").field("v", 2));
        let define_arr = set_command().named_field("arr", rows).build();

        let keys = call("transform")
            .arg(field_ref("arr"))
            .arg(lambda1("x").body(field(field_ref("x"), "k")));
        let vals = call("transform")
            .arg(field_ref("arr"))
            .arg(lambda1("x").body(field(field_ref("x"), "v")));
        let apply_transforms = set_command()
            .named_field("keys", keys)
            .named_field("vals", vals)
            .build();

        assert_vectorized(
            pipeline()
                .command(define_arr)
                .command(apply_transforms)
                .build(),
        );
    }

    /// Covers `vectorize_tuple_element_access`.
    #[test]
    fn unfuse_tuple_element_access() {
        let pairs = array()
            .element(tuple().element(1).element("a"))
            .element(tuple().element(2).element("b"));
        let define_arr = set_command().named_field("arr", pairs).build();

        let firsts = call("transform")
            .arg(field_ref("arr"))
            .arg(lambda1("x").body(field(field_ref("x"), "f0")));
        let apply_transform = set_command().named_field("firsts", firsts).build();

        assert_vectorized(
            pipeline()
                .command(define_arr)
                .command(apply_transform)
                .build(),
        );
    }

    /// Covers `vectorize_variant_field_access`.
    #[test]
    fn unfuse_variant_field_access() {
        let variants = array()
            .element(call("parse_json").arg(r#"{"foo": 1}"#))
            .element(call("parse_json").arg(r#"{"foo": 2}"#));
        let define_arr = set_command().named_field("arr", variants).build();

        let foos = call("transform")
            .arg(field_ref("arr"))
            .arg(lambda1("x").body(field(field_ref("x"), "foo")));
        let apply_transform = set_command().named_field("foos", foos).build();

        assert_vectorized(
            pipeline()
                .command(define_arr)
                .command(apply_transform)
                .build(),
        );
    }

    /// Covers `vectorize_to_json_string_variant`. That matcher's TypeId
    /// branch used to be dead code, hidden by a name-based fallback; a
    /// structural test like this one is what would have exposed it.
    #[test]
    fn unfuse_to_json_string_variant() {
        let variants = array()
            .element(call("parse_json").arg("1"))
            .element(call("parse_json").arg("2"));
        let define_arr = set_command().named_field("arr", variants).build();

        let jsons = call("transform")
            .arg(field_ref("arr"))
            .arg(lambda1("x").body(call("to_json_string").arg(field_ref("x"))));
        let apply_transform = set_command().named_field("jsons", jsons).build();

        assert_vectorized(
            pipeline()
                .command(define_arr)
                .command(apply_transform)
                .build(),
        );
    }

    /// Covers `vectorize_coalesce_with_default` chained through
    /// `vectorize_array_element_cast` and `vectorize_struct_field_access`:
    /// `transform(arr, x -> coalesce(x.q AS string, "0"))` has to un-fuse
    /// completely, with no fallback.
    #[test]
    fn unfuse_coalesce_of_typed_cast_on_struct_field() {
        use hamelin_lib::types::STRING;

        let rows = array()
            .element(struct_literal().field("q", 1))
            .element(struct_literal().field("q", 2));
        let define_arr = set_command().named_field("arr", rows).build();

        let coalesced_body = call("coalesce")
            .arg(cast(field(field_ref("x"), "q"), STRING))
            .arg("0");
        let result = call("transform")
            .arg(field_ref("arr"))
            .arg(lambda1("x").body(coalesced_body));
        let apply_transform = set_command().named_field("result", result).build();

        assert_vectorized(
            pipeline()
                .command(define_arr)
                .command(apply_transform)
                .build(),
        );
    }

    /// Negative case: no fast-path matcher exists for `upper(x)`, so un-fuse
    /// has to reject the body and leave it to the slow path. Guards against
    /// un-fuse wrongly claiming it can vectorize an unsupported shape, which
    /// would silently corrupt user queries.
    #[test]
    fn fallback_partial_vectorizable_body() {
        let define_arr = set_command()
            .named_field("arr", array().element("a").element("b"))
            .build();

        let uppered = call("transform")
            .arg(field_ref("arr"))
            .arg(lambda1("x").body(call("upper").arg(field_ref("x"))));
        let apply_transform = set_command().named_field("uppered", uppered).build();

        assert_falls_back(
            pipeline()
                .command(define_arr)
                .command(apply_transform)
                .build(),
        );
    }
}