mod assignment_tree;
mod freeze;
use assignment_tree::AssignmentTree;
use freeze::FreezeAlgebra;
use std::collections::HashMap;
use std::sync::Arc;
use ordermap::OrderMap;
use hamelin_eval::{eval, Environment};
use hamelin_lib::err::TranslationError;
use hamelin_lib::tree::ast::clause::SortOrder;
use hamelin_lib::tree::ast::identifier::{Identifier, SimpleIdentifier};
use hamelin_lib::tree::ast::node::Span;
use hamelin_lib::tree::typed_ast::clause::{Projections, TypedFromClause};
use hamelin_lib::tree::typed_ast::command::{
SideEffect, TypedAggCommand, TypedCommand, TypedCommandKind, TypedExplodeCommand,
TypedFromCommand, TypedJoinCommand, TypedLimitCommand, TypedLookupCommand, TypedSelectCommand,
TypedSortCommand, TypedSortExpression, TypedUnionCommand, TypedWhereCommand,
TypedWindowCommand,
};
use hamelin_lib::tree::typed_ast::context::StatementTranslationContext;
use hamelin_lib::tree::typed_ast::environment::TypeEnvironment;
use hamelin_lib::tree::typed_ast::expression::{TypedExpression, TypedExpressionKind};
use hamelin_lib::tree::typed_ast::pipeline::TypedPipeline;
use hamelin_lib::tree::typed_ast::query::TypedStatement;
use hamelin_lib::types::Type;
use crate::window_frame::WindowFrame;
/// Side effect a statement performs in addition to producing rows.
#[derive(Debug, Clone)]
pub enum IRSideEffect {
    /// Pure query: no side effect beyond the row output.
    None,
    /// `APPEND`: write the pipeline output into `table`, de-duplicating on
    /// `distinct_by` columns when that list is non-empty.
    Append {
        table: Identifier,
        distinct_by: Vec<SimpleIdentifier>,
    },
}

/// A fully lowered statement: its CTEs, main pipeline, and side effect.
#[derive(Debug, Clone)]
pub struct IRStatement {
    /// `WITH` clauses in declaration order; later clauses may reference
    /// earlier ones (see `IRStatement::from_typed`).
    pub with_clauses: Vec<IRWithClause>,
    pub pipeline: Arc<IRPipeline>,
    pub side_effect: IRSideEffect,
}

/// A named CTE (`WITH name AS (...)`) holding its lowered pipeline.
#[derive(Debug, Clone)]
pub struct IRWithClause {
    pub name: SimpleIdentifier,
    pub pipeline: Arc<IRPipeline>,
}

/// A lowered pipeline: ordered commands plus the schema of the rows it emits.
#[derive(Debug, Clone)]
pub struct IRPipeline {
    pub commands: Vec<IRCommand>,
    pub output_schema: Arc<TypeEnvironment>,
}

/// One lowered command, with its source span and the schema after it runs.
#[derive(Debug, Clone)]
pub struct IRCommand {
    pub kind: IRCommandKind,
    pub span: Span,
    pub output_schema: Arc<TypeEnvironment>,
}

/// The command set that survives lowering. Other typed commands (LET, DROP,
/// WITHIN, PARSE, UNNEST, MATCH, NEST, APPEND) are expected to have been
/// eliminated by normalization before IR construction — see
/// `IRCommandKind::from_typed` for the corresponding internal errors.
#[derive(Debug, Clone)]
pub enum IRCommandKind {
    From(IRFromCommand),
    Where(IRWhereCommand),
    Select(IRSelectCommand),
    Agg(IRAggCommand),
    Window(IRWindowCommand),
    Sort(IRSortCommand),
    Limit(IRLimitCommand),
    Explode(IRExplodeCommand),
    Join(IRJoinCommand),
}
/// `FROM` (or a lowered `UNION`): the input relations feeding the pipeline.
#[derive(Debug, Clone)]
pub struct IRFromCommand {
    pub inputs: Vec<IRInput>,
}

/// A single pipeline input: a physical table, or a CTE resolved by name to
/// its already-lowered pipeline.
#[derive(Debug, Clone)]
pub enum IRInput {
    Table(Identifier),
    With(SimpleIdentifier, Arc<IRPipeline>),
}

/// `WHERE`: keep rows for which `predicate` holds.
#[derive(Debug, Clone)]
pub struct IRWhereCommand {
    pub predicate: IRExpression,
}

/// `SELECT`: the output assignments. Compound targets (`x.a`, `x.b`) have
/// been packed into one struct-valued assignment per root by
/// `convert_projections`.
#[derive(Debug, Clone)]
pub struct IRSelectCommand {
    pub assignments: Vec<IRAssignment>,
}

/// `AGG`: aggregate expressions, grouping keys, and optional ordering.
#[derive(Debug, Clone)]
pub struct IRAggCommand {
    pub aggregates: Vec<IRAssignment>,
    pub group_by: Vec<IRAssignment>,
    pub sort_by: Vec<IRSortExpression>,
}

/// `WINDOW`: windowed projections over partitions, with optional ordering
/// and an optional constant frame.
#[derive(Debug, Clone)]
pub struct IRWindowCommand {
    pub projections: Vec<IRAssignment>,
    pub partition_by: Vec<IRAssignment>,
    pub sort_by: Vec<IRSortExpression>,
    /// Frame evaluated from a constant `WITHIN` expression, when present.
    pub frame: Option<WindowFrame>,
}

/// `SORT`: order rows by the given expressions.
#[derive(Debug, Clone)]
pub struct IRSortCommand {
    pub sort_by: Vec<IRSortExpression>,
}

/// One sort key: an expression and its direction.
#[derive(Debug, Clone)]
pub struct IRSortExpression {
    pub expression: IRExpression,
    pub order: SortOrder,
}

/// `LIMIT`: keep at most `count` rows; `count` stays an unevaluated expression.
#[derive(Debug, Clone)]
pub struct IRLimitCommand {
    pub count: IRExpression,
}

/// `EXPLODE`: flatten the named column, one output row per element.
#[derive(Debug, Clone)]
pub struct IRExplodeCommand {
    pub column: SimpleIdentifier,
}

/// Join flavor: `JOIN` lowers to `Inner`, `LOOKUP` lowers to `Left`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JoinType {
    Inner,
    Left,
}

/// `JOIN`/`LOOKUP`: combine with the named right-hand relation on `condition`.
#[derive(Debug, Clone)]
pub struct IRJoinCommand {
    pub join_type: JoinType,
    pub right: SimpleIdentifier,
    pub condition: IRExpression,
}

/// A named output column and the expression that computes it.
#[derive(Debug, Clone)]
pub struct IRAssignment {
    pub identifier: SimpleIdentifier,
    pub expression: IRExpression,
}

/// Thin wrapper over a typed expression, shared by reference throughout the IR.
#[derive(Debug, Clone)]
pub struct IRExpression(pub Arc<TypedExpression>);
impl IRExpression {
    /// Wraps an already-typed expression without copying it.
    pub fn new(expr: Arc<TypedExpression>) -> Self {
        Self(expr)
    }

    /// Borrows the underlying typed expression.
    pub fn inner(&self) -> &TypedExpression {
        self.0.as_ref()
    }

    /// The type this expression resolved to during type checking.
    pub fn resolved_type(&self) -> &Type {
        &self.inner().resolved_type
    }

    /// Source span of the originating AST node.
    pub fn span(&self) -> &Span {
        &self.inner().ast.span
    }

    /// Folds constant subexpressions via `FreezeAlgebra`; when nothing can be
    /// folded, the original expression is returned unchanged.
    pub fn freeze(&self) -> Self {
        let mut algebra = FreezeAlgebra;
        let folded = match self.0.cata(&mut algebra) {
            Ok(value) => value.into(),
            Err(unchanged) => unchanged,
        };
        IRExpression::new(folded)
    }
}
impl IRStatement {
    /// Lowers a typed statement into IR.
    ///
    /// `WITH` clauses are lowered first, in declaration order, so each CTE
    /// pipeline can resolve references to earlier CTEs through `cte_map`;
    /// the main pipeline is then lowered against the complete map. An
    /// `APPEND` side effect is captured here (the APPEND command itself is
    /// skipped during pipeline lowering).
    ///
    /// # Errors
    /// Returns a `TranslationError` when a CTE name or an `APPEND DISTINCT BY`
    /// column is not a simple identifier, or when any nested lowering fails.
    pub fn from_typed(
        statement: Arc<TypedStatement>,
        ctx: &mut StatementTranslationContext,
    ) -> Result<Self, Arc<TranslationError>> {
        let mut with_clauses = Vec::new();
        // Name -> lowered pipeline, used to resolve CTE references in later
        // pipelines of this same statement.
        let mut cte_map: HashMap<String, Arc<IRPipeline>> = HashMap::new();
        for wc in &statement.with_clauses {
            let name = wc
                .name
                .valid_ref()?
                .clone()
                .try_unwrap_simple()
                .map_err(|id| {
                    ctx.error(format!("CTE name must be simple identifier, got: {}", id))
                        .emit()
                })?;
            let pipeline = Arc::new(IRPipeline::from_typed(wc.pipeline.clone(), ctx, &cte_map)?);
            cte_map.insert(name.as_str().to_string(), pipeline.clone());
            with_clauses.push(IRWithClause { name, pipeline });
        }
        let pipeline = IRPipeline::from_typed(statement.pipeline.clone(), ctx, &cte_map)?;
        let side_effect = match &statement.side_effect {
            SideEffect::None => IRSideEffect::None,
            SideEffect::Append { table, distinct_by } => {
                let table_id = table.ast.identifier.valid_ref()?.clone();
                let mut lowered_distinct_by = Vec::new();
                for selection in distinct_by {
                    let id = selection
                        .ast
                        .identifier
                        .valid_ref()?
                        .clone()
                        .try_unwrap_simple()
                        .map_err(|id| {
                            ctx.error(format!(
                                "APPEND DISTINCT BY must use simple identifiers, got: {}",
                                id
                            ))
                            .emit()
                        })?;
                    lowered_distinct_by.push(id);
                }
                IRSideEffect::Append {
                    table: table_id,
                    distinct_by: lowered_distinct_by,
                }
            }
        };
        Ok(Self {
            with_clauses,
            pipeline: Arc::new(pipeline),
            side_effect,
        })
    }
}
impl IRPipeline {
    /// Lowers a typed pipeline command-by-command, dropping `APPEND` commands
    /// (those are surfaced as the statement's `IRSideEffect` instead).
    pub fn from_typed(
        pipeline: Arc<TypedPipeline>,
        ctx: &mut StatementTranslationContext,
        cte_map: &HashMap<String, Arc<IRPipeline>>,
    ) -> Result<Self, Arc<TranslationError>> {
        let valid = pipeline.valid_ref()?;
        let mut commands = Vec::with_capacity(valid.commands.len());
        for cmd in &valid.commands {
            // APPEND never becomes a pipeline command.
            if !matches!(cmd.kind, TypedCommandKind::Append(_)) {
                commands.push(IRCommand::from_typed(cmd, ctx, cte_map)?);
            }
        }
        Ok(Self {
            commands,
            output_schema: valid.final_schema.clone(),
        })
    }
}
impl IRCommand {
    /// Lowers one typed command, carrying over its span and output schema.
    pub fn from_typed(
        cmd: &Arc<TypedCommand>,
        ctx: &mut StatementTranslationContext,
        cte_map: &HashMap<String, Arc<IRPipeline>>,
    ) -> Result<Self, Arc<TranslationError>> {
        Ok(Self {
            kind: IRCommandKind::from_typed(&cmd.kind, ctx, cte_map)?,
            span: cmd.ast.span,
            output_schema: cmd.output_schema.clone(),
        })
    }
}
impl IRCommandKind {
    /// Dispatches one typed command kind to its IR lowering.
    ///
    /// Command kinds that the normalization passes are required to have
    /// eliminated (LET, DROP, WITHIN, PARSE, UNNEST, APPEND, MATCH, NEST)
    /// are reported as internal translation errors rather than silently
    /// ignored, so a missing normalization step fails loudly.
    fn from_typed(
        kind: &TypedCommandKind,
        ctx: &mut StatementTranslationContext,
        cte_map: &HashMap<String, Arc<IRPipeline>>,
    ) -> Result<Self, Arc<TranslationError>> {
        match kind {
            TypedCommandKind::From(from_cmd) => {
                Ok(IRFromCommand::from_typed(from_cmd, ctx, cte_map)?.into())
            }
            TypedCommandKind::Where(where_cmd) => Ok(IRWhereCommand::from_typed(where_cmd).into()),
            TypedCommandKind::Select(select_cmd) => {
                Ok(IRSelectCommand::from_typed(select_cmd)?.into())
            }
            TypedCommandKind::Agg(agg_cmd) => Ok(IRAggCommand::from_typed(agg_cmd)?.into()),
            TypedCommandKind::Window(window_cmd) => {
                Ok(IRWindowCommand::from_typed(window_cmd)?.into())
            }
            TypedCommandKind::Sort(sort_cmd) => Ok(IRSortCommand::from_typed(sort_cmd).into()),
            TypedCommandKind::Limit(limit_cmd) => Ok(IRLimitCommand::from_typed(limit_cmd).into()),
            TypedCommandKind::Explode(explode_cmd) => {
                Ok(IRExplodeCommand::from_typed(explode_cmd, ctx)?.into())
            }
            TypedCommandKind::Let(_) => Err(ctx
                .error("LET command should have been fused into SELECT during normalization")
                .emit()),
            TypedCommandKind::Drop(_) => Err(ctx
                .error("DROP command should have been fused into SELECT during normalization")
                .emit()),
            TypedCommandKind::Within(_) => Err(ctx
                .error("WITHIN command should have been converted to WHERE during normalization")
                .emit()),
            TypedCommandKind::Parse(_) => Err(ctx
                .error("PARSE command should have been lowered to LET + WHERE during normalization")
                .emit()),
            TypedCommandKind::Unnest(_) => Err(ctx
                .error("UNNEST command should have been lowered during normalization")
                .emit()),
            TypedCommandKind::Join(join_cmd) => {
                Ok(IRJoinCommand::from_typed_join(join_cmd, ctx)?.into())
            }
            // LOOKUP lowers to a left join.
            TypedCommandKind::Lookup(lookup_cmd) => {
                Ok(IRJoinCommand::from_typed_lookup(lookup_cmd, ctx)?.into())
            }
            TypedCommandKind::Append(_) => Err(ctx
                .error("APPEND command should be skipped during pipeline lowering (captured as IRSideEffect)")
                .emit()),
            // UNION shares the FROM lowering: each branch becomes an input.
            TypedCommandKind::Union(union_cmd) => {
                Ok(IRFromCommand::from_union(union_cmd, ctx, cte_map)?.into())
            }
            TypedCommandKind::Match(_) => Err(ctx
                .error("MATCH command should have been lowered during normalization")
                .emit()),
            TypedCommandKind::Nest(_) => Err(ctx
                .error("NEST command should have been lowered to SELECT during normalization")
                .emit()),
            // Errors recorded during type checking propagate unchanged.
            TypedCommandKind::Error(err) => Err(err.clone()),
        }
    }
}
impl IRFromCommand {
fn from_typed(
from_cmd: &TypedFromCommand,
ctx: &mut StatementTranslationContext,
cte_map: &HashMap<String, Arc<IRPipeline>>,
) -> Result<Self, Arc<TranslationError>> {
let mut inputs = Vec::new();
for clause in &from_cmd.clauses {
inputs.push(IRInput::from_typed(clause, ctx, cte_map)?);
}
Ok(Self { inputs })
}
fn from_union(
union_cmd: &TypedUnionCommand,
ctx: &mut StatementTranslationContext,
cte_map: &HashMap<String, Arc<IRPipeline>>,
) -> Result<Self, Arc<TranslationError>> {
let mut inputs = Vec::new();
for clause in &union_cmd.clauses {
inputs.push(IRInput::from_typed(clause, ctx, cte_map)?);
}
Ok(Self { inputs })
}
}
impl IRInput {
    /// Lowers a single FROM clause. A simple name that matches a CTE resolves
    /// to that CTE's lowered pipeline and shadows any physical table.
    fn from_typed(
        clause: &TypedFromClause,
        ctx: &mut StatementTranslationContext,
        cte_map: &HashMap<String, Arc<IRPipeline>>,
    ) -> Result<Self, Arc<TranslationError>> {
        match clause {
            TypedFromClause::Reference(table_ref) => {
                let identifier = table_ref.ast.identifier.valid_ref()?.clone();
                let cte_input = match &identifier {
                    Identifier::Simple(simple) => cte_map
                        .get(simple.as_str())
                        .map(|pipeline| IRInput::With(simple.clone(), pipeline.clone())),
                    _ => None,
                };
                Ok(cte_input.unwrap_or(IRInput::Table(identifier)))
            }
            TypedFromClause::Alias(_) => Err(ctx
                .error("FROM aliases should have been converted to CTEs during normalization")
                .emit()),
            TypedFromClause::Error(err) => Err(err.clone()),
        }
    }
}
impl IRWhereCommand {
fn from_typed(where_cmd: &TypedWhereCommand) -> Self {
Self {
predicate: IRExpression::new(where_cmd.predicate.clone()),
}
}
}
impl IRSelectCommand {
    /// Lowers SELECT projections, packing compound targets into struct values.
    fn from_typed(select_cmd: &TypedSelectCommand) -> Result<Self, Arc<TranslationError>> {
        Ok(Self {
            assignments: convert_projections(&select_cmd.projections)?,
        })
    }
}
impl IRAggCommand {
    /// Lowers an AGG command: aggregates, grouping keys, and optional sort.
    fn from_typed(agg_cmd: &TypedAggCommand) -> Result<Self, Arc<TranslationError>> {
        Ok(Self {
            aggregates: convert_projections(&agg_cmd.aggregates)?,
            group_by: convert_projections(&agg_cmd.group_by)?,
            sort_by: convert_sort_expressions(&agg_cmd.sort_by),
        })
    }
}
impl IRWindowCommand {
    /// Lowers a WINDOW command. A `WITHIN` expression, when present, must be
    /// constant and is evaluated eagerly into a `WindowFrame`.
    fn from_typed(window_cmd: &TypedWindowCommand) -> Result<Self, Arc<TranslationError>> {
        let projections = convert_projections(&window_cmd.projections)?;
        let partition_by = convert_projections(&window_cmd.group_by)?;
        let sort_by = convert_sort_expressions(&window_cmd.sort_by);
        let frame = match &window_cmd.within {
            Some(within_expr) => Some(eval_within_to_frame(within_expr)?),
            None => None,
        };
        Ok(Self {
            projections,
            partition_by,
            sort_by,
            frame,
        })
    }
}
/// Evaluates a constant `WITHIN` expression into a window frame.
///
/// Rejects, in order: expressions containing embedded type-check errors
/// (propagated verbatim), calls to non-deterministic functions, and
/// expressions that fail constant evaluation (e.g. column references).
fn eval_within_to_frame(
    within_expr: &TypedExpression,
) -> Result<WindowFrame, Arc<TranslationError>> {
    use hamelin_lib::err::Context;
    use hamelin_lib::tree::typed_ast::expression::{TypedErrorExpression, TypedExpressionKind};

    // Builds a TranslationError anchored at `expr`'s source span.
    fn err_at(expr: &TypedExpression, message: &str) -> Arc<TranslationError> {
        let span = expr.ast.span.to_range().unwrap_or(0..=0);
        TranslationError::new(Context::new(span, message)).into()
    }

    // Any error node embedded in the expression wins over our own diagnostics.
    if let Some(err_expr) =
        within_expr.find(&mut |e| matches!(&e.kind, TypedExpressionKind::Error(_)))
    {
        if let TypedExpressionKind::Error(TypedErrorExpression { error }) = &err_expr.kind {
            return Err(error.clone());
        }
    }
    // Non-deterministic calls cannot appear in a constant frame.
    if let Some(bad_func_expr) = within_expr.find(&mut |e| {
        matches!(&e.kind, TypedExpressionKind::Apply(apply) if !apply.function_def.is_deterministic())
    }) {
        if let TypedExpressionKind::Apply(apply) = &bad_func_expr.kind {
            return Err(err_at(
                bad_func_expr,
                &format!(
                    "WITHIN expression cannot use non-deterministic function '{}' - window frames must be constant",
                    apply.function_def.name()
                ),
            ));
        }
    }
    // Evaluate against an empty environment: any reference to row data fails.
    match eval(within_expr, &Environment::default()) {
        Ok(value) => WindowFrame::from_value(value)
            .map_err(|msg| err_at(within_expr, &format!("Invalid window frame: {}", msg))),
        Err(eval_err) => Err(err_at(
            within_expr,
            &format!(
                "WITHIN expression must be constant (cannot reference columns): {}",
                eval_err
            ),
        )),
    }
}
impl IRSortCommand {
    /// Lowers a SORT command's expression list.
    fn from_typed(sort_cmd: &TypedSortCommand) -> Self {
        let sort_by = convert_sort_expressions(&sort_cmd.expressions);
        Self { sort_by }
    }
}
impl IRLimitCommand {
fn from_typed(limit_cmd: &TypedLimitCommand) -> Self {
Self {
count: IRExpression::new(limit_cmd.count.clone()),
}
}
}
impl IRExplodeCommand {
    /// Lowers an EXPLODE command.
    ///
    /// Normalization is expected to have rewritten every EXPLODE into the
    /// canonical `EXPLODE col = col` shape, so the expression must be a plain
    /// column reference to the exploded column itself.
    fn from_typed(
        explode_cmd: &TypedExplodeCommand,
        ctx: &mut StatementTranslationContext,
    ) -> Result<Self, Arc<TranslationError>> {
        let column = explode_cmd
            .identifier
            .valid_ref()?
            .clone()
            .try_unwrap_simple()
            .map_err(|id| {
                ctx.error(format!(
                    "EXPLODE identifier must be simple after normalization, got: {}",
                    id
                ))
                .emit()
            })?;
        // The expression must reference exactly the exploded column.
        let references_column = matches!(
            &explode_cmd.expression.kind,
            TypedExpressionKind::ColumnReference(col_ref)
                if col_ref.column_name.valid_ref()
                    .is_ok_and(|name| name.as_str() == column.as_str())
        );
        if references_column {
            Ok(Self { column })
        } else {
            Err(ctx
                .error(format!(
                    "EXPLODE must be in canonical form (EXPLODE {0} = {0}) after normalization, \
                    but expression is not a column reference to '{0}'",
                    column
                ))
                .emit())
        }
    }
}
impl IRJoinCommand {
    /// Lowers a JOIN command (inner join).
    fn from_typed_join(
        join_cmd: &TypedJoinCommand,
        ctx: &mut StatementTranslationContext,
    ) -> Result<Self, Arc<TranslationError>> {
        Self::from_typed_inner(JoinType::Inner, &join_cmd.right, &join_cmd.condition, ctx)
    }

    /// Lowers a LOOKUP command (left join).
    fn from_typed_lookup(
        lookup_cmd: &TypedLookupCommand,
        ctx: &mut StatementTranslationContext,
    ) -> Result<Self, Arc<TranslationError>> {
        Self::from_typed_inner(JoinType::Left, &lookup_cmd.right, &lookup_cmd.condition, ctx)
    }

    /// Shared JOIN/LOOKUP lowering. The right side must be a simple name and
    /// the condition must be present — both guaranteed by the `lower_joins`
    /// normalization pass; violations are internal errors.
    fn from_typed_inner(
        join_type: JoinType,
        right: &hamelin_lib::tree::typed_ast::clause::TypedTableAlias,
        condition: &Option<Arc<TypedExpression>>,
        ctx: &mut StatementTranslationContext,
    ) -> Result<Self, Arc<TranslationError>> {
        let right_id = right
            .ast
            .table
            .identifier
            .valid_ref()?
            .clone()
            .try_unwrap_simple()
            .map_err(|id| {
                ctx.error(format!(
                    "JOIN right side must be simple identifier after lowering, got: {}",
                    id
                ))
                .emit()
            })?;
        let Some(condition) = condition.as_ref() else {
            return Err(ctx
                .error("JOIN condition should be present after lower_joins normalization")
                .emit());
        };
        Ok(Self {
            join_type,
            right: right_id,
            condition: IRExpression::new(Arc::clone(condition)),
        })
    }
}
/// Groups projection assignments by their root identifier, packing compound
/// targets (`x.a = ...`, `x.b = ...`) into a single struct-valued assignment
/// per root while preserving first-appearance order (hence `OrderMap`).
fn convert_projections(
    projections: &Projections,
) -> Result<Vec<IRAssignment>, Arc<TranslationError>> {
    let mut groups: OrderMap<SimpleIdentifier, AssignmentTree> = OrderMap::new();
    for assignment in &projections.assignments {
        let expression = assignment.expression.clone();
        match assignment.identifier.valid_ref()? {
            Identifier::Simple(simple) => {
                let tree = groups.entry(simple.clone()).or_default();
                tree.insert_leaf(expression);
            }
            Identifier::Compound(compound) => {
                // Root becomes the assignment name; the remaining path is the
                // location inside the packed struct.
                let tree = groups.entry(compound.first()).or_default();
                tree.insert_at_path(&compound.parts[1..], expression);
            }
        }
    }
    let assignments = groups
        .into_iter()
        .map(|(identifier, tree)| IRAssignment {
            identifier,
            expression: tree.into_ir_expression(),
        })
        .collect();
    Ok(assignments)
}
/// Lowers typed sort expressions one-to-one, keeping each sort direction.
fn convert_sort_expressions(exprs: &[TypedSortExpression]) -> Vec<IRSortExpression> {
    let mut lowered = Vec::with_capacity(exprs.len());
    for sort_expr in exprs {
        lowered.push(IRSortExpression {
            expression: IRExpression::new(sort_expr.expression.clone()),
            order: sort_expr.order.clone(),
        });
    }
    lowered
}
// Ergonomic conversions so each lowered command can be returned as an
// `IRCommandKind` via `.into()` (used throughout `IRCommandKind::from_typed`).
impl From<IRFromCommand> for IRCommandKind {
    fn from(cmd: IRFromCommand) -> Self {
        IRCommandKind::From(cmd)
    }
}
impl From<IRWhereCommand> for IRCommandKind {
    fn from(cmd: IRWhereCommand) -> Self {
        IRCommandKind::Where(cmd)
    }
}
impl From<IRSelectCommand> for IRCommandKind {
    fn from(cmd: IRSelectCommand) -> Self {
        IRCommandKind::Select(cmd)
    }
}
impl From<IRAggCommand> for IRCommandKind {
    fn from(cmd: IRAggCommand) -> Self {
        IRCommandKind::Agg(cmd)
    }
}
impl From<IRWindowCommand> for IRCommandKind {
    fn from(cmd: IRWindowCommand) -> Self {
        IRCommandKind::Window(cmd)
    }
}
impl From<IRSortCommand> for IRCommandKind {
    fn from(cmd: IRSortCommand) -> Self {
        IRCommandKind::Sort(cmd)
    }
}
impl From<IRLimitCommand> for IRCommandKind {
    fn from(cmd: IRLimitCommand) -> Self {
        IRCommandKind::Limit(cmd)
    }
}
impl From<IRExplodeCommand> for IRCommandKind {
    fn from(cmd: IRExplodeCommand) -> Self {
        IRCommandKind::Explode(cmd)
    }
}
impl From<IRJoinCommand> for IRCommandKind {
    fn from(cmd: IRJoinCommand) -> Self {
        IRCommandKind::Join(cmd)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use hamelin_eval::{eval, value::Value, Environment};
    use hamelin_lib::tree::{
        ast::{pipeline::Pipeline, IntoTyped, TypeCheckExecutor},
        builder::{ident, pipeline, select_command},
        typed_ast::expression::TypedExpressionKind,
    };
    use hamelin_lib::types::{struct_type::Struct, INT, STRING};
    use pretty_assertions::assert_eq;
    use rstest::rstest;

    /// Type-checks `pipeline` and extracts its first command as a lowered SELECT.
    fn get_ir_select(pipeline: Pipeline) -> IRSelectCommand {
        let typed = pipeline.typed_with().typed();
        let select_cmd = typed.valid_ref().unwrap().commands[0].clone();
        if let TypedCommandKind::Select(select) = &select_cmd.kind {
            IRSelectCommand::from_typed(select).unwrap()
        } else {
            panic!("Expected SELECT command");
        }
    }

    /// Convenience accessor: (column name, resolved type) of an assignment.
    fn assignment_info(assignment: &IRAssignment) -> (String, &Type) {
        (
            assignment.identifier.to_string(),
            assignment.expression.resolved_type(),
        )
    }

    #[rstest]
    #[case::simple_assignments(
        pipeline()
            .command(select_command()
                .named_field("a", 1)
                .named_field("b", "hello")
                .build())
            .build(),
        vec![("a", INT.clone()), ("b", STRING.clone())]
    )]
    fn test_simple_assignments_no_packing(
        #[case] input: Pipeline,
        #[case] expected: Vec<(&str, Type)>,
    ) {
        let ir_select = get_ir_select(input);
        assert_eq!(ir_select.assignments.len(), expected.len());
        for (assignment, (expected_name, expected_type)) in
            ir_select.assignments.iter().zip(expected.iter())
        {
            let (name, resolved) = assignment_info(assignment);
            assert_eq!(name, *expected_name);
            assert_eq!(resolved, expected_type);
        }
    }

    #[rstest]
    #[case::compound_same_root_packs_to_struct(
        pipeline()
            .command(select_command()
                .named_field(ident("x").dot("a"), 1)
                .named_field(ident("x").dot("b"), "hello")
                .build())
            .build(),
        "x",
        Struct::default().with_str("a", INT).with_str("b", STRING).into()
    )]
    #[case::single_compound_packs(
        pipeline()
            .command(select_command()
                .named_field(ident("user").dot("id"), 42)
                .build())
            .build(),
        "user",
        Struct::default().with_str("id", INT).into()
    )]
    fn test_compound_identifiers_pack_to_struct(
        #[case] input: Pipeline,
        #[case] expected_name: &str,
        #[case] expected_type: Type,
    ) {
        let ir_select = get_ir_select(input);
        assert_eq!(ir_select.assignments.len(), 1);
        let (name, resolved) = assignment_info(&ir_select.assignments[0]);
        assert_eq!(name, expected_name);
        assert_eq!(resolved, &expected_type);
    }

    #[rstest]
    #[case::deep_nesting(
        pipeline()
            .command(select_command()
                .named_field(ident("a").dot("b").dot("c"), 1)
                .build())
            .build(),
        "a",
        Struct::default().with_str("b",
            Struct::default().with_str("c", INT).into()).into()
    )]
    fn test_deep_nesting_packs_correctly(
        #[case] input: Pipeline,
        #[case] expected_name: &str,
        #[case] expected_type: Type,
    ) {
        let ir_select = get_ir_select(input);
        assert_eq!(ir_select.assignments.len(), 1);
        let (name, resolved) = assignment_info(&ir_select.assignments[0]);
        assert_eq!(name, expected_name);
        assert_eq!(resolved, &expected_type);
    }

    #[rstest]
    #[case::mixed_simple_and_compound(
        pipeline()
            .command(select_command()
                .named_field("simple", 1)
                .named_field(ident("nested").dot("field"), 2)
                .build())
            .build(),
        vec![
            ("simple", INT.clone()),
            ("nested", Struct::default().with_str("field", INT).into()),
        ]
    )]
    #[case::multiple_roots(
        pipeline()
            .command(select_command()
                .named_field(ident("a").dot("x"), 1)
                .named_field(ident("b").dot("y"), 2)
                .build())
            .build(),
        vec![
            ("a", Struct::default().with_str("x", INT).into()),
            ("b", Struct::default().with_str("y", INT).into()),
        ]
    )]
    fn test_mixed_assignments(#[case] input: Pipeline, #[case] expected: Vec<(&str, Type)>) {
        let ir_select = get_ir_select(input);
        assert_eq!(ir_select.assignments.len(), expected.len());
        for (assignment, (expected_name, expected_type)) in
            ir_select.assignments.iter().zip(expected.iter())
        {
            let (name, resolved) = assignment_info(assignment);
            assert_eq!(name, *expected_name);
            assert_eq!(resolved, expected_type);
        }
    }

    #[test]
    fn test_order_preserved() {
        let input = pipeline()
            .command(
                select_command()
                    .named_field(ident("z").dot("first"), 1)
                    .named_field(ident("a").dot("second"), 2)
                    .named_field(ident("z").dot("third"), 3)
                    .build(),
            )
            .build();
        let ir_select = get_ir_select(input);
        // Roots keep first-appearance order: "z" was seen before "a".
        assert_eq!(ir_select.assignments.len(), 2);
        assert_eq!(ir_select.assignments[0].identifier.to_string(), "z");
        assert_eq!(ir_select.assignments[1].identifier.to_string(), "a");
        let z_type = ir_select.assignments[0].expression.resolved_type();
        let expected_z = Struct::default()
            .with_str("first", INT)
            .with_str("third", INT);
        assert_eq!(z_type, &Type::from(expected_z));
    }

    #[test]
    fn test_expressions_preserved_in_packed_structs() {
        let input = pipeline()
            .command(
                select_command()
                    .named_field(ident("nested").dot("int_field"), 42)
                    .named_field(ident("nested").dot("str_field"), "hello")
                    .build(),
            )
            .build();
        let ir_select = get_ir_select(input);
        assert_eq!(ir_select.assignments.len(), 1);
        let nested = &ir_select.assignments[0];
        assert_eq!(nested.identifier.to_string(), "nested");
        let expected_type: Type = Struct::default()
            .with_str("int_field", INT)
            .with_str("str_field", STRING)
            .into();
        assert_eq!(nested.expression.resolved_type(), &expected_type);
    }

    #[test]
    fn test_freeze_resolves_now_in_within() {
        use crate::lower::lower;
        use hamelin_lib::tree::builder::{call, hours, string};
        let input = pipeline()
            .command(
                select_command()
                    .named_field("timestamp", call("ts").arg(string("2024-01-15T12:00:00Z")))
                    .build(),
            )
            .within(hours(-5))
            .build();
        let typed = input.typed_with().typed();
        let query = hamelin_lib::tree::builder::query()
            .main(Arc::new(typed))
            .build();
        let typed_query = query.typed_with().typed();
        let ir = lower(Arc::new(typed_query)).expect("lowering should succeed");
        let where_cmd = ir
            .pipeline
            .commands
            .iter()
            .find(|cmd| matches!(cmd.kind, IRCommandKind::Where(_)))
            .expect("should have WHERE command");
        let IRCommandKind::Where(where_cmd) = &where_cmd.kind else {
            panic!("expected WHERE command");
        };
        let frozen = where_cmd.predicate.freeze();

        // True when `expr` contains a call to the function named `name`
        // anywhere in its tree. Uses `TypedExpression::find`, the same
        // subtree search `eval_within_to_frame` relies on, instead of the
        // two hand-rolled traversals this test previously duplicated.
        fn has_apply_named(expr: &TypedExpression, name: &str) -> bool {
            expr.find(&mut |e| {
                matches!(&e.kind, TypedExpressionKind::Apply(apply)
                    if apply.function_def.name() == name)
            })
            .is_some()
        }

        assert!(
            has_apply_named(where_cmd.predicate.inner(), "now"),
            "before freeze should contain now() calls"
        );
        assert!(
            !has_apply_named(frozen.inner(), "now"),
            "frozen expression should not contain now() calls"
        );
        assert!(
            has_apply_named(frozen.inner(), "ts"),
            "frozen expression should contain ts() calls for resolved timestamps"
        );

        // Collects the evaluated instant of every `ts(...)` call in `expr`
        // (a full traversal is still needed here: `find` stops at the first
        // match, and we want all of them).
        fn collect_ts_timestamps(expr: &TypedExpression) -> Vec<chrono::DateTime<chrono::Utc>> {
            fn walk(expr: &TypedExpression, out: &mut Vec<chrono::DateTime<chrono::Utc>>) {
                match &expr.kind {
                    TypedExpressionKind::Apply(apply) => {
                        if apply.function_def.name() == "ts" {
                            let env = Environment::default();
                            if let Ok(Value::Timestamp(ts)) = eval(expr, &env) {
                                out.push(*ts.instant());
                            }
                        }
                        for arg in apply.parameter_binding.iter() {
                            walk(arg, out);
                        }
                    }
                    TypedExpressionKind::ArrayLiteral(arr) => {
                        for e in &arr.elements {
                            walk(e, out);
                        }
                    }
                    TypedExpressionKind::TupleLiteral(tup) => {
                        for e in &tup.elements {
                            walk(e, out);
                        }
                    }
                    TypedExpressionKind::StructLiteral(s) => {
                        for (_, e) in &s.fields {
                            walk(e, out);
                        }
                    }
                    TypedExpressionKind::VariantIndexAccess(v) => walk(&v.value, out),
                    TypedExpressionKind::FieldLookup(f) => walk(&f.value, out),
                    TypedExpressionKind::Cast(c) => walk(&c.value, out),
                    TypedExpressionKind::TsTrunc(t) => walk(&t.expression, out),
                    TypedExpressionKind::BroadcastApply(b) => {
                        for arg in b.parameter_binding.iter() {
                            walk(arg, out);
                        }
                    }
                    TypedExpressionKind::ColumnReference(_)
                    | TypedExpressionKind::Leaf
                    | TypedExpressionKind::Lambda(_)
                    | TypedExpressionKind::Error(_) => {}
                }
            }
            let mut timestamps = Vec::new();
            walk(expr, &mut timestamps);
            timestamps
        }

        let timestamps = collect_ts_timestamps(frozen.inner());
        assert_eq!(
            timestamps.len(),
            2,
            "should have exactly 2 timestamp literals"
        );
        let now = chrono::Utc::now();
        let five_hours = chrono::Duration::hours(5);
        let one_minute = chrono::Duration::minutes(1);
        let (earlier, later) = if timestamps[0] < timestamps[1] {
            (timestamps[0], timestamps[1])
        } else {
            (timestamps[1], timestamps[0])
        };
        let expected_earlier = now - five_hours;
        let expected_later = now;
        assert!(
            (earlier - expected_earlier).abs() < one_minute,
            "earlier timestamp {:?} should be within 1 minute of now - 5h ({:?})",
            earlier,
            expected_earlier
        );
        assert!(
            (later - expected_later).abs() < one_minute,
            "later timestamp {:?} should be within 1 minute of now ({:?})",
            later,
            expected_later
        );
    }
}