use std::rc::Rc;
use hamelin_lib::err::TranslationError;
use hamelin_lib::tree::{
ast::{command::Command, identifier::Identifier, identifier::SimpleIdentifier},
builder::{self, column_ref, drop_command, explode_command, let_command},
typed_ast::{
command::{TypedCommand, TypedCommandKind, TypedExplodeCommand},
context::StatementTranslationContext,
expression::TypedExpressionKind,
pipeline::TypedPipeline,
},
};
use super::super::unique::UniqueNameGenerator;
/// Rewrites a pipeline so that every explode command is in canonical form.
///
/// An explode command is canonical when it assigns to a simple identifier and
/// its expression is a column reference with that same name (see
/// `is_canonical_explode`).
///
/// If no command needs rewriting, the incoming `Rc` is handed back untouched.
/// Otherwise each command is expanded through `normalize_command`, the
/// resulting commands are assembled into a fresh pipeline AST carrying the
/// original pipeline's span, and that AST is typed again via
/// `TypedPipeline::from_ast_with_context`.
///
/// # Errors
///
/// Propagates the error from `pipeline.valid_ref()` and any error produced
/// while expanding an individual command.
pub fn normalize_explode(
    pipeline: Rc<TypedPipeline>,
    ctx: &mut StatementTranslationContext,
) -> Result<Rc<TypedPipeline>, Rc<TranslationError>> {
    // Fast path: nothing to rewrite, so return the original pipeline as-is.
    let needs_rewrite = pipeline
        .valid_ref()?
        .commands
        .iter()
        .any(explode_needs_normalization);
    if !needs_rewrite {
        return Ok(pipeline);
    }

    let valid = pipeline.valid_ref()?;
    // Temporary column names are drawn from this generator, in command order.
    let mut name_gen = UniqueNameGenerator::new("__explode");

    // Expand every command; the first failure aborts the whole rewrite.
    let expansions = valid
        .commands
        .iter()
        .map(|cmd| normalize_command(cmd, &mut name_gen))
        .collect::<Result<Vec<_>, _>>()?;

    // Feed the expanded commands, in order, into a new pipeline builder.
    let assembled = expansions
        .into_iter()
        .flatten()
        .fold(builder::pipeline(), |acc, c| acc.command(c));

    let new_ast = assembled.build().at(pipeline.ast.span);
    let retyped = TypedPipeline::from_ast_with_context(Rc::new(new_ast), ctx);
    Ok(Rc::new(retyped))
}
/// Returns `true` only for explode commands that are not already canonical.
///
/// Every other command kind yields `false`.
fn explode_needs_normalization(cmd: &Rc<TypedCommand>) -> bool {
    match &cmd.kind {
        TypedCommandKind::Explode(explode_cmd) => !is_canonical_explode(explode_cmd),
        _ => false,
    }
}
/// Checks whether an explode command is already in canonical form.
///
/// Canonical means all of the following hold:
/// - the target identifier is valid and is an `Identifier::Simple`,
/// - the expression is a `TypedExpressionKind::ColumnReference`,
/// - the referenced column name is valid and equals the target name.
///
/// Any invalid or differently shaped part makes the command non-canonical.
fn is_canonical_explode(explode_cmd: &TypedExplodeCommand) -> bool {
    let target = match explode_cmd.identifier.valid_ref() {
        Ok(Identifier::Simple(name)) => name,
        _ => return false,
    };

    match &explode_cmd.expression.kind {
        TypedExpressionKind::ColumnReference(source) => match source.column_name.valid_ref() {
            Ok(source_name) => target.as_str() == source_name.as_str(),
            Err(_) => false,
        },
        _ => false,
    }
}
/// Expands one typed command into the AST commands that replace it.
///
/// Non-explode commands and explode commands that are already canonical are
/// passed through as a single-element vector holding a clone of the command's
/// AST handle. Any other explode command is delegated to `transform_explode`.
///
/// # Errors
///
/// Returns whatever error `transform_explode` reports.
fn normalize_command(
    cmd: &Rc<TypedCommand>,
    name_gen: &mut UniqueNameGenerator,
) -> Result<Vec<Rc<Command>>, Rc<TranslationError>> {
    match &cmd.kind {
        TypedCommandKind::Explode(explode_cmd) if !is_canonical_explode(explode_cmd) => {
            transform_explode(explode_cmd, cmd, name_gen)
        }
        _ => Ok(vec![cmd.ast.clone()]),
    }
}
/// Replaces a non-canonical explode command with an equivalent command list.
///
/// All generated commands carry the span of the original command's AST.
///
/// - Simple target `x`: a `let` command binding `x` to the original
///   expression, followed by an explode command of `x` over column `x`.
/// - Compound target: the same `let` + explode pair on a temporary name taken
///   from `name_gen`, then a `let` command assigning the temporary column to
///   the compound identifier, then a drop command removing the temporary.
///
/// # Errors
///
/// Returns the error from `explode_cmd.identifier.valid_ref()` when the
/// target identifier is not valid.
fn transform_explode(
    explode_cmd: &TypedExplodeCommand,
    cmd: &TypedCommand,
    name_gen: &mut UniqueNameGenerator,
) -> Result<Vec<Rc<Command>>, Rc<TranslationError>> {
    let span = cmd.ast.span;

    // Shared prefix of both rewrites: bind the expression to `name`, then
    // explode `name` from a column reference to itself.
    let bind_and_explode = |name: &SimpleIdentifier| -> Vec<Rc<Command>> {
        let binding = let_command()
            .named_field(name.clone(), explode_cmd.expression.ast.as_ref().clone())
            .at(span)
            .build();
        let canonical = explode_command()
            .named_field(name.clone(), column_ref(name.as_str()))
            .at(span)
            .build();
        vec![Rc::new(binding), Rc::new(canonical)]
    };

    match explode_cmd.identifier.valid_ref()? {
        Identifier::Simple(simple_id) => Ok(bind_and_explode(simple_id)),
        Identifier::Compound(compound) => {
            // The explode runs on a temporary name, which is then copied into
            // the compound target and dropped.
            let temp_name: SimpleIdentifier = name_gen.next();
            let mut commands = bind_and_explode(&temp_name);

            let original: Identifier = compound.clone().into();
            let restore = let_command()
                .named_field(original, column_ref(temp_name.as_str()))
                .at(span)
                .build();
            let cleanup = drop_command().field(temp_name).at(span).build();

            commands.push(Rc::new(restore));
            commands.push(Rc::new(cleanup));
            Ok(commands)
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use hamelin_lib::{
        tree::{
            ast::{
                identifier::CompoundIdentifier, pipeline::Pipeline, IntoTyped, TypeCheckExecutor,
            },
            builder::{
                array, column_ref, drop_command, explode_command, let_command, pipeline,
                select_command,
            },
        },
        types::{array::Array, struct_type::Struct, INT},
    };
    use pretty_assertions::assert_eq;
    use rstest::rstest;
    use std::rc::Rc;

    // Each case supplies: the input pipeline, the pipeline expected after
    // normalization, and the expected flattened output schema.
    #[rstest]
    // An explode that is already canonical must be left alone.
    #[case::canonical_unchanged(
        pipeline()
            .command(select_command().named_field("arr", array().element(1).element(2)).build())
            .command(explode_command().named_field("arr", column_ref("arr")).build())
            .build(),
        pipeline()
            .command(select_command().named_field("arr", array().element(1).element(2)).build())
            .command(explode_command().named_field("arr", column_ref("arr")).build())
            .build(),
        Struct::default().with_str("arr", INT)
    )]
    // Simple target whose name differs from the source column: let + explode.
    #[case::simple_id_different_expr(
        pipeline()
            .command(select_command().named_field("arr", array().element(1).element(2)).build())
            .command(explode_command().named_field("x", column_ref("arr")).build())
            .build(),
        pipeline()
            .command(select_command().named_field("arr", array().element(1).element(2)).build())
            .command(let_command().named_field("x", column_ref("arr")).build())
            .command(explode_command().named_field("x", column_ref("x")).build())
            .build(),
        Struct::default().with_str("x", INT).with_str("arr", Array::new(INT).into())
    )]
    // A pipeline without any explode command passes through unchanged.
    #[case::no_explode_passthrough(
        pipeline()
            .command(select_command().named_field("a", 1).named_field("b", 2).build())
            .build(),
        pipeline()
            .command(select_command().named_field("a", 1).named_field("b", 2).build())
            .build(),
        Struct::default().with_str("a", INT).with_str("b", INT)
    )]
    // Every non-canonical explode in the pipeline is rewritten independently.
    #[case::multiple_explodes(
        pipeline()
            .command(
                select_command()
                    .named_field("arr1", array().element(1))
                    .named_field("arr2", array().element(2))
                    .build()
            )
            .command(explode_command().named_field("x", column_ref("arr1")).build())
            .command(explode_command().named_field("y", column_ref("arr2")).build())
            .build(),
        pipeline()
            .command(
                select_command()
                    .named_field("arr1", array().element(1))
                    .named_field("arr2", array().element(2))
                    .build()
            )
            .command(let_command().named_field("x", column_ref("arr1")).build())
            .command(explode_command().named_field("x", column_ref("x")).build())
            .command(let_command().named_field("y", column_ref("arr2")).build())
            .command(explode_command().named_field("y", column_ref("y")).build())
            .build(),
        Struct::default()
            .with_str("y", INT)
            .with_str("x", INT)
            .with_str("arr1", Array::new(INT).into())
            .with_str("arr2", Array::new(INT).into())
    )]
    // Compound target: explode via a temporary column, assign it, then drop it.
    #[case::compound_id(
        pipeline()
            .command(select_command().named_field("arr", array().element(1)).build())
            .command(
                explode_command()
                    .named_field(
                        CompoundIdentifier::new("result".into(), "item".into(), vec![]),
                        column_ref("arr")
                    )
                    .build()
            )
            .build(),
        pipeline()
            .command(select_command().named_field("arr", array().element(1)).build())
            .command(let_command().named_field("__explode_0", column_ref("arr")).build())
            .command(
                explode_command()
                    .named_field("__explode_0", column_ref("__explode_0"))
                    .build()
            )
            .command(
                let_command()
                    .named_field(
                        CompoundIdentifier::new("result".into(), "item".into(), vec![]),
                        column_ref("__explode_0")
                    )
                    .build()
            )
            .command(drop_command().field("__explode_0").build())
            .build(),
        Struct::default()
            .with_str("result", Struct::default().with_str("item", INT).into())
            .with_str("arr", Array::new(INT).into())
    )]
    fn test_normalize_explode(
        #[case] input: Pipeline,
        #[case] expected: Pipeline,
        #[case] expected_output_schema: Struct,
    ) {
        // Type both pipelines up front so the ASTs can be compared directly.
        let typed_input = input.typed_with().typed();
        let typed_expected = expected.typed_with().typed();

        let mut ctx = StatementTranslationContext::default();
        let normalized = normalize_explode(Rc::new(typed_input), &mut ctx).unwrap();

        // The rewritten AST must match the hand-written expectation...
        assert_eq!(normalized.ast, typed_expected.ast);

        // ...and the flattened output environment must match the expected schema.
        let actual_schema = normalized.environment().flatten();
        assert_eq!(actual_schema, expected_output_schema);
    }
}