use datafusion::functions::regex::expr_fn as regex_fn;
use hamelin_lib::func::defs::{
RegexpCount, RegexpExtract2, RegexpExtract3, RegexpExtractAll2, RegexpExtractAll3, RegexpLike,
RegexpPosition2, RegexpPosition3, RegexpPosition4, RegexpReplace2, RegexpReplace3, RegexpSplit,
};
use super::DataFusionTranslationRegistry;
/// Registers the DataFusion translations for the regular-expression function definitions.
///
/// Every translation closure pulls its arguments out of `params` in positional order
/// (the subject string first, then the pattern, then any additional arguments) and
/// returns the DataFusion expression that stands in for the function call.
pub fn register(registry: &mut DataFusionTranslationRegistry) {
    use datafusion::logical_expr::{lit, BinaryExpr, Expr, Operator};
    use datafusion_functions_nested::expr_fn::array_element;

    /// Builds the string-concatenation expression `lhs || rhs`.
    fn concat(lhs: Expr, rhs: Expr) -> Expr {
        Expr::BinaryExpr(BinaryExpr::new(
            Box::new(lhs),
            Operator::StringConcat,
            Box::new(rhs),
        ))
    }

    // Count: both optional arguments of `regexp_count` are left unset.
    registry.register::<RegexpCount>(|mut params| {
        let (subject, regex) = (params.take()?.expr, params.take()?.expr);
        Ok(regex_fn::regexp_count(subject, regex, None, None))
    });

    // Like: the optional argument of `regexp_like` is left unset.
    registry.register::<RegexpLike>(|mut params| {
        let (subject, regex) = (params.take()?.expr, params.take()?.expr);
        Ok(regex_fn::regexp_like(subject, regex, None))
    });

    // Extract (string, pattern): the pattern is rewritten to `"(" || pattern || ")"`
    // and element 1 of the `regexp_match` result is returned.
    // NOTE(review): the wrapping presumably makes the whole match show up as the first
    // element even when the pattern has its own groups -- confirm against `regexp_match`.
    registry.register::<RegexpExtract2>(|mut params| {
        let (subject, regex) = (params.take()?.expr, params.take()?.expr);
        let grouped = concat(concat(lit("("), regex), lit(")"));
        let found = regex_fn::regexp_match(subject, grouped, None);
        Ok(array_element(found, lit(1i64)))
    });

    // Extract (string, pattern, group): the pattern is passed through unmodified and the
    // caller-supplied `group` expression is used directly as the array index.
    // NOTE(review): whether group 0 is meaningful here depends on `regexp_match` and
    // `array_element` indexing semantics -- verify if callers rely on it.
    registry.register::<RegexpExtract3>(|mut params| {
        let (subject, regex) = (params.take()?.expr, params.take()?.expr);
        let group_index = params.take()?.expr;
        let found = regex_fn::regexp_match(subject, regex, None);
        Ok(array_element(found, group_index))
    });

    // Extract-all (string, pattern): delegated to this crate's UDF.
    registry.register::<RegexpExtractAll2>(|mut params| {
        let (subject, regex) = (params.take()?.expr, params.take()?.expr);
        let args = vec![subject, regex];
        Ok(crate::udf::regexp_extract_all_udf().call(args))
    });

    // Extract-all (string, pattern, group): same UDF, with the group as a third argument.
    registry.register::<RegexpExtractAll3>(|mut params| {
        let (subject, regex) = (params.take()?.expr, params.take()?.expr);
        let group_index = params.take()?.expr;
        let args = vec![subject, regex, group_index];
        Ok(crate::udf::regexp_extract_all_udf().call(args))
    });

    // Replace (string, pattern): the replacement is the empty string, with the "g" flag.
    registry.register::<RegexpReplace2>(|mut params| {
        let (subject, regex) = (params.take()?.expr, params.take()?.expr);
        let empty = lit("");
        let flags = Some(lit("g"));
        Ok(regex_fn::regexp_replace(subject, regex, empty, flags))
    });

    // Replace (string, pattern, replacement): caller-supplied replacement, with the "g" flag.
    registry.register::<RegexpReplace3>(|mut params| {
        let (subject, regex) = (params.take()?.expr, params.take()?.expr);
        let substitute = params.take()?.expr;
        let flags = Some(lit("g"));
        Ok(regex_fn::regexp_replace(subject, regex, substitute, flags))
    });

    // Split (string, pattern): delegated to this crate's UDF.
    registry.register::<RegexpSplit>(|mut params| {
        let (subject, regex) = (params.take()?.expr, params.take()?.expr);
        let args = vec![subject, regex];
        Ok(crate::udf::regexp_split_udf().call(args))
    });

    // Position (string, pattern): all five optional arguments of `regexp_instr` are unset.
    registry.register::<RegexpPosition2>(|mut params| {
        let (subject, regex) = (params.take()?.expr, params.take()?.expr);
        Ok(regex_fn::regexp_instr(
            subject, regex, None, None, None, None, None,
        ))
    });

    // Position (string, pattern, start): `start` fills the first optional argument.
    registry.register::<RegexpPosition3>(|mut params| {
        let (subject, regex) = (params.take()?.expr, params.take()?.expr);
        let from = Some(params.take()?.expr);
        Ok(regex_fn::regexp_instr(
            subject, regex, from, None, None, None, None,
        ))
    });

    // Position (string, pattern, start, occurrence): `start` and `occurrence` fill the
    // first two optional arguments, in that order.
    registry.register::<RegexpPosition4>(|mut params| {
        let (subject, regex) = (params.take()?.expr, params.take()?.expr);
        let from = Some(params.take()?.expr);
        let nth = Some(params.take()?.expr);
        Ok(regex_fn::regexp_instr(
            subject, regex, from, nth, None, None, None,
        ))
    });
}