use std::any::Any;
use std::sync::Arc;
use datafusion::arrow::array::{ArrayRef, BooleanArray};
use datafusion::arrow::datatypes::DataType;
use datafusion::common::Result as DfResult;
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
/// Scalar UDF exposed to DataFusion under the name `text_match`.
///
/// The only state it carries is the [`Signature`] describing the argument
/// types it accepts.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct TextMatch {
/// Accepted argument types and volatility, returned from `signature()`.
signature: Signature,
}
impl Default for TextMatch {
fn default() -> Self {
Self::new()
}
}
impl TextMatch {
    /// Builds the UDF with a signature of exactly two `Utf8` arguments and
    /// `Stable` volatility.
    pub fn new() -> Self {
        let arg_types = vec![DataType::Utf8, DataType::Utf8];
        let signature = Signature::exact(arg_types, Volatility::Stable);
        Self { signature }
    }
}
/// [`ScalarUDFImpl`] wiring for [`TextMatch`].
///
/// As written here the function does not inspect the contents of either
/// argument: every row of the evaluated batch yields `true`.
impl ScalarUDFImpl for TextMatch {
    /// Returns `self` as [`Any`] so callers can downcast to `TextMatch`.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Name under which the function is exposed: `text_match`.
    fn name(&self) -> &str {
        "text_match"
    }

    /// The signature built in `TextMatch::new`.
    fn signature(&self) -> &Signature {
        &self.signature
    }

    /// Always `Boolean`, regardless of the argument types passed in.
    fn return_type(&self, _arg_types: &[DataType]) -> DfResult<DataType> {
        Ok(DataType::Boolean)
    }

    /// Produces a `BooleanArray` of `true` values with one entry per row of
    /// the batch being evaluated.
    ///
    /// The argument values themselves are not read.
    fn invoke_with_args(
        &self,
        args: datafusion::logical_expr::ScalarFunctionArgs,
    ) -> DfResult<ColumnarValue> {
        // Size the output from the batch row count, not from the first
        // argument: when that argument is a scalar (e.g. a literal column
        // name) its "length" says nothing about how many rows are expected,
        // and DataFusion requires array results to have `number_rows` rows.
        // This also avoids indexing into `args.args`.
        let row_count = args.number_rows;
        let trues = BooleanArray::from(vec![true; row_count]);
        Ok(ColumnarValue::Array(Arc::new(trues) as ArrayRef))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use datafusion::arrow::array::StringArray;

    /// `return_type` reports `Boolean` for a pair of `Utf8` arguments.
    #[test]
    fn returns_boolean() {
        let udf = TextMatch::new();
        let reported = udf.return_type(&[DataType::Utf8, DataType::Utf8]).unwrap();
        assert_eq!(reported, DataType::Boolean);
    }

    /// Invoking on a two-row batch yields a two-element array of `true`.
    #[test]
    fn invoke_returns_true() {
        use datafusion::arrow::datatypes::Field;
        use datafusion::logical_expr::ScalarFunctionArgs;

        let udf = TextMatch::new();

        // Two-row inputs: the field name column and the query column.
        let field_col: ArrayRef = Arc::new(StringArray::from(vec!["body", "body"]));
        let query_col: ArrayRef = Arc::new(StringArray::from(vec!["test query", "test query"]));

        let call = ScalarFunctionArgs {
            args: vec![
                ColumnarValue::Array(field_col),
                ColumnarValue::Array(query_col),
            ],
            arg_fields: vec![],
            number_rows: 2,
            return_field: Arc::new(Field::new("", DataType::Boolean, false)),
            config_options: Arc::new(datafusion::config::ConfigOptions::new()),
        };

        let ColumnarValue::Array(arr) = udf.invoke_with_args(call).unwrap() else {
            panic!("expected array");
        };
        let bool_arr = arr.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(bool_arr.len(), 2);
        for row in 0..2 {
            assert!(bool_arr.value(row));
        }
    }
}