datafusion_spark/function/string/
like.rs1use arrow::array::ArrayRef;
19use arrow::compute::like;
20use arrow::datatypes::{DataType, Field, FieldRef};
21use datafusion_common::{Result, exec_err, internal_err};
22use datafusion_expr::ColumnarValue;
23use datafusion_expr::{
24 ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
25};
26use datafusion_functions::utils::make_scalar_function;
27use std::sync::Arc;
28
29#[derive(Debug, PartialEq, Eq, Hash)]
32pub struct SparkLike {
33 signature: Signature,
34}
35
36impl Default for SparkLike {
37 fn default() -> Self {
38 Self::new()
39 }
40}
41
42impl SparkLike {
43 pub fn new() -> Self {
44 Self {
45 signature: Signature::string(2, Volatility::Immutable),
46 }
47 }
48}
49
50impl ScalarUDFImpl for SparkLike {
51 fn name(&self) -> &str {
52 "like"
53 }
54
55 fn signature(&self) -> &Signature {
56 &self.signature
57 }
58
59 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
60 internal_err!("return_field_from_args should be used instead")
61 }
62
63 fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
64 let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
65 Ok(Arc::new(Field::new(
66 self.name(),
67 DataType::Boolean,
68 nullable,
69 )))
70 }
71
72 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
73 make_scalar_function(spark_like, vec![])(&args.args)
74 }
75}
76
77pub fn spark_like(args: &[ArrayRef]) -> Result<ArrayRef> {
79 if args.len() != 2 {
80 return exec_err!("like function requires exactly 2 arguments");
81 }
82
83 let result = like(&args[0], &args[1])?;
84 Ok(Arc::new(result))
85}
86
#[cfg(test)]
mod tests {
    use super::*;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::{Array, BooleanArray};
    use arrow::datatypes::DataType::Boolean;
    use datafusion_common::ScalarValue;

    /// Runs one `like(value, pattern)` scalar case against each of the three
    /// string scalar encodings: `Utf8`, `LargeUtf8` and `Utf8View`.
    macro_rules! test_like_string_invoke {
        // Internal rule: a single encoding, selected by `ScalarValue` variant.
        (@one $variant:ident, $value:expr, $pattern:expr, $expected:expr) => {
            test_scalar_function!(
                SparkLike::new(),
                vec![
                    ColumnarValue::Scalar(ScalarValue::$variant($value)),
                    ColumnarValue::Scalar(ScalarValue::$variant($pattern))
                ],
                $expected,
                bool,
                Boolean,
                BooleanArray
            );
        };
        // Public rule: fan the same case out over every string encoding.
        ($value:expr, $pattern:expr, $expected:expr) => {
            test_like_string_invoke!(@one Utf8, $value, $pattern, $expected);

            test_like_string_invoke!(@one LargeUtf8, $value, $pattern, $expected);

            test_like_string_invoke!(@one Utf8View, $value, $pattern, $expected);
        };
    }

    /// Exercises wildcard matching, case sensitivity and NULL propagation.
    #[test]
    fn test_like_invoke() -> Result<()> {
        // `_` stands in for the leading character.
        test_like_string_invoke!(
            Some("Spark".to_string()),
            Some("_park".to_string()),
            Ok(Some(true))
        );
        // Matching is case-sensitive: an upper-case pattern does not match.
        test_like_string_invoke!(
            Some("Spark".to_string()),
            Some("_PARK".to_string()),
            Ok(Some(false))
        );
        // Case-sensitive in the other direction as well.
        test_like_string_invoke!(
            Some("SPARK".to_string()),
            Some("_park".to_string()),
            Ok(Some(false))
        );
        // Trailing `%` behaves as a prefix match.
        test_like_string_invoke!(
            Some("Spark".to_string()),
            Some("Sp%".to_string()),
            Ok(Some(true))
        );
        // The prefix match is case-sensitive.
        test_like_string_invoke!(
            Some("Spark".to_string()),
            Some("SP%".to_string()),
            Ok(Some(false))
        );
        // Leading `%` behaves as a suffix match.
        test_like_string_invoke!(
            Some("Spark".to_string()),
            Some("%ark".to_string()),
            Ok(Some(true))
        );
        // The suffix match is case-sensitive.
        test_like_string_invoke!(
            Some("Spark".to_string()),
            Some("%ARK".to_string()),
            Ok(Some(false))
        );
        // A pattern without wildcards must equal the whole value.
        test_like_string_invoke!(
            Some("Spark".to_string()),
            Some("xyz".to_string()),
            Ok(Some(false))
        );
        // A NULL on either side (or both) yields NULL.
        test_like_string_invoke!(None, Some("_park".to_string()), Ok(None));
        test_like_string_invoke!(Some("Spark".to_string()), None, Ok(None));
        test_like_string_invoke!(None, None, Ok(None));

        Ok(())
    }

    /// The result field is always `Boolean`, and nullable exactly when at
    /// least one of the two argument fields is nullable.
    #[test]
    fn test_like_nullability() {
        let udf = SparkLike::new();

        // (value nullable, pattern nullable, expected result nullability)
        let cases = [
            (false, false, false),
            (true, false, true),
            (false, true, true),
            (true, true, true),
        ];

        for (value_nullable, pattern_nullable, expect_nullable) in cases {
            let value_field = Arc::new(Field::new("str", DataType::Utf8, value_nullable));
            let pattern_field =
                Arc::new(Field::new("pattern", DataType::Utf8, pattern_nullable));

            let result_field = udf
                .return_field_from_args(ReturnFieldArgs {
                    arg_fields: &[value_field, pattern_field],
                    scalar_arguments: &[None, None],
                })
                .unwrap();

            assert_eq!(result_field.is_nullable(), expect_nullable);
            assert_eq!(result_field.data_type(), &Boolean);
        }
    }
}