datafusion_spark/function/string/
like.rs1use arrow::array::ArrayRef;
19use arrow::compute::like;
20use arrow::datatypes::{DataType, Field, FieldRef};
21use datafusion_common::{Result, exec_err, internal_err};
22use datafusion_expr::ColumnarValue;
23use datafusion_expr::{
24 ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
25};
26use datafusion_functions::utils::make_scalar_function;
27use std::any::Any;
28use std::sync::Arc;
29
30#[derive(Debug, PartialEq, Eq, Hash)]
33pub struct SparkLike {
34 signature: Signature,
35}
36
37impl Default for SparkLike {
38 fn default() -> Self {
39 Self::new()
40 }
41}
42
43impl SparkLike {
44 pub fn new() -> Self {
45 Self {
46 signature: Signature::string(2, Volatility::Immutable),
47 }
48 }
49}
50
51impl ScalarUDFImpl for SparkLike {
52 fn as_any(&self) -> &dyn Any {
53 self
54 }
55
56 fn name(&self) -> &str {
57 "like"
58 }
59
60 fn signature(&self) -> &Signature {
61 &self.signature
62 }
63
64 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
65 internal_err!("return_field_from_args should be used instead")
66 }
67
68 fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
69 let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
70 Ok(Arc::new(Field::new(
71 self.name(),
72 DataType::Boolean,
73 nullable,
74 )))
75 }
76
77 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
78 make_scalar_function(spark_like, vec![])(&args.args)
79 }
80}
81
82pub fn spark_like(args: &[ArrayRef]) -> Result<ArrayRef> {
84 if args.len() != 2 {
85 return exec_err!("like function requires exactly 2 arguments");
86 }
87
88 let result = like(&args[0], &args[1])?;
89 Ok(Arc::new(result))
90}
91
#[cfg(test)]
mod tests {
    use super::*;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::{Array, BooleanArray};
    use arrow::datatypes::{DataType::Boolean, Field};
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ReturnFieldArgs, ScalarUDFImpl};

    // Checks one (input, pattern, expected) triple against `SparkLike` using
    // scalar arguments, once for each of the `Utf8`, `LargeUtf8` and
    // `Utf8View` scalar variants.
    macro_rules! test_like_string_invoke {
        // Internal rule: a single check for the given `ScalarValue` variant.
        (@variant $VARIANT:ident, $INPUT1:expr, $INPUT2:expr, $EXPECTED:expr) => {
            test_scalar_function!(
                SparkLike::new(),
                vec![
                    ColumnarValue::Scalar(ScalarValue::$VARIANT($INPUT1)),
                    ColumnarValue::Scalar(ScalarValue::$VARIANT($INPUT2))
                ],
                $EXPECTED,
                bool,
                Boolean,
                BooleanArray
            );
        };
        // Public rule: fan the same triple out over every string variant.
        ($INPUT1:expr, $INPUT2:expr, $EXPECTED:expr) => {
            test_like_string_invoke!(@variant Utf8, $INPUT1, $INPUT2, $EXPECTED);
            test_like_string_invoke!(@variant LargeUtf8, $INPUT1, $INPUT2, $EXPECTED);
            test_like_string_invoke!(@variant Utf8View, $INPUT1, $INPUT2, $EXPECTED);
        };
    }

    /// Scalar invocations: `_` and `%` wildcards, case-mismatched inputs
    /// (expected not to match), a plain non-matching pattern, and null
    /// operands (expected to yield null).
    #[test]
    fn test_like_invoke() -> Result<()> {
        // (input, pattern, expected match result)
        let non_null_cases = [
            ("Spark", "_park", true),
            ("Spark", "_PARK", false),
            ("SPARK", "_park", false),
            ("Spark", "Sp%", true),
            ("Spark", "SP%", false),
            ("Spark", "%ark", true),
            ("Spark", "%ARK", false),
            ("Spark", "xyz", false),
        ];

        for (input, pattern, is_match) in non_null_cases {
            test_like_string_invoke!(
                Some(String::from(input)),
                Some(String::from(pattern)),
                Ok(Some(is_match))
            );
        }

        // A null on either side (or both) produces a null result.
        test_like_string_invoke!(None, Some(String::from("_park")), Ok(None));
        test_like_string_invoke!(Some(String::from("Spark")), None, Ok(None));
        test_like_string_invoke!(None, None, Ok(None));

        Ok(())
    }

    /// The returned field is always `Boolean`, and nullable exactly when at
    /// least one of the two argument fields is nullable.
    #[test]
    fn test_like_nullability() {
        let udf = SparkLike::new();

        // Builds the two `Utf8` argument fields with the requested
        // nullability and asks the UDF for its return field.
        let return_field = |str_nullable: bool, pattern_nullable: bool| {
            let arg_fields = [
                Arc::new(Field::new("str", DataType::Utf8, str_nullable)),
                Arc::new(Field::new("pattern", DataType::Utf8, pattern_nullable)),
            ];
            udf.return_field_from_args(ReturnFieldArgs {
                arg_fields: &arg_fields,
                scalar_arguments: &[None, None],
            })
            .unwrap()
        };

        // Same order as before: none nullable, first only, second only, both.
        let combinations = [(false, false), (true, false), (false, true), (true, true)];

        for (str_nullable, pattern_nullable) in combinations {
            let field = return_field(str_nullable, pattern_nullable);

            assert_eq!(field.is_nullable(), str_nullable || pattern_nullable);
            assert_eq!(field.data_type(), &Boolean);
        }
    }
}