Skip to main content

datafusion_spark/function/string/
like.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::ArrayRef;
19use arrow::compute::like;
20use arrow::datatypes::{DataType, Field, FieldRef};
21use datafusion_common::{Result, exec_err, internal_err};
22use datafusion_expr::ColumnarValue;
23use datafusion_expr::{
24    ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
25};
26use datafusion_functions::utils::make_scalar_function;
27use std::any::Any;
28use std::sync::Arc;
29
30/// LIKE function for case-sensitive pattern matching
31/// <https://spark.apache.org/docs/latest/api/sql/index.html#like>
32#[derive(Debug, PartialEq, Eq, Hash)]
33pub struct SparkLike {
34    signature: Signature,
35}
36
37impl Default for SparkLike {
38    fn default() -> Self {
39        Self::new()
40    }
41}
42
43impl SparkLike {
44    pub fn new() -> Self {
45        Self {
46            signature: Signature::string(2, Volatility::Immutable),
47        }
48    }
49}
50
51impl ScalarUDFImpl for SparkLike {
52    fn as_any(&self) -> &dyn Any {
53        self
54    }
55
56    fn name(&self) -> &str {
57        "like"
58    }
59
60    fn signature(&self) -> &Signature {
61        &self.signature
62    }
63
64    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
65        internal_err!("return_field_from_args should be used instead")
66    }
67
68    fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
69        let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
70        Ok(Arc::new(Field::new(
71            self.name(),
72            DataType::Boolean,
73            nullable,
74        )))
75    }
76
77    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
78        make_scalar_function(spark_like, vec![])(&args.args)
79    }
80}
81
82/// Returns true if str matches pattern (case sensitive).
83pub fn spark_like(args: &[ArrayRef]) -> Result<ArrayRef> {
84    if args.len() != 2 {
85        return exec_err!("like function requires exactly 2 arguments");
86    }
87
88    let result = like(&args[0], &args[1])?;
89    Ok(Arc::new(result))
90}
91
#[cfg(test)]
mod tests {
    use super::*;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::{Array, BooleanArray};
    use arrow::datatypes::{DataType::Boolean, Field};
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ReturnFieldArgs, ScalarUDFImpl};

    // Checks a single `like(value, pattern)` scalar invocation where both
    // arguments use the given `ScalarValue` string variant.
    macro_rules! check_like_variant {
        ($VARIANT:ident, $VALUE:expr, $PATTERN:expr, $EXPECTED:expr) => {
            test_scalar_function!(
                SparkLike::new(),
                vec![
                    ColumnarValue::Scalar(ScalarValue::$VARIANT($VALUE)),
                    ColumnarValue::Scalar(ScalarValue::$VARIANT($PATTERN))
                ],
                $EXPECTED,
                bool,
                Boolean,
                BooleanArray
            );
        };
    }

    // Runs the same case against the Utf8, LargeUtf8 and Utf8View scalar
    // variants, in that order.
    macro_rules! test_like_string_invoke {
        ($INPUT1:expr, $INPUT2:expr, $EXPECTED:expr) => {
            check_like_variant!(Utf8, $INPUT1, $INPUT2, $EXPECTED);
            check_like_variant!(LargeUtf8, $INPUT1, $INPUT2, $EXPECTED);
            check_like_variant!(Utf8View, $INPUT1, $INPUT2, $EXPECTED);
        };
    }

    #[test]
    fn test_like_invoke() -> Result<()> {
        // `_` wildcard
        test_like_string_invoke!(
            Some(String::from("Spark")),
            Some(String::from("_park")),
            Ok(Some(true))
        );
        // Matching is case-sensitive.
        test_like_string_invoke!(
            Some(String::from("Spark")),
            Some(String::from("_PARK")),
            Ok(Some(false))
        );
        test_like_string_invoke!(
            Some(String::from("SPARK")),
            Some(String::from("_park")),
            Ok(Some(false))
        );

        // Trailing `%` wildcard
        test_like_string_invoke!(
            Some(String::from("Spark")),
            Some(String::from("Sp%")),
            Ok(Some(true))
        );
        test_like_string_invoke!(
            Some(String::from("Spark")),
            Some(String::from("SP%")),
            Ok(Some(false))
        );

        // Leading `%` wildcard
        test_like_string_invoke!(
            Some(String::from("Spark")),
            Some(String::from("%ark")),
            Ok(Some(true))
        );
        test_like_string_invoke!(
            Some(String::from("Spark")),
            Some(String::from("%ARK")),
            Ok(Some(false))
        );

        // Pattern without wildcards that does not match
        test_like_string_invoke!(
            Some(String::from("Spark")),
            Some(String::from("xyz")),
            Ok(Some(false))
        );

        // A NULL on either side yields NULL.
        test_like_string_invoke!(None, Some(String::from("_park")), Ok(None));
        test_like_string_invoke!(Some(String::from("Spark")), None, Ok(None));
        test_like_string_invoke!(None, None, Ok(None));

        Ok(())
    }

    #[test]
    fn test_like_nullability() {
        let udf = SparkLike::new();

        // (str nullable, pattern nullable, expected result nullability)
        let cases = [
            (false, false, false),
            (true, false, true),
            (false, true, true),
            (true, true, true),
        ];

        for (str_nullable, pattern_nullable, expected_nullable) in cases {
            let str_field = Arc::new(Field::new("str", DataType::Utf8, str_nullable));
            let pattern_field =
                Arc::new(Field::new("pattern", DataType::Utf8, pattern_nullable));

            let returned = udf
                .return_field_from_args(ReturnFieldArgs {
                    arg_fields: &[str_field, pattern_field],
                    scalar_arguments: &[None, None],
                })
                .unwrap();

            // The result is nullable exactly when at least one input is.
            assert_eq!(
                returned.is_nullable(),
                expected_nullable,
                "str nullable: {str_nullable}, pattern nullable: {pattern_nullable}"
            );
            assert_eq!(returned.data_type(), &Boolean);
        }
    }
}