Skip to main content

datafusion_spark/function/string/
like.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::ArrayRef;
19use arrow::compute::like;
20use arrow::datatypes::{DataType, Field, FieldRef};
21use datafusion_common::{Result, exec_err, internal_err};
22use datafusion_expr::ColumnarValue;
23use datafusion_expr::{
24    ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
25};
26use datafusion_functions::utils::make_scalar_function;
27use std::sync::Arc;
28
29/// LIKE function for case-sensitive pattern matching
30/// <https://spark.apache.org/docs/latest/api/sql/index.html#like>
31#[derive(Debug, PartialEq, Eq, Hash)]
32pub struct SparkLike {
33    signature: Signature,
34}
35
36impl Default for SparkLike {
37    fn default() -> Self {
38        Self::new()
39    }
40}
41
42impl SparkLike {
43    pub fn new() -> Self {
44        Self {
45            signature: Signature::string(2, Volatility::Immutable),
46        }
47    }
48}
49
50impl ScalarUDFImpl for SparkLike {
51    fn name(&self) -> &str {
52        "like"
53    }
54
55    fn signature(&self) -> &Signature {
56        &self.signature
57    }
58
59    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
60        internal_err!("return_field_from_args should be used instead")
61    }
62
63    fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
64        let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
65        Ok(Arc::new(Field::new(
66            self.name(),
67            DataType::Boolean,
68            nullable,
69        )))
70    }
71
72    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
73        make_scalar_function(spark_like, vec![])(&args.args)
74    }
75}
76
77/// Returns true if str matches pattern (case sensitive).
78pub fn spark_like(args: &[ArrayRef]) -> Result<ArrayRef> {
79    if args.len() != 2 {
80        return exec_err!("like function requires exactly 2 arguments");
81    }
82
83    let result = like(&args[0], &args[1])?;
84    Ok(Arc::new(result))
85}
86
#[cfg(test)]
mod tests {
    use super::*;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::{Array, BooleanArray};
    use arrow::datatypes::DataType::Boolean;
    use datafusion_common::ScalarValue;

    // Runs `SparkLike` once with both scalar arguments wrapped in the given
    // `ScalarValue` string variant and checks the boolean outcome.
    macro_rules! check_like_for_variant {
        ($VARIANT:ident, $STR:expr, $PATTERN:expr, $EXPECTED:expr) => {
            test_scalar_function!(
                SparkLike::new(),
                vec![
                    ColumnarValue::Scalar(ScalarValue::$VARIANT($STR)),
                    ColumnarValue::Scalar(ScalarValue::$VARIANT($PATTERN))
                ],
                $EXPECTED,
                bool,
                Boolean,
                BooleanArray
            );
        };
    }

    // Repeats one expectation across the Utf8, LargeUtf8 and Utf8View variants.
    macro_rules! test_like_string_invoke {
        ($INPUT1:expr, $INPUT2:expr, $EXPECTED:expr) => {
            check_like_for_variant!(Utf8, $INPUT1, $INPUT2, $EXPECTED);

            check_like_for_variant!(LargeUtf8, $INPUT1, $INPUT2, $EXPECTED);

            check_like_for_variant!(Utf8View, $INPUT1, $INPUT2, $EXPECTED);
        };
    }

    #[test]
    fn test_like_invoke() -> Result<()> {
        // (string, pattern, expected outcome); `None` stands for SQL NULL.
        let cases: &[(Option<&str>, Option<&str>, Option<bool>)] = &[
            (Some("Spark"), Some("_park"), Some(true)),
            (Some("Spark"), Some("_PARK"), Some(false)), // case-sensitive
            (Some("SPARK"), Some("_park"), Some(false)), // case-sensitive
            (Some("Spark"), Some("Sp%"), Some(true)),
            (Some("Spark"), Some("SP%"), Some(false)), // case-sensitive
            (Some("Spark"), Some("%ark"), Some(true)),
            (Some("Spark"), Some("%ARK"), Some(false)), // case-sensitive
            (Some("Spark"), Some("xyz"), Some(false)),
            (None, Some("_park"), None),
            (Some("Spark"), None, None),
            (None, None, None),
        ];

        for &(input, pattern, want) in cases {
            test_like_string_invoke!(
                input.map(String::from),
                pattern.map(String::from),
                Ok(want)
            );
        }

        Ok(())
    }

    #[test]
    fn test_like_nullability() {
        let udf = SparkLike::new();

        // (str nullable, pattern nullable, expected result nullability):
        // the result is nullable exactly when at least one input is nullable.
        let cases = [
            (false, false, false),
            (true, false, true),
            (false, true, true),
            (true, true, true),
        ];

        for (str_nullable, pattern_nullable, want_nullable) in cases {
            let str_field = Arc::new(Field::new("str", DataType::Utf8, str_nullable));
            let pattern_field =
                Arc::new(Field::new("pattern", DataType::Utf8, pattern_nullable));

            let returned = udf
                .return_field_from_args(ReturnFieldArgs {
                    arg_fields: &[str_field, pattern_field],
                    scalar_arguments: &[None, None],
                })
                .unwrap();

            assert_eq!(returned.is_nullable(), want_nullable);
            assert_eq!(returned.data_type(), &Boolean);
        }
    }
}