datafusion_physical_expr/expressions/
like.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::PhysicalExpr;
19use arrow::datatypes::{DataType, Schema};
20use arrow::record_batch::RecordBatch;
21use datafusion_common::{Result, assert_or_internal_err};
22use datafusion_expr::{ColumnarValue, Operator};
23use datafusion_physical_expr_common::datum::apply_cmp;
24use std::hash::Hash;
25use std::{any::Any, sync::Arc};
26
/// Physical expression for the `LIKE` family of operators: `LIKE`, `NOT LIKE`,
/// `ILIKE` and `NOT ILIKE`.
///
/// Which of the four operators is applied is decided by the `negated` and
/// `case_insensitive` flags.
#[derive(Debug, Eq)]
pub struct LikeExpr {
    /// `true` selects the `NOT ...` form of the operator.
    negated: bool,
    /// `true` selects the `ILIKE` form of the operator.
    case_insensitive: bool,
    /// Expression producing the values that are matched (left-hand side).
    expr: Arc<dyn PhysicalExpr>,
    /// Expression producing the pattern to match against (right-hand side).
    pattern: Arc<dyn PhysicalExpr>,
}
35
36// Manually derive PartialEq and Hash to work around https://github.com/rust-lang/rust/issues/78808
37impl PartialEq for LikeExpr {
38    fn eq(&self, other: &Self) -> bool {
39        self.negated == other.negated
40            && self.case_insensitive == other.case_insensitive
41            && self.expr.eq(&other.expr)
42            && self.pattern.eq(&other.pattern)
43    }
44}
45
46impl Hash for LikeExpr {
47    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
48        self.negated.hash(state);
49        self.case_insensitive.hash(state);
50        self.expr.hash(state);
51        self.pattern.hash(state);
52    }
53}
54
55impl LikeExpr {
56    pub fn new(
57        negated: bool,
58        case_insensitive: bool,
59        expr: Arc<dyn PhysicalExpr>,
60        pattern: Arc<dyn PhysicalExpr>,
61    ) -> Self {
62        Self {
63            negated,
64            case_insensitive,
65            expr,
66            pattern,
67        }
68    }
69
70    /// Is negated
71    pub fn negated(&self) -> bool {
72        self.negated
73    }
74
75    /// Is case insensitive
76    pub fn case_insensitive(&self) -> bool {
77        self.case_insensitive
78    }
79
80    /// Input expression
81    pub fn expr(&self) -> &Arc<dyn PhysicalExpr> {
82        &self.expr
83    }
84
85    /// Pattern expression
86    pub fn pattern(&self) -> &Arc<dyn PhysicalExpr> {
87        &self.pattern
88    }
89
90    /// Operator name
91    fn op_name(&self) -> &str {
92        match (self.negated, self.case_insensitive) {
93            (false, false) => "LIKE",
94            (true, false) => "NOT LIKE",
95            (false, true) => "ILIKE",
96            (true, true) => "NOT ILIKE",
97        }
98    }
99}
100
101impl std::fmt::Display for LikeExpr {
102    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
103        write!(f, "{} {} {}", self.expr, self.op_name(), self.pattern)
104    }
105}
106
107impl PhysicalExpr for LikeExpr {
108    fn as_any(&self) -> &dyn Any {
109        self
110    }
111
112    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
113        Ok(DataType::Boolean)
114    }
115
116    fn nullable(&self, input_schema: &Schema) -> Result<bool> {
117        Ok(self.expr.nullable(input_schema)? || self.pattern.nullable(input_schema)?)
118    }
119
120    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
121        let lhs = self.expr.evaluate(batch)?;
122        let rhs = self.pattern.evaluate(batch)?;
123        match (self.negated, self.case_insensitive) {
124            (false, false) => apply_cmp(Operator::LikeMatch, &lhs, &rhs),
125            (false, true) => apply_cmp(Operator::ILikeMatch, &lhs, &rhs),
126            (true, false) => apply_cmp(Operator::NotLikeMatch, &lhs, &rhs),
127            (true, true) => apply_cmp(Operator::NotILikeMatch, &lhs, &rhs),
128        }
129    }
130
131    fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
132        vec![&self.expr, &self.pattern]
133    }
134
135    fn with_new_children(
136        self: Arc<Self>,
137        children: Vec<Arc<dyn PhysicalExpr>>,
138    ) -> Result<Arc<dyn PhysicalExpr>> {
139        Ok(Arc::new(LikeExpr::new(
140            self.negated,
141            self.case_insensitive,
142            Arc::clone(&children[0]),
143            Arc::clone(&children[1]),
144        )))
145    }
146
147    fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
148        self.expr.fmt_sql(f)?;
149        write!(f, " {} ", self.op_name())?;
150        self.pattern.fmt_sql(f)
151    }
152}
153
154/// used for optimize Dictionary like
155fn can_like_type(from_type: &DataType) -> bool {
156    match from_type {
157        DataType::Dictionary(_, inner_type_from) => **inner_type_from == DataType::Utf8,
158        _ => false,
159    }
160}
161
/// Create a like expression, erroring if the argument types are not compatible.
///
/// The types are accepted when `expr` and `pattern` resolve to the same
/// `DataType` under `input_schema`, or when the type of `expr` is accepted by
/// `can_like_type`.
///
/// # Errors
///
/// Propagates any error raised while resolving either data type, and fails
/// (via `assert_or_internal_err!`) when the types are not accepted.
pub fn like(
    negated: bool,
    case_insensitive: bool,
    expr: Arc<dyn PhysicalExpr>,
    pattern: Arc<dyn PhysicalExpr>,
    input_schema: &Schema,
) -> Result<Arc<dyn PhysicalExpr>> {
    let expr_type = &expr.data_type(input_schema)?;
    let pattern_type = &pattern.data_type(input_schema)?;
    // Only `expr_type` is inspected by `can_like_type`; `pattern_type` is not
    // checked in that branch.
    assert_or_internal_err!(
        expr_type.eq(pattern_type) || can_like_type(expr_type),
        "The type of {expr_type} AND {pattern_type} of like physical should be same"
    );
    Ok(Arc::new(LikeExpr::new(
        negated,
        case_insensitive,
        expr,
        pattern,
    )))
}
183
#[cfg(test)]
mod test {
    use super::*;
    use crate::expressions::col;
    use arrow::array::*;
    use arrow::datatypes::Field;
    use datafusion_common::cast::as_boolean_array;
    use datafusion_physical_expr_common::physical_expr::fmt_sql;

    // Builds a two-column (`a`, `b`) Utf8 batch, evaluates `a <op> b` through
    // `like`, and asserts the boolean result.
    //
    // Arguments, in order:
    //   $A_VEC            - values for column `a` (the matched input)
    //   $B_VEC            - values for column `b` (the pattern)
    //   $VEC              - expected boolean results
    //   $NULLABLE         - nullability of both schema fields
    //   $NEGATED          - forwarded to `like` as `negated`
    //   $CASE_INSENSITIVE - forwarded to `like` as `case_insensitive`
    //
    // The expansion uses `?`, so it must be invoked inside a function that
    // returns `Result`. The trailing comma in the invocation is required.
    macro_rules! test_like {
        ($A_VEC:expr, $B_VEC:expr, $VEC:expr, $NULLABLE: expr, $NEGATED:expr, $CASE_INSENSITIVE:expr,) => {{
            let schema = Schema::new(vec![
                Field::new("a", DataType::Utf8, $NULLABLE),
                Field::new("b", DataType::Utf8, $NULLABLE),
            ]);
            let a = StringArray::from($A_VEC);
            let b = StringArray::from($B_VEC);

            let expression = like(
                $NEGATED,
                $CASE_INSENSITIVE,
                col("a", &schema)?,
                col("b", &schema)?,
                &schema,
            )?;
            let batch = RecordBatch::try_new(
                Arc::new(schema.clone()),
                vec![Arc::new(a), Arc::new(b)],
            )?;

            // Evaluate the expression and materialize the result as an array.
            let result = expression
                .evaluate(&batch)?
                .into_array(batch.num_rows())
                .expect("Failed to convert to array");
            let result =
                as_boolean_array(&result).expect("failed to downcast to BooleanArray");
            let expected = &BooleanArray::from($VEC);
            assert_eq!(expected, result);
        }};
    }

    // Exercises all four operator variants; the nullable cases also check
    // that a null input/pattern row yields a null result.
    #[test]
    fn like_op() -> Result<()> {
        test_like!(
            vec!["hello world", "world"],
            vec!["%hello%", "%hello%"],
            vec![true, false],
            false,
            false,
            false,
        ); // like
        test_like!(
            vec![Some("hello world"), None, Some("world")],
            vec![Some("%hello%"), None, Some("%hello%")],
            vec![Some(false), None, Some(true)],
            true,
            true,
            false,
        ); // not like
        test_like!(
            vec!["hello world", "world"],
            vec!["%helLo%", "%helLo%"],
            vec![true, false],
            false,
            false,
            true,
        ); // ilike
        test_like!(
            vec![Some("hello world"), None, Some("world")],
            vec![Some("%helLo%"), None, Some("%helLo%")],
            vec![Some(false), None, Some(true)],
            true,
            true,
            true,
        ); // not ilike

        Ok(())
    }

    // Checks that `Display` includes column indices while `fmt_sql` prints
    // bare column names.
    #[test]
    fn test_fmt_sql() -> Result<()> {
        let schema = Schema::new(vec![
            Field::new("a", DataType::Utf8, false),
            Field::new("b", DataType::Utf8, false),
        ]);

        let expr = like(
            false,
            false,
            col("a", &schema)?,
            col("b", &schema)?,
            &schema,
        )?;

        // Display format
        let display_string = expr.to_string();
        assert_eq!(display_string, "a@0 LIKE b@1");

        // fmt_sql format
        let sql_string = fmt_sql(expr.as_ref()).to_string();
        assert_eq!(sql_string, "a LIKE b");

        Ok(())
    }
}