datafusion_physical_expr/expressions/
like.rs1use crate::PhysicalExpr;
19use arrow::datatypes::{DataType, Schema};
20use arrow::record_batch::RecordBatch;
21use datafusion_common::{Result, assert_or_internal_err};
22use datafusion_expr::{ColumnarValue, Operator};
23use datafusion_physical_expr_common::datum::apply_cmp;
24use std::hash::Hash;
25use std::{any::Any, sync::Arc};
26
27#[derive(Debug, Eq)]
29pub struct LikeExpr {
30 negated: bool,
31 case_insensitive: bool,
32 expr: Arc<dyn PhysicalExpr>,
33 pattern: Arc<dyn PhysicalExpr>,
34}
35
36impl PartialEq for LikeExpr {
38 fn eq(&self, other: &Self) -> bool {
39 self.negated == other.negated
40 && self.case_insensitive == other.case_insensitive
41 && self.expr.eq(&other.expr)
42 && self.pattern.eq(&other.pattern)
43 }
44}
45
46impl Hash for LikeExpr {
47 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
48 self.negated.hash(state);
49 self.case_insensitive.hash(state);
50 self.expr.hash(state);
51 self.pattern.hash(state);
52 }
53}
54
55impl LikeExpr {
56 pub fn new(
57 negated: bool,
58 case_insensitive: bool,
59 expr: Arc<dyn PhysicalExpr>,
60 pattern: Arc<dyn PhysicalExpr>,
61 ) -> Self {
62 Self {
63 negated,
64 case_insensitive,
65 expr,
66 pattern,
67 }
68 }
69
70 pub fn negated(&self) -> bool {
72 self.negated
73 }
74
75 pub fn case_insensitive(&self) -> bool {
77 self.case_insensitive
78 }
79
80 pub fn expr(&self) -> &Arc<dyn PhysicalExpr> {
82 &self.expr
83 }
84
85 pub fn pattern(&self) -> &Arc<dyn PhysicalExpr> {
87 &self.pattern
88 }
89
90 fn op_name(&self) -> &str {
92 match (self.negated, self.case_insensitive) {
93 (false, false) => "LIKE",
94 (true, false) => "NOT LIKE",
95 (false, true) => "ILIKE",
96 (true, true) => "NOT ILIKE",
97 }
98 }
99}
100
101impl std::fmt::Display for LikeExpr {
102 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
103 write!(f, "{} {} {}", self.expr, self.op_name(), self.pattern)
104 }
105}
106
107impl PhysicalExpr for LikeExpr {
108 fn as_any(&self) -> &dyn Any {
109 self
110 }
111
112 fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
113 Ok(DataType::Boolean)
114 }
115
116 fn nullable(&self, input_schema: &Schema) -> Result<bool> {
117 Ok(self.expr.nullable(input_schema)? || self.pattern.nullable(input_schema)?)
118 }
119
120 fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
121 let lhs = self.expr.evaluate(batch)?;
122 let rhs = self.pattern.evaluate(batch)?;
123 match (self.negated, self.case_insensitive) {
124 (false, false) => apply_cmp(Operator::LikeMatch, &lhs, &rhs),
125 (false, true) => apply_cmp(Operator::ILikeMatch, &lhs, &rhs),
126 (true, false) => apply_cmp(Operator::NotLikeMatch, &lhs, &rhs),
127 (true, true) => apply_cmp(Operator::NotILikeMatch, &lhs, &rhs),
128 }
129 }
130
131 fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
132 vec![&self.expr, &self.pattern]
133 }
134
135 fn with_new_children(
136 self: Arc<Self>,
137 children: Vec<Arc<dyn PhysicalExpr>>,
138 ) -> Result<Arc<dyn PhysicalExpr>> {
139 Ok(Arc::new(LikeExpr::new(
140 self.negated,
141 self.case_insensitive,
142 Arc::clone(&children[0]),
143 Arc::clone(&children[1]),
144 )))
145 }
146
147 fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
148 self.expr.fmt_sql(f)?;
149 write!(f, " {} ", self.op_name())?;
150 self.pattern.fmt_sql(f)
151 }
152}
153
154fn can_like_type(from_type: &DataType) -> bool {
156 match from_type {
157 DataType::Dictionary(_, inner_type_from) => **inner_type_from == DataType::Utf8,
158 _ => false,
159 }
160}
161
162pub fn like(
164 negated: bool,
165 case_insensitive: bool,
166 expr: Arc<dyn PhysicalExpr>,
167 pattern: Arc<dyn PhysicalExpr>,
168 input_schema: &Schema,
169) -> Result<Arc<dyn PhysicalExpr>> {
170 let expr_type = &expr.data_type(input_schema)?;
171 let pattern_type = &pattern.data_type(input_schema)?;
172 assert_or_internal_err!(
173 expr_type.eq(pattern_type) || can_like_type(expr_type),
174 "The type of {expr_type} AND {pattern_type} of like physical should be same"
175 );
176 Ok(Arc::new(LikeExpr::new(
177 negated,
178 case_insensitive,
179 expr,
180 pattern,
181 )))
182}
183
#[cfg(test)]
mod test {
    use super::*;
    use crate::expressions::col;
    use arrow::array::*;
    use arrow::datatypes::Field;
    use datafusion_common::cast::as_boolean_array;
    use datafusion_physical_expr_common::physical_expr::fmt_sql;

    /// Builds a two-column `Utf8` batch (`a`, `b`), evaluates `a <op> b` with
    /// the given flags and asserts that the result equals `expected`.
    fn assert_like(
        a: StringArray,
        b: StringArray,
        expected: BooleanArray,
        nullable: bool,
        negated: bool,
        case_insensitive: bool,
    ) -> Result<()> {
        let schema = Schema::new(vec![
            Field::new("a", DataType::Utf8, nullable),
            Field::new("b", DataType::Utf8, nullable),
        ]);

        let expression = like(
            negated,
            case_insensitive,
            col("a", &schema)?,
            col("b", &schema)?,
            &schema,
        )?;
        let batch = RecordBatch::try_new(
            Arc::new(schema.clone()),
            vec![Arc::new(a), Arc::new(b)],
        )?;

        let evaluated = expression
            .evaluate(&batch)?
            .into_array(batch.num_rows())
            .expect("Failed to convert to array");
        let actual =
            as_boolean_array(&evaluated).expect("failed to downcast to BooleanArray");
        assert_eq!(&expected, actual);
        Ok(())
    }

    #[test]
    fn like_op() -> Result<()> {
        // LIKE on non-nullable columns.
        assert_like(
            StringArray::from(vec!["hello world", "world"]),
            StringArray::from(vec!["%hello%", "%hello%"]),
            BooleanArray::from(vec![true, false]),
            false,
            false,
            false,
        )?;
        // NOT LIKE on nullable columns; a null input yields a null result.
        assert_like(
            StringArray::from(vec![Some("hello world"), None, Some("world")]),
            StringArray::from(vec![Some("%hello%"), None, Some("%hello%")]),
            BooleanArray::from(vec![Some(false), None, Some(true)]),
            true,
            true,
            false,
        )?;
        // ILIKE on non-nullable columns with a mixed-case pattern.
        assert_like(
            StringArray::from(vec!["hello world", "world"]),
            StringArray::from(vec!["%helLo%", "%helLo%"]),
            BooleanArray::from(vec![true, false]),
            false,
            false,
            true,
        )?;
        // NOT ILIKE on nullable columns with a mixed-case pattern.
        assert_like(
            StringArray::from(vec![Some("hello world"), None, Some("world")]),
            StringArray::from(vec![Some("%helLo%"), None, Some("%helLo%")]),
            BooleanArray::from(vec![Some(false), None, Some(true)]),
            true,
            true,
            true,
        )?;
        Ok(())
    }

    #[test]
    fn test_fmt_sql() -> Result<()> {
        let schema = Schema::new(vec![
            Field::new("a", DataType::Utf8, false),
            Field::new("b", DataType::Utf8, false),
        ]);

        let lhs = col("a", &schema)?;
        let rhs = col("b", &schema)?;
        let expr = like(false, false, lhs, rhs, &schema)?;

        // `Display` shows the column indexes.
        assert_eq!(expr.to_string(), "a@0 LIKE b@1");

        // `fmt_sql` shows the column names only.
        assert_eq!(fmt_sql(expr.as_ref()).to_string(), "a LIKE b");

        Ok(())
    }
}