vortex_array/expr/exprs/
like.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Formatter;
5
6use prost::Message;
7use vortex_compute::arrow::IntoArrow;
8use vortex_compute::arrow::IntoVector;
9use vortex_dtype::DType;
10use vortex_error::VortexResult;
11use vortex_error::vortex_bail;
12use vortex_error::vortex_err;
13use vortex_proto::expr as pb;
14use vortex_vector::Datum;
15use vortex_vector::VectorOps;
16
17use crate::ArrayRef;
18use crate::compute::LikeOptions;
19use crate::compute::like as like_compute;
20use crate::expr::Arity;
21use crate::expr::ChildName;
22use crate::expr::ExecutionArgs;
23use crate::expr::ExprId;
24use crate::expr::Expression;
25use crate::expr::VTable;
26use crate::expr::VTableExt;
27
28/// Expression that performs SQL LIKE pattern matching.
29pub struct Like;
30
31impl VTable for Like {
32    type Options = LikeOptions;
33
34    fn id(&self) -> ExprId {
35        ExprId::from("vortex.like")
36    }
37
38    fn serialize(&self, instance: &Self::Options) -> VortexResult<Option<Vec<u8>>> {
39        Ok(Some(
40            pb::LikeOpts {
41                negated: instance.negated,
42                case_insensitive: instance.case_insensitive,
43            }
44            .encode_to_vec(),
45        ))
46    }
47
48    fn deserialize(&self, metadata: &[u8]) -> VortexResult<Self::Options> {
49        let opts = pb::LikeOpts::decode(metadata)?;
50        Ok(LikeOptions {
51            negated: opts.negated,
52            case_insensitive: opts.case_insensitive,
53        })
54    }
55
56    fn arity(&self, _options: &Self::Options) -> Arity {
57        Arity::Exact(2)
58    }
59
60    fn child_name(&self, _instance: &Self::Options, child_idx: usize) -> ChildName {
61        match child_idx {
62            0 => ChildName::from("child"),
63            1 => ChildName::from("pattern"),
64            _ => unreachable!("Invalid child index {} for Like expression", child_idx),
65        }
66    }
67
68    fn fmt_sql(
69        &self,
70        options: &Self::Options,
71        expr: &Expression,
72        f: &mut Formatter<'_>,
73    ) -> std::fmt::Result {
74        expr.child(0).fmt_sql(f)?;
75        if options.negated {
76            write!(f, " not")?;
77        }
78        if options.case_insensitive {
79            write!(f, " ilike ")?;
80        } else {
81            write!(f, " like ")?;
82        }
83        expr.child(1).fmt_sql(f)
84    }
85
86    fn return_dtype(&self, _options: &Self::Options, arg_dtypes: &[DType]) -> VortexResult<DType> {
87        let input = &arg_dtypes[0];
88        let pattern = &arg_dtypes[1];
89
90        if !input.is_utf8() {
91            vortex_bail!("LIKE expression requires UTF8 input dtype, got {}", input);
92        }
93        if !pattern.is_utf8() {
94            vortex_bail!(
95                "LIKE expression requires UTF8 pattern dtype, got {}",
96                pattern
97            );
98        }
99
100        Ok(DType::Bool(
101            (input.is_nullable() || pattern.is_nullable()).into(),
102        ))
103    }
104
105    fn evaluate(
106        &self,
107        options: &Self::Options,
108        expr: &Expression,
109        scope: &ArrayRef,
110    ) -> VortexResult<ArrayRef> {
111        let child = expr.child(0).evaluate(scope)?;
112        let pattern = expr.child(1).evaluate(scope)?;
113        like_compute(&child, &pattern, *options)
114    }
115
116    fn execute(&self, options: &Self::Options, args: ExecutionArgs) -> VortexResult<Datum> {
117        let [child, pattern]: [Datum; _] = args
118            .datums
119            .try_into()
120            .map_err(|_| vortex_err!("Wrong argument count"))?;
121
122        let child = child.into_arrow()?;
123        let pattern = pattern.into_arrow()?;
124
125        let array = match (options.negated, options.case_insensitive) {
126            (false, false) => arrow_string::like::like(child.as_ref(), pattern.as_ref()),
127            (false, true) => arrow_string::like::ilike(child.as_ref(), pattern.as_ref()),
128            (true, false) => arrow_string::like::nlike(child.as_ref(), pattern.as_ref()),
129            (true, true) => arrow_string::like::nilike(child.as_ref(), pattern.as_ref()),
130        }?;
131
132        let vector = array.into_vector()?;
133        if vector.len() == 1 && args.row_count != 1 {
134            // Arrow returns a scalar datum result
135            return Ok(Datum::Scalar(vector.scalar_at(0).into()));
136        }
137
138        Ok(Datum::Vector(array.into_vector()?.into()))
139    }
140
141    fn is_null_sensitive(&self, _instance: &Self::Options) -> bool {
142        false
143    }
144}
145
146pub fn like(child: Expression, pattern: Expression) -> Expression {
147    Like.new_expr(
148        LikeOptions {
149            negated: false,
150            case_insensitive: false,
151        },
152        [child, pattern],
153    )
154}
155
156pub fn ilike(child: Expression, pattern: Expression) -> Expression {
157    Like.new_expr(
158        LikeOptions {
159            negated: false,
160            case_insensitive: true,
161        },
162        [child, pattern],
163    )
164}
165
166pub fn not_like(child: Expression, pattern: Expression) -> Expression {
167    Like.new_expr(
168        LikeOptions {
169            negated: true,
170            case_insensitive: false,
171        },
172        [child, pattern],
173    )
174}
175
176pub fn not_ilike(child: Expression, pattern: Expression) -> Expression {
177    Like.new_expr(
178        LikeOptions {
179            negated: true,
180            case_insensitive: true,
181        },
182        [child, pattern],
183    )
184}
185
#[cfg(test)]
mod tests {
    use vortex_dtype::DType;
    use vortex_dtype::Nullability;

    use crate::ToCanonical;
    use crate::arrays::BoolArray;
    use crate::expr::exprs::get_item::get_item;
    use crate::expr::exprs::like::like;
    use crate::expr::exprs::like::not_ilike;
    use crate::expr::exprs::literal::lit;
    use crate::expr::exprs::not::not;
    use crate::expr::exprs::root::root;

    /// Evaluating `not` over the root scope flips every boolean in the input.
    #[test]
    fn invert_booleans() {
        let negation = not(root());
        let input = BoolArray::from_iter([false, true, false, false, true, true]);

        let evaluated = negation.evaluate(&input.to_array()).unwrap();
        let flipped = evaluated.to_bool().bit_buffer().iter().collect::<Vec<_>>();

        assert_eq!(flipped, vec![true, false, true, true, false, false]);
    }

    /// A `LIKE` over a non-nullable UTF8 input with a literal pattern yields a
    /// non-nullable boolean.
    #[test]
    fn dtype() {
        let scope_dtype = DType::Utf8(Nullability::NonNullable);
        let matcher = like(root(), lit("%test%"));

        let result_dtype = matcher.return_dtype(&scope_dtype).unwrap();

        assert_eq!(result_dtype, DType::Bool(Nullability::NonNullable));
    }

    /// The display form renders the child, the operator keyword(s) and the pattern.
    #[test]
    fn test_display() {
        let cases = [
            (
                like(get_item("name", root()), lit("%john%")),
                "$.name like \"%john%\"",
            ),
            (not_ilike(root(), lit("test*")), "$ not ilike \"test*\""),
        ];

        for (expression, rendered) in cases {
            assert_eq!(expression.to_string(), rendered);
        }
    }
}
234}