vortex_array/expr/exprs/
like.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Formatter;
5
6use prost::Message;
7use vortex_dtype::DType;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_proto::expr as pb;
11
12use crate::ArrayRef;
13use crate::compute::LikeOptions;
14use crate::compute::like as like_compute;
15use crate::expr::ChildName;
16use crate::expr::ExprId;
17use crate::expr::Expression;
18use crate::expr::ExpressionView;
19use crate::expr::VTable;
20use crate::expr::VTableExt;
21
/// Expression that performs SQL `LIKE` pattern matching.
///
/// The expression takes exactly two children, named `child` (the input being
/// matched) and `pattern`; both must have a UTF-8 dtype. Its [`LikeOptions`]
/// instance data carries the `negated` and `case_insensitive` flags, which are
/// rendered as `not` and `ilike` respectively in the SQL-style display form.
pub struct Like;
24
25impl VTable for Like {
26    type Instance = LikeOptions;
27
28    fn id(&self) -> ExprId {
29        ExprId::from("vortex.like")
30    }
31
32    fn serialize(&self, instance: &Self::Instance) -> VortexResult<Option<Vec<u8>>> {
33        Ok(Some(
34            pb::LikeOpts {
35                negated: instance.negated,
36                case_insensitive: instance.case_insensitive,
37            }
38            .encode_to_vec(),
39        ))
40    }
41
42    fn deserialize(&self, metadata: &[u8]) -> VortexResult<Option<Self::Instance>> {
43        let opts = pb::LikeOpts::decode(metadata)?;
44        Ok(Some(LikeOptions {
45            negated: opts.negated,
46            case_insensitive: opts.case_insensitive,
47        }))
48    }
49
50    fn validate(&self, expr: &ExpressionView<Self>) -> VortexResult<()> {
51        if expr.children().len() != 2 {
52            vortex_bail!(
53                "Like expression requires exactly 2 children, got {}",
54                expr.children().len()
55            );
56        }
57        Ok(())
58    }
59
60    fn child_name(&self, _instance: &Self::Instance, child_idx: usize) -> ChildName {
61        match child_idx {
62            0 => ChildName::from("child"),
63            1 => ChildName::from("pattern"),
64            _ => unreachable!("Invalid child index {} for Like expression", child_idx),
65        }
66    }
67
68    fn fmt_sql(&self, expr: &ExpressionView<Self>, f: &mut Formatter<'_>) -> std::fmt::Result {
69        expr.child(0).fmt_sql(f)?;
70        if expr.data().negated {
71            write!(f, " not")?;
72        }
73        if expr.data().case_insensitive {
74            write!(f, " ilike ")?;
75        } else {
76            write!(f, " like ")?;
77        }
78        expr.child(1).fmt_sql(f)
79    }
80
81    fn return_dtype(&self, expr: &ExpressionView<Self>, scope: &DType) -> VortexResult<DType> {
82        let input = expr.children()[0].return_dtype(scope)?;
83        let pattern = expr.children()[1].return_dtype(scope)?;
84
85        if !input.is_utf8() {
86            vortex_bail!("LIKE expression requires UTF8 input dtype, got {}", input);
87        }
88        if !pattern.is_utf8() {
89            vortex_bail!(
90                "LIKE expression requires UTF8 pattern dtype, got {}",
91                pattern
92            );
93        }
94
95        Ok(DType::Bool(
96            (input.is_nullable() || pattern.is_nullable()).into(),
97        ))
98    }
99
100    fn evaluate(&self, expr: &ExpressionView<Self>, scope: &ArrayRef) -> VortexResult<ArrayRef> {
101        let child = expr.child(0).evaluate(scope)?;
102        let pattern = expr.child(1).evaluate(scope)?;
103        like_compute(&child, &pattern, *expr.data())
104    }
105
106    fn is_null_sensitive(&self, _instance: &Self::Instance) -> bool {
107        false
108    }
109}
110
111pub fn like(child: Expression, pattern: Expression) -> Expression {
112    Like.new_expr(
113        LikeOptions {
114            negated: false,
115            case_insensitive: false,
116        },
117        [child, pattern],
118    )
119}
120
121pub fn ilike(child: Expression, pattern: Expression) -> Expression {
122    Like.new_expr(
123        LikeOptions {
124            negated: false,
125            case_insensitive: true,
126        },
127        [child, pattern],
128    )
129}
130
131pub fn not_like(child: Expression, pattern: Expression) -> Expression {
132    Like.new_expr(
133        LikeOptions {
134            negated: true,
135            case_insensitive: false,
136        },
137        [child, pattern],
138    )
139}
140
141pub fn not_ilike(child: Expression, pattern: Expression) -> Expression {
142    Like.new_expr(
143        LikeOptions {
144            negated: true,
145            case_insensitive: true,
146        },
147        [child, pattern],
148    )
149}
150
#[cfg(test)]
mod tests {
    use vortex_dtype::DType;
    use vortex_dtype::Nullability;

    use crate::ToCanonical;
    use crate::arrays::BoolArray;
    use crate::expr::exprs::get_item::get_item;
    use crate::expr::exprs::like::ilike;
    use crate::expr::exprs::like::like;
    use crate::expr::exprs::like::not_ilike;
    use crate::expr::exprs::like::not_like;
    use crate::expr::exprs::literal::lit;
    use crate::expr::exprs::not::not;
    use crate::expr::exprs::root::root;

    // NOTE(review): this test exercises the `not` expression only; it does not
    // touch `Like`. It is retained as-is so no existing coverage is lost.
    #[test]
    fn invert_booleans() {
        let not_expr = not(root());
        let bools = BoolArray::from_iter([false, true, false, false, true, true]);
        assert_eq!(
            not_expr
                .evaluate(&bools.to_array())
                .unwrap()
                .to_bool()
                .bit_buffer()
                .iter()
                .collect::<Vec<_>>(),
            vec![true, false, true, true, false, false]
        );
    }

    /// Non-nullable input and pattern produce a non-nullable boolean.
    #[test]
    fn dtype() {
        let dtype = DType::Utf8(Nullability::NonNullable);
        let like_expr = like(root(), lit("%test%"));
        assert_eq!(
            like_expr.return_dtype(&dtype).unwrap(),
            DType::Bool(Nullability::NonNullable)
        );
    }

    /// A nullable input makes the boolean result nullable.
    #[test]
    fn dtype_nullable_input() {
        let dtype = DType::Utf8(Nullability::Nullable);
        let like_expr = like(root(), lit("%test%"));
        assert_eq!(
            like_expr.return_dtype(&dtype).unwrap(),
            DType::Bool(Nullability::Nullable)
        );
    }

    /// A non-UTF8 input dtype is rejected when resolving the return dtype.
    #[test]
    fn dtype_rejects_non_utf8_input() {
        let dtype = DType::Bool(Nullability::NonNullable);
        let like_expr = like(root(), lit("%test%"));
        assert!(like_expr.return_dtype(&dtype).is_err());
    }

    /// Every combination of the `negated` / `case_insensitive` flags renders
    /// with the expected SQL keyword sequence.
    #[test]
    fn test_display() {
        let expr = like(get_item("name", root()), lit("%john%"));
        assert_eq!(expr.to_string(), "$.name like \"%john%\"");

        let expr2 = not_ilike(root(), lit("test*"));
        assert_eq!(expr2.to_string(), "$ not ilike \"test*\"");

        let expr3 = ilike(root(), lit("test*"));
        assert_eq!(expr3.to_string(), "$ ilike \"test*\"");

        let expr4 = not_like(root(), lit("test*"));
        assert_eq!(expr4.to_string(), "$ not like \"test*\"");
    }
}