vortex_array/expr/exprs/
like.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Formatter;
5
6use prost::Message;
7use vortex_dtype::DType;
8use vortex_error::{VortexResult, vortex_bail};
9use vortex_proto::expr as pb;
10
11use crate::ArrayRef;
12use crate::compute::{LikeOptions, like as like_compute};
13use crate::expr::{ChildName, ExprId, Expression, ExpressionView, VTable, VTableExt};
14
15/// Expression that performs SQL LIKE pattern matching.
16pub struct Like;
17
18impl VTable for Like {
19    type Instance = LikeOptions;
20
21    fn id(&self) -> ExprId {
22        ExprId::from("vortex.like")
23    }
24
25    fn serialize(&self, instance: &Self::Instance) -> VortexResult<Option<Vec<u8>>> {
26        Ok(Some(
27            pb::LikeOpts {
28                negated: instance.negated,
29                case_insensitive: instance.case_insensitive,
30            }
31            .encode_to_vec(),
32        ))
33    }
34
35    fn deserialize(&self, metadata: &[u8]) -> VortexResult<Option<Self::Instance>> {
36        let opts = pb::LikeOpts::decode(metadata)?;
37        Ok(Some(LikeOptions {
38            negated: opts.negated,
39            case_insensitive: opts.case_insensitive,
40        }))
41    }
42
43    fn validate(&self, expr: &ExpressionView<Self>) -> VortexResult<()> {
44        if expr.children().len() != 2 {
45            vortex_bail!(
46                "Like expression requires exactly 2 children, got {}",
47                expr.children().len()
48            );
49        }
50        Ok(())
51    }
52
53    fn child_name(&self, _instance: &Self::Instance, child_idx: usize) -> ChildName {
54        match child_idx {
55            0 => ChildName::from("child"),
56            1 => ChildName::from("pattern"),
57            _ => unreachable!("Invalid child index {} for Like expression", child_idx),
58        }
59    }
60
61    fn fmt_sql(&self, expr: &ExpressionView<Self>, f: &mut Formatter<'_>) -> std::fmt::Result {
62        expr.child(0).fmt_sql(f)?;
63        if expr.data().negated {
64            write!(f, " not")?;
65        }
66        if expr.data().case_insensitive {
67            write!(f, " ilike ")?;
68        } else {
69            write!(f, " like ")?;
70        }
71        expr.child(1).fmt_sql(f)
72    }
73
74    fn return_dtype(&self, expr: &ExpressionView<Self>, scope: &DType) -> VortexResult<DType> {
75        let input = expr.children()[0].return_dtype(scope)?;
76        let pattern = expr.children()[1].return_dtype(scope)?;
77
78        if !input.is_utf8() {
79            vortex_bail!("LIKE expression requires UTF8 input dtype, got {}", input);
80        }
81        if !pattern.is_utf8() {
82            vortex_bail!(
83                "LIKE expression requires UTF8 pattern dtype, got {}",
84                pattern
85            );
86        }
87
88        Ok(DType::Bool(
89            (input.is_nullable() || pattern.is_nullable()).into(),
90        ))
91    }
92
93    fn evaluate(&self, expr: &ExpressionView<Self>, scope: &ArrayRef) -> VortexResult<ArrayRef> {
94        let child = expr.child(0).evaluate(scope)?;
95        let pattern = expr.child(1).evaluate(scope)?;
96        like_compute(&child, &pattern, *expr.data())
97    }
98}
99
100pub fn like(child: Expression, pattern: Expression) -> Expression {
101    Like.new_expr(
102        LikeOptions {
103            negated: false,
104            case_insensitive: false,
105        },
106        [child, pattern],
107    )
108}
109
110pub fn ilike(child: Expression, pattern: Expression) -> Expression {
111    Like.new_expr(
112        LikeOptions {
113            negated: false,
114            case_insensitive: true,
115        },
116        [child, pattern],
117    )
118}
119
120pub fn not_like(child: Expression, pattern: Expression) -> Expression {
121    Like.new_expr(
122        LikeOptions {
123            negated: true,
124            case_insensitive: false,
125        },
126        [child, pattern],
127    )
128}
129
130pub fn not_ilike(child: Expression, pattern: Expression) -> Expression {
131    Like.new_expr(
132        LikeOptions {
133            negated: true,
134            case_insensitive: true,
135        },
136        [child, pattern],
137    )
138}
139
#[cfg(test)]
mod tests {
    use vortex_dtype::{DType, Nullability};

    use crate::ToCanonical;
    use crate::arrays::BoolArray;
    use crate::expr::exprs::get_item::get_item;
    use crate::expr::exprs::like::{ilike, like, not_ilike, not_like};
    use crate::expr::exprs::literal::lit;
    use crate::expr::exprs::not::not;
    use crate::expr::exprs::root::root;

    // NOTE(review): this test exercises the `not` expression rather than Like; it looks like
    // it was carried over from another module. Kept as-is; consider moving it next to `not`.
    #[test]
    fn invert_booleans() {
        let not_expr = not(root());
        let bools = BoolArray::from_iter([false, true, false, false, true, true]);
        assert_eq!(
            not_expr
                .evaluate(&bools.to_array())
                .unwrap()
                .to_bool()
                .bit_buffer()
                .iter()
                .collect::<Vec<_>>(),
            vec![true, false, true, true, false, false]
        );
    }

    /// Non-nullable input and pattern yield a non-nullable boolean.
    #[test]
    fn dtype() {
        let dtype = DType::Utf8(Nullability::NonNullable);
        let like_expr = like(root(), lit("%test%"));
        assert_eq!(
            like_expr.return_dtype(&dtype).unwrap(),
            DType::Bool(Nullability::NonNullable)
        );
    }

    /// A nullable input makes the result nullable even when the pattern is not.
    #[test]
    fn dtype_nullable_input() {
        let dtype = DType::Utf8(Nullability::Nullable);
        let like_expr = like(root(), lit("%test%"));
        assert_eq!(
            like_expr.return_dtype(&dtype).unwrap(),
            DType::Bool(Nullability::Nullable)
        );
    }

    /// A non-UTF8 input dtype is rejected.
    #[test]
    fn dtype_rejects_non_utf8_input() {
        let dtype = DType::Bool(Nullability::NonNullable);
        let like_expr = like(root(), lit("%test%"));
        assert!(like_expr.return_dtype(&dtype).is_err());
    }

    /// A non-UTF8 pattern dtype is rejected even when the input is UTF8.
    #[test]
    fn dtype_rejects_non_utf8_pattern() {
        let dtype = DType::Bool(Nullability::NonNullable);
        let like_expr = like(lit("test"), root());
        assert!(like_expr.return_dtype(&dtype).is_err());
    }

    #[test]
    fn test_display() {
        let expr = like(get_item("name", root()), lit("%john%"));
        assert_eq!(expr.to_string(), "$.name like \"%john%\"");

        let expr2 = not_ilike(root(), lit("test*"));
        assert_eq!(expr2.to_string(), "$ not ilike \"test*\"");
    }

    /// The remaining two constructors render their own keyword combinations.
    #[test]
    fn test_display_ilike_and_not_like() {
        let expr = ilike(root(), lit("%john%"));
        assert_eq!(expr.to_string(), "$ ilike \"%john%\"");

        let expr2 = not_like(root(), lit("test*"));
        assert_eq!(expr2.to_string(), "$ not like \"test*\"");
    }
}