1use std::fmt::Formatter;
5
6use prost::Message;
7use vortex_compute::arrow::IntoArrow;
8use vortex_compute::arrow::IntoVector;
9use vortex_dtype::DType;
10use vortex_error::VortexResult;
11use vortex_error::vortex_bail;
12use vortex_error::vortex_err;
13use vortex_proto::expr as pb;
14use vortex_vector::Datum;
15use vortex_vector::VectorOps;
16
17use crate::ArrayRef;
18use crate::compute::LikeOptions;
19use crate::compute::like as like_compute;
20use crate::expr::Arity;
21use crate::expr::ChildName;
22use crate::expr::ExecutionArgs;
23use crate::expr::ExprId;
24use crate::expr::Expression;
25use crate::expr::VTable;
26use crate::expr::VTableExt;
27
/// Expression vtable for `LIKE`-style string pattern matching.
///
/// The `negated` and `case_insensitive` flags of [`LikeOptions`] select between the
/// `like`, `ilike`, `not like` and `not ilike` variants.
pub struct Like;
30
31impl VTable for Like {
32 type Options = LikeOptions;
33
34 fn id(&self) -> ExprId {
35 ExprId::from("vortex.like")
36 }
37
38 fn serialize(&self, instance: &Self::Options) -> VortexResult<Option<Vec<u8>>> {
39 Ok(Some(
40 pb::LikeOpts {
41 negated: instance.negated,
42 case_insensitive: instance.case_insensitive,
43 }
44 .encode_to_vec(),
45 ))
46 }
47
48 fn deserialize(&self, metadata: &[u8]) -> VortexResult<Self::Options> {
49 let opts = pb::LikeOpts::decode(metadata)?;
50 Ok(LikeOptions {
51 negated: opts.negated,
52 case_insensitive: opts.case_insensitive,
53 })
54 }
55
56 fn arity(&self, _options: &Self::Options) -> Arity {
57 Arity::Exact(2)
58 }
59
60 fn child_name(&self, _instance: &Self::Options, child_idx: usize) -> ChildName {
61 match child_idx {
62 0 => ChildName::from("child"),
63 1 => ChildName::from("pattern"),
64 _ => unreachable!("Invalid child index {} for Like expression", child_idx),
65 }
66 }
67
68 fn fmt_sql(
69 &self,
70 options: &Self::Options,
71 expr: &Expression,
72 f: &mut Formatter<'_>,
73 ) -> std::fmt::Result {
74 expr.child(0).fmt_sql(f)?;
75 if options.negated {
76 write!(f, " not")?;
77 }
78 if options.case_insensitive {
79 write!(f, " ilike ")?;
80 } else {
81 write!(f, " like ")?;
82 }
83 expr.child(1).fmt_sql(f)
84 }
85
86 fn return_dtype(&self, _options: &Self::Options, arg_dtypes: &[DType]) -> VortexResult<DType> {
87 let input = &arg_dtypes[0];
88 let pattern = &arg_dtypes[1];
89
90 if !input.is_utf8() {
91 vortex_bail!("LIKE expression requires UTF8 input dtype, got {}", input);
92 }
93 if !pattern.is_utf8() {
94 vortex_bail!(
95 "LIKE expression requires UTF8 pattern dtype, got {}",
96 pattern
97 );
98 }
99
100 Ok(DType::Bool(
101 (input.is_nullable() || pattern.is_nullable()).into(),
102 ))
103 }
104
105 fn evaluate(
106 &self,
107 options: &Self::Options,
108 expr: &Expression,
109 scope: &ArrayRef,
110 ) -> VortexResult<ArrayRef> {
111 let child = expr.child(0).evaluate(scope)?;
112 let pattern = expr.child(1).evaluate(scope)?;
113 like_compute(&child, &pattern, *options)
114 }
115
116 fn execute(&self, options: &Self::Options, args: ExecutionArgs) -> VortexResult<Datum> {
117 let [child, pattern]: [Datum; _] = args
118 .datums
119 .try_into()
120 .map_err(|_| vortex_err!("Wrong argument count"))?;
121
122 let child = child.into_arrow()?;
123 let pattern = pattern.into_arrow()?;
124
125 let array = match (options.negated, options.case_insensitive) {
126 (false, false) => arrow_string::like::like(child.as_ref(), pattern.as_ref()),
127 (false, true) => arrow_string::like::ilike(child.as_ref(), pattern.as_ref()),
128 (true, false) => arrow_string::like::nlike(child.as_ref(), pattern.as_ref()),
129 (true, true) => arrow_string::like::nilike(child.as_ref(), pattern.as_ref()),
130 }?;
131
132 let vector = array.into_vector()?;
133 if vector.len() == 1 && args.row_count != 1 {
134 return Ok(Datum::Scalar(vector.scalar_at(0).into()));
136 }
137
138 Ok(Datum::Vector(array.into_vector()?.into()))
139 }
140
141 fn is_null_sensitive(&self, _instance: &Self::Options) -> bool {
142 false
143 }
144}
145
146pub fn like(child: Expression, pattern: Expression) -> Expression {
147 Like.new_expr(
148 LikeOptions {
149 negated: false,
150 case_insensitive: false,
151 },
152 [child, pattern],
153 )
154}
155
156pub fn ilike(child: Expression, pattern: Expression) -> Expression {
157 Like.new_expr(
158 LikeOptions {
159 negated: false,
160 case_insensitive: true,
161 },
162 [child, pattern],
163 )
164}
165
166pub fn not_like(child: Expression, pattern: Expression) -> Expression {
167 Like.new_expr(
168 LikeOptions {
169 negated: true,
170 case_insensitive: false,
171 },
172 [child, pattern],
173 )
174}
175
176pub fn not_ilike(child: Expression, pattern: Expression) -> Expression {
177 Like.new_expr(
178 LikeOptions {
179 negated: true,
180 case_insensitive: true,
181 },
182 [child, pattern],
183 )
184}
185
#[cfg(test)]
mod tests {
    use vortex_dtype::DType;
    use vortex_dtype::Nullability;

    use crate::ToCanonical;
    use crate::arrays::BoolArray;
    use crate::expr::exprs::get_item::get_item;
    use crate::expr::exprs::like::like;
    use crate::expr::exprs::like::not_ilike;
    use crate::expr::exprs::literal::lit;
    use crate::expr::exprs::not::not;
    use crate::expr::exprs::root::root;

    // NOTE(review): this test exercises the `not` expression rather than `Like`;
    // it may belong with the `not` expression's tests — confirm.
    #[test]
    fn invert_booleans() {
        let input = BoolArray::from_iter([false, true, false, false, true, true]);
        let inverted = not(root()).evaluate(&input.to_array()).unwrap().to_bool();
        let actual = inverted.bit_buffer().iter().collect::<Vec<_>>();

        assert_eq!(actual, vec![true, false, true, true, false, false]);
    }

    // A non-nullable UTF8 input with a literal pattern yields a non-nullable boolean.
    #[test]
    fn dtype() {
        let input_dtype = DType::Utf8(Nullability::NonNullable);
        let result_dtype = like(root(), lit("%test%"))
            .return_dtype(&input_dtype)
            .unwrap();

        assert_eq!(result_dtype, DType::Bool(Nullability::NonNullable));
    }

    // The display form is `<child> [not] like|ilike <pattern>`.
    #[test]
    fn test_display() {
        let plain = like(get_item("name", root()), lit("%john%"));
        assert_eq!(plain.to_string(), "$.name like \"%john%\"");

        let negated_insensitive = not_ilike(root(), lit("test*"));
        assert_eq!(negated_insensitive.to_string(), "$ not ilike \"test*\"");
    }
}