vortex_array/scalar_fn/fns/like/
mod.rs1mod kernel;
5
6use std::borrow::Cow;
7use std::fmt::Display;
8use std::fmt::Formatter;
9
10pub use kernel::*;
11use prost::Message;
12use vortex_error::VortexResult;
13use vortex_error::vortex_bail;
14use vortex_proto::expr as pb;
15use vortex_session::VortexSession;
16use vortex_session::registry::CachedId;
17
18use crate::ArrayRef;
19use crate::ExecutionCtx;
20use crate::arrow::Datum;
21use crate::arrow::from_arrow_columnar;
22use crate::dtype::DType;
23use crate::expr::Expression;
24use crate::expr::and;
25use crate::scalar_fn::Arity;
26use crate::scalar_fn::ChildName;
27use crate::scalar_fn::ExecutionArgs;
28use crate::scalar_fn::ScalarFnId;
29use crate::scalar_fn::ScalarFnVTable;
30
31#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)]
33pub struct LikeOptions {
34 pub negated: bool,
35 pub case_insensitive: bool,
36}
37
38impl Display for LikeOptions {
39 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
40 if self.negated {
41 write!(f, "NOT ")?;
42 }
43 if self.case_insensitive {
44 write!(f, "ILIKE")
45 } else {
46 write!(f, "LIKE")
47 }
48 }
49}
50
51#[derive(Clone)]
53pub struct Like;
54
55impl ScalarFnVTable for Like {
56 type Options = LikeOptions;
57
58 fn id(&self) -> ScalarFnId {
59 static ID: CachedId = CachedId::new("vortex.like");
60 *ID
61 }
62
63 fn serialize(&self, instance: &Self::Options) -> VortexResult<Option<Vec<u8>>> {
64 Ok(Some(
65 pb::LikeOpts {
66 negated: instance.negated,
67 case_insensitive: instance.case_insensitive,
68 }
69 .encode_to_vec(),
70 ))
71 }
72
73 fn deserialize(
74 &self,
75 _metadata: &[u8],
76 _session: &VortexSession,
77 ) -> VortexResult<Self::Options> {
78 let opts = pb::LikeOpts::decode(_metadata)?;
79 Ok(LikeOptions {
80 negated: opts.negated,
81 case_insensitive: opts.case_insensitive,
82 })
83 }
84
85 fn arity(&self, _options: &Self::Options) -> Arity {
86 Arity::Exact(2)
87 }
88
89 fn child_name(&self, _instance: &Self::Options, child_idx: usize) -> ChildName {
90 match child_idx {
91 0 => ChildName::from("child"),
92 1 => ChildName::from("pattern"),
93 _ => unreachable!("Invalid child index {} for Like expression", child_idx),
94 }
95 }
96
97 fn fmt_sql(
98 &self,
99 options: &Self::Options,
100 expr: &Expression,
101 f: &mut Formatter<'_>,
102 ) -> std::fmt::Result {
103 expr.child(0).fmt_sql(f)?;
104 if options.negated {
105 write!(f, " not")?;
106 }
107 if options.case_insensitive {
108 write!(f, " ilike ")?;
109 } else {
110 write!(f, " like ")?;
111 }
112 expr.child(1).fmt_sql(f)
113 }
114
115 fn return_dtype(&self, _options: &Self::Options, arg_dtypes: &[DType]) -> VortexResult<DType> {
116 let input = &arg_dtypes[0];
117 let pattern = &arg_dtypes[1];
118
119 if !input.is_utf8() {
120 vortex_bail!("LIKE expression requires UTF8 input dtype, got {}", input);
121 }
122 if !pattern.is_utf8() {
123 vortex_bail!(
124 "LIKE expression requires UTF8 pattern dtype, got {}",
125 pattern
126 );
127 }
128
129 Ok(DType::Bool(
130 (input.is_nullable() || pattern.is_nullable()).into(),
131 ))
132 }
133
134 fn execute(
135 &self,
136 options: &Self::Options,
137 args: &dyn ExecutionArgs,
138 ctx: &mut ExecutionCtx,
139 ) -> VortexResult<ArrayRef> {
140 let child = args.get(0)?;
141 let pattern = args.get(1)?;
142
143 arrow_like(&child, &pattern, *options, ctx)
144 }
145
146 fn validity(
147 &self,
148 _options: &Self::Options,
149 expression: &Expression,
150 ) -> VortexResult<Option<Expression>> {
151 tracing::warn!("Computing validity for LIKE expression");
152 let child_validity = expression.child(0).validity()?;
153 let pattern_validity = expression.child(1).validity()?;
154 Ok(Some(and(child_validity, pattern_validity)))
155 }
156
157 fn is_null_sensitive(&self, _instance: &Self::Options) -> bool {
158 false
159 }
160
161 fn is_fallible(&self, _options: &Self::Options) -> bool {
162 false
163 }
164}
165
166pub(crate) fn arrow_like(
168 array: &ArrayRef,
169 pattern: &ArrayRef,
170 options: LikeOptions,
171 ctx: &mut ExecutionCtx,
172) -> VortexResult<ArrayRef> {
173 let nullable = array.dtype().is_nullable() | pattern.dtype().is_nullable();
174 let len = array.len();
175 assert_eq!(
176 array.len(),
177 pattern.len(),
178 "Arrow Like: length mismatch for {}",
179 array.encoding_id()
180 );
181
182 let lhs = Datum::try_new(array, ctx)?;
184 let rhs = Datum::try_new_with_target_datatype(pattern, lhs.data_type(), ctx)?;
185
186 let result = match (options.negated, options.case_insensitive) {
187 (false, false) => arrow_string::like::like(&lhs, &rhs)?,
188 (true, false) => arrow_string::like::nlike(&lhs, &rhs)?,
189 (false, true) => arrow_string::like::ilike(&lhs, &rhs)?,
190 (true, true) => arrow_string::like::nilike(&lhs, &rhs)?,
191 };
192
193 from_arrow_columnar(&result, len, nullable, ctx)
194}
195
196#[derive(Debug, PartialEq)]
198pub(crate) enum LikeVariant<'a> {
199 Exact(Cow<'a, str>),
200 Prefix(Cow<'a, str>),
201}
202
203impl<'a> LikeVariant<'a> {
204 pub(crate) fn from_str(string: &'a str) -> Option<LikeVariant<'a>> {
206 let mut literal = None;
207 let mut chars = string.char_indices();
208
209 while let Some((idx, c)) = chars.next() {
210 match c {
211 '\\' => {
212 let literal = literal.get_or_insert_with(|| string[..idx].to_string());
213 match chars.next() {
214 Some((_, escaped)) => literal.push(escaped),
215 None => literal.push('\\'),
216 }
217 }
218 '%' | '_' => {
219 return match literal {
220 Some(literal) => (!literal.is_empty())
221 .then_some(LikeVariant::Prefix(Cow::Owned(literal))),
222 None => {
223 (idx != 0).then_some(LikeVariant::Prefix(Cow::Borrowed(&string[..idx])))
224 }
225 };
226 }
227 c => {
228 if let Some(literal) = &mut literal {
229 literal.push(c);
230 }
231 }
232 }
233 }
234
235 Some(match literal {
236 Some(literal) => LikeVariant::Exact(Cow::Owned(literal)),
237 None => LikeVariant::Exact(Cow::Borrowed(string)),
238 })
239 }
240}
241
242#[cfg(test)]
243mod tests {
244 use std::borrow::Cow;
245
246 use crate::IntoArray;
247 use crate::VortexSessionExecute;
248 use crate::array_session;
249 use crate::arrays::BoolArray;
250 use crate::assert_arrays_eq;
251 use crate::dtype::DType;
252 use crate::dtype::Nullability;
253 use crate::expr::get_item;
254 use crate::expr::like;
255 use crate::expr::lit;
256 use crate::expr::not;
257 use crate::expr::not_ilike;
258 use crate::expr::root;
259 use crate::scalar_fn::fns::like::LikeVariant;
260
261 #[test]
262 fn invert_booleans() {
263 let not_expr = not(root());
264 let bools = BoolArray::from_iter([false, true, false, false, true, true]);
265 let mut ctx = array_session().create_execution_ctx();
266 assert_arrays_eq!(
267 bools.into_array().apply(¬_expr).unwrap(),
268 BoolArray::from_iter([true, false, true, true, false, false]),
269 &mut ctx
270 );
271 }
272
273 #[test]
274 fn dtype() {
275 let dtype = DType::Utf8(Nullability::NonNullable);
276 let like_expr = like(root(), lit("%test%"));
277 assert_eq!(
278 like_expr.return_dtype(&dtype).unwrap(),
279 DType::Bool(Nullability::NonNullable)
280 );
281 }
282
283 #[test]
284 fn signature() {
285 let like_expr = like(root(), lit("%test%"));
286 assert!(!like_expr.signature().is_null_sensitive());
287 assert!(!like_expr.signature().is_fallible());
288 }
289
290 #[test]
291 fn test_display() {
292 let expr = like(get_item("name", root()), lit("%john%"));
293 assert_eq!(expr.to_string(), "$.name like \"%john%\"");
294
295 let expr2 = not_ilike(root(), lit("test*"));
296 assert_eq!(expr2.to_string(), "$ not ilike \"test*\"");
297 }
298
299 fn assert_borrowed_exact(pattern: &str, expected: &str) {
300 let Some(LikeVariant::Exact(actual)) = LikeVariant::from_str(pattern) else {
301 panic!("expected borrowed exact pattern");
302 };
303 assert!(matches!(actual, Cow::Borrowed(_)));
304 assert_eq!(actual.as_ref(), expected);
305 }
306
307 fn assert_owned_exact(pattern: &str, expected: &str) {
308 let Some(LikeVariant::Exact(actual)) = LikeVariant::from_str(pattern) else {
309 panic!("expected owned exact pattern");
310 };
311 assert!(matches!(actual, Cow::Owned(_)));
312 assert_eq!(actual.as_ref(), expected);
313 }
314
315 fn assert_borrowed_prefix(pattern: &str, expected: &str) {
316 let Some(LikeVariant::Prefix(actual)) = LikeVariant::from_str(pattern) else {
317 panic!("expected borrowed prefix pattern");
318 };
319 assert!(matches!(actual, Cow::Borrowed(_)));
320 assert_eq!(actual.as_ref(), expected);
321 }
322
323 fn assert_owned_prefix(pattern: &str, expected: &str) {
324 let Some(LikeVariant::Prefix(actual)) = LikeVariant::from_str(pattern) else {
325 panic!("expected owned prefix pattern");
326 };
327 assert!(matches!(actual, Cow::Owned(_)));
328 assert_eq!(actual.as_ref(), expected);
329 }
330
331 #[test]
332 fn test_like_variant_borrowed_patterns() {
333 assert_borrowed_exact("simple", "simple");
334 assert_borrowed_prefix("prefix%", "prefix");
335 assert_borrowed_prefix("first%rest_stuff", "first");
336 }
337
338 #[test]
339 fn test_like_variant_escaped_patterns() {
340 assert_owned_prefix(r"\%%", "%");
341 assert_owned_prefix(r"\_%", "_");
342 assert_owned_prefix(r"\\%", "\\");
343 assert_owned_exact(r"\%", "%");
344 assert_owned_exact("trailing\\", "trailing\\");
345 }
346
347 #[test]
348 fn test_like_variant_unsupported_patterns() {
349 assert_eq!(LikeVariant::from_str("%suffix"), None);
350 assert_eq!(LikeVariant::from_str(r"%\%%"), None);
351 assert_eq!(LikeVariant::from_str("_pattern"), None);
352 }
353}