1use vortex_array::ArrayRef;
5use vortex_array::ArrayView;
6use vortex_array::ExecutionCtx;
7use vortex_array::IntoArray;
8use vortex_array::ToCanonical;
9use vortex_array::arrays::BoolArray;
10use vortex_array::arrays::varbin::VarBinArrayExt;
11use vortex_array::match_each_integer_ptype;
12use vortex_array::scalar_fn::fns::like::LikeKernel;
13use vortex_array::scalar_fn::fns::like::LikeOptions;
14use vortex_error::VortexResult;
15
16use crate::FSST;
17use crate::FSSTArrayExt;
18use crate::dfa::FsstMatcher;
19use crate::dfa::dfa_scan_to_bitbuf;
20
21impl LikeKernel for FSST {
22 fn like(
23 array: ArrayView<'_, Self>,
24 pattern: &ArrayRef,
25 options: LikeOptions,
26 _ctx: &mut ExecutionCtx,
27 ) -> VortexResult<Option<ArrayRef>> {
28 let Some(pattern_scalar) = pattern.as_constant() else {
29 return Ok(None);
30 };
31
32 if options.case_insensitive {
33 return Ok(None);
34 }
35
36 let pattern_bytes: &[u8] = if let Some(s) = pattern_scalar.as_utf8_opt() {
37 let Some(v) = s.value() else {
38 return Ok(None);
39 };
40 v.as_ref()
41 } else if let Some(b) = pattern_scalar.as_binary_opt() {
42 let Some(v) = b.value() else {
43 return Ok(None);
44 };
45 v
46 } else {
47 return Ok(None);
48 };
49
50 let symbols = array.symbols();
51 let symbol_lengths = array.symbol_lengths();
52
53 let Some(matcher) =
54 FsstMatcher::try_new(symbols.as_slice(), symbol_lengths.as_slice(), pattern_bytes)?
55 else {
56 return Ok(None);
57 };
58
59 let negated = options.negated;
60 let codes = array.codes();
61 let offsets = codes.offsets().to_primitive();
62 let all_bytes = codes.bytes();
63 let all_bytes = all_bytes.as_slice();
64 let n = codes.len();
65
66 let result = match_each_integer_ptype!(offsets.ptype(), |T| {
67 let off = offsets.as_slice::<T>();
68 dfa_scan_to_bitbuf(n, off, all_bytes, negated, |codes| matcher.matches(codes))
69 });
70
71 let validity = array
74 .codes()
75 .validity()?
76 .union_nullability(pattern_scalar.dtype().nullability());
77
78 Ok(Some(BoolArray::new(result, validity).into_array()))
79 }
80}
81
82#[cfg(test)]
83mod tests {
84 use std::sync::LazyLock;
85
86 use vortex_array::Canonical;
87 use vortex_array::IntoArray;
88 use vortex_array::VortexSessionExecute;
89 use vortex_array::arrays::BoolArray;
90 use vortex_array::arrays::ConstantArray;
91 use vortex_array::arrays::VarBinArray;
92 use vortex_array::arrays::scalar_fn::ScalarFnFactoryExt;
93 use vortex_array::assert_arrays_eq;
94 use vortex_array::dtype::DType;
95 use vortex_array::dtype::Nullability;
96 use vortex_array::scalar_fn::fns::like::Like;
97 use vortex_array::scalar_fn::fns::like::LikeKernel;
98 use vortex_array::scalar_fn::fns::like::LikeOptions;
99 use vortex_array::session::ArraySession;
100 use vortex_error::VortexResult;
101 use vortex_session::VortexSession;
102
103 use crate::FSST;
104 use crate::FSSTArray;
105 use crate::fsst_compress;
106 use crate::fsst_train_compressor;
107
108 static SESSION: LazyLock<VortexSession> =
109 LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
110
111 fn make_fsst(strings: &[Option<&str>], nullability: Nullability) -> FSSTArray {
112 let varbin = VarBinArray::from_iter(strings.iter().copied(), DType::Utf8(nullability));
113 let compressor = fsst_train_compressor(&varbin);
114 let len = varbin.len();
115 let dtype = varbin.dtype().clone();
116 fsst_compress(varbin, len, &dtype, &compressor)
117 }
118
119 fn run_like(array: FSSTArray, pattern: &str, opts: LikeOptions) -> VortexResult<BoolArray> {
120 let len = array.len();
121 let arr = array.into_array();
122 let pattern = ConstantArray::new(pattern, len).into_array();
123 let result = Like
124 .try_new_array(len, opts, [arr, pattern])?
125 .into_array()
126 .execute::<Canonical>(&mut SESSION.create_execution_ctx())?;
127 Ok(result.into_bool())
128 }
129
130 fn like(array: FSSTArray, pattern: &str) -> VortexResult<BoolArray> {
131 run_like(array, pattern, LikeOptions::default())
132 }
133
134 #[test]
135 fn test_like_prefix() -> VortexResult<()> {
136 let fsst = make_fsst(
137 &[
138 Some("http://example.com"),
139 Some("http://test.org"),
140 Some("ftp://files.net"),
141 Some("http://vortex.dev"),
142 Some("ssh://server.io"),
143 ],
144 Nullability::NonNullable,
145 );
146 let result = like(fsst, "http%")?;
147 assert_arrays_eq!(
148 &result,
149 &BoolArray::from_iter([true, true, false, true, false])
150 );
151 Ok(())
152 }
153
154 #[test]
155 fn test_like_prefix_with_nulls() -> VortexResult<()> {
156 let fsst = make_fsst(
157 &[Some("hello"), None, Some("help"), None, Some("goodbye")],
158 Nullability::Nullable,
159 );
160 let result = like(fsst, "hel%")?; assert_arrays_eq!(
162 &result,
163 &BoolArray::from_iter([Some(true), None, Some(true), None, Some(false)])
164 );
165 Ok(())
166 }
167
168 #[test]
169 fn test_like_contains() -> VortexResult<()> {
170 let fsst = make_fsst(
171 &[
172 Some("hello world"),
173 Some("say hello"),
174 Some("goodbye"),
175 Some("hellooo"),
176 ],
177 Nullability::NonNullable,
178 );
179 let result = like(fsst, "%hello%")?;
180 assert_arrays_eq!(&result, &BoolArray::from_iter([true, true, false, true]));
181 Ok(())
182 }
183
184 #[test]
185 fn test_like_contains_cross_symbol() -> VortexResult<()> {
186 let fsst = make_fsst(
187 &[
188 Some("the quick brown fox jumps over the lazy dog"),
189 Some("a short string"),
190 Some("the lazy dog sleeps"),
191 Some("no match"),
192 ],
193 Nullability::NonNullable,
194 );
195 let result = like(fsst, "%lazy dog%")?;
196 assert_arrays_eq!(&result, &BoolArray::from_iter([true, false, true, false]));
197 Ok(())
198 }
199
200 #[test]
201 fn test_not_like_contains() -> VortexResult<()> {
202 let fsst = make_fsst(
203 &[Some("foobar_sdf"), Some("sdf_start"), Some("nothing")],
204 Nullability::NonNullable,
205 );
206 let opts = LikeOptions {
207 negated: true,
208 case_insensitive: false,
209 };
210 let result = run_like(fsst, "%sdf%", opts)?;
211 assert_arrays_eq!(&result, &BoolArray::from_iter([false, false, true]));
212 Ok(())
213 }
214
215 #[test]
216 fn test_like_match_all() -> VortexResult<()> {
217 let fsst = make_fsst(
218 &[Some("abc"), Some(""), Some("xyz")],
219 Nullability::NonNullable,
220 );
221 let result = like(fsst, "%")?;
222 assert_arrays_eq!(&result, &BoolArray::from_iter([true, true, true]));
223 Ok(())
224 }
225
226 #[test]
230 fn test_like_prefix_kernel_handles() -> VortexResult<()> {
231 let fsst = make_fsst(
232 &[Some("http://a.com"), Some("ftp://b.com")],
233 Nullability::NonNullable,
234 );
235 let pattern = ConstantArray::new("http%", fsst.len()).into_array();
236 let mut ctx = SESSION.create_execution_ctx();
237
238 let fsst = fsst.as_view();
239 let result = <FSST as LikeKernel>::like(fsst, &pattern, LikeOptions::default(), &mut ctx)?;
240 assert!(result.is_some(), "FSST LikeKernel should handle prefix%");
241 assert_arrays_eq!(result.unwrap(), BoolArray::from_iter([true, false]));
242 Ok(())
243 }
244
245 #[test]
247 fn test_like_contains_kernel_handles() -> VortexResult<()> {
248 let fsst = make_fsst(
249 &[Some("hello world"), Some("goodbye")],
250 Nullability::NonNullable,
251 );
252 let pattern = ConstantArray::new("%world%", fsst.len()).into_array();
253 let mut ctx = SESSION.create_execution_ctx();
254
255 let fsst = fsst.as_view();
256 let result = <FSST as LikeKernel>::like(fsst, &pattern, LikeOptions::default(), &mut ctx)?;
257 assert!(result.is_some(), "FSST LikeKernel should handle %needle%");
258 assert_arrays_eq!(result.unwrap(), BoolArray::from_iter([true, false]));
259 Ok(())
260 }
261
262 #[test]
264 fn test_like_kernel_falls_back_for_complex_pattern() -> VortexResult<()> {
265 let fsst = make_fsst(&[Some("abc"), Some("def")], Nullability::NonNullable);
266 let mut ctx = SESSION.create_execution_ctx();
267
268 let pattern = ConstantArray::new("a_c", fsst.len()).into_array();
270 let fsst_v = fsst.as_view();
271 let result =
272 <FSST as LikeKernel>::like(fsst_v, &pattern, LikeOptions::default(), &mut ctx)?;
273 assert!(result.is_none(), "underscore pattern should fall back");
274
275 let pattern = ConstantArray::new("abc%", fsst.len()).into_array();
277 let opts = LikeOptions {
278 negated: false,
279 case_insensitive: true,
280 };
281 let result = <FSST as LikeKernel>::like(fsst_v, &pattern, opts, &mut ctx)?;
282 assert!(result.is_none(), "ilike should fall back");
283
284 Ok(())
285 }
286
287 #[test]
288 fn test_like_long_prefix_handled_by_flat_dfa() -> VortexResult<()> {
289 let fsst = make_fsst(
290 &[
291 Some("abcdefghijklmn-tail"),
292 Some("abcdefghijklmx-tail"),
293 Some("abcdefghijklmn"),
294 ],
295 Nullability::NonNullable,
296 );
297 let pattern = "abcdefghijklmn%";
298
299 let fsst = fsst.as_view();
300 let direct = <FSST as LikeKernel>::like(
301 fsst,
302 &ConstantArray::new(pattern, fsst.len()).into_array(),
303 LikeOptions::default(),
304 &mut SESSION.create_execution_ctx(),
305 )?;
306 assert!(
307 direct.is_some(),
308 "14-byte prefixes are now handled by the flat prefix DFA"
309 );
310 assert_arrays_eq!(direct.unwrap(), BoolArray::from_iter([true, false, true]));
311 Ok(())
312 }
313
314 #[test]
315 fn test_like_long_contains_falls_back_but_still_matches() -> VortexResult<()> {
316 let needle = "a".repeat(255);
317 let matching = format!("xx{needle}yy");
318 let non_matching = format!("xx{}byy", "a".repeat(254));
319 let exact = needle.clone();
320 let pattern = format!("%{needle}%");
321
322 let fsst = make_fsst(
323 &[Some(&matching), Some(&non_matching), Some(&exact)],
324 Nullability::NonNullable,
325 );
326
327 let fsst_v = fsst.as_view();
328 let direct = <FSST as LikeKernel>::like(
329 fsst_v,
330 &ConstantArray::new(pattern.as_str(), fsst.len()).into_array(),
331 LikeOptions::default(),
332 &mut SESSION.create_execution_ctx(),
333 )?;
334 assert!(
335 direct.is_none(),
336 "contains needles longer than 254 bytes exceed the DFA's u8 state space"
337 );
338
339 let result = like(fsst, &pattern)?;
340 assert_arrays_eq!(&result, &BoolArray::from_iter([true, false, true]));
341 Ok(())
342 }
343
344 #[test]
345 fn test_like_contains_len_254_kernel_handles() -> VortexResult<()> {
346 let needle = "a".repeat(254);
347 let matching = format!("xx{needle}yy");
348 let non_matching = format!("xx{}byy", "a".repeat(253));
349 let pattern = format!("%{needle}%");
350
351 let fsst = make_fsst(
352 &[Some(&matching), Some(&non_matching), Some(needle.as_str())],
353 Nullability::NonNullable,
354 );
355
356 let fsst = fsst.as_view();
357 let direct = <FSST as LikeKernel>::like(
358 fsst,
359 &ConstantArray::new(pattern.as_str(), fsst.len()).into_array(),
360 LikeOptions::default(),
361 &mut SESSION.create_execution_ctx(),
362 )?;
363 assert!(
364 direct.is_some(),
365 "254-byte contains needle should stay on the DFA path"
366 );
367 assert_arrays_eq!(direct.unwrap(), BoolArray::from_iter([true, false, true]));
368 Ok(())
369 }
370}