Skip to main content

arrow_string/
like.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! String predicate kernels for Arrow arrays.
19//!
20//! Provides SQL `LIKE`/`ILIKE` kernels as well as related
21//! string predicates such as `contains`, `starts_with`, `ends_with`, and
22//! ASCII case-insensitive equality.
23
24use crate::predicate::Predicate;
25
26use arrow_array::cast::AsArray;
27use arrow_array::*;
28use arrow_schema::*;
29use arrow_select::take::take;
30
31use std::sync::Arc;
32
33use crate::binary_like::binary_apply;
34pub use arrow_array::StringArrayType;
35
/// The predicate a kernel in this module is asked to evaluate.
///
/// The `bool` carried by [`Op::Like`] and [`Op::ILike`] is the negation flag:
/// `true` selects the `NOT LIKE` / `NOT ILIKE` form.
#[derive(Debug)]
pub(crate) enum Op {
    Like(bool),
    ILike(bool),
    Contains,
    EqIgnoreAsciiCase,
    StartsWith,
    EndsWith,
}

impl std::fmt::Display for Op {
    /// Writes the upper-case operator name (the form embedded in error messages).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Op::Like(false) => "LIKE",
            Op::Like(true) => "NLIKE",
            Op::ILike(false) => "ILIKE",
            Op::ILike(true) => "NILIKE",
            Op::Contains => "CONTAINS",
            Op::EqIgnoreAsciiCase => "EQ_IGNORE_ASCII_CASE",
            Op::StartsWith => "STARTS_WITH",
            Op::EndsWith => "ENDS_WITH",
        };
        f.write_str(name)
    }
}
60
/// Perform SQL `left LIKE right`
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
///
/// Dictionary-encoded inputs are matched on the type of their dictionary values.
///
/// # Wildcards
///
/// There are two wildcards supported with the LIKE operator:
///
/// 1. `%` - The percent sign represents zero, one, or multiple characters
/// 2. `_` - The underscore represents a single character
///
/// # Errors
///
/// Returns an error if neither input is a scalar and their lengths differ, if
/// the pair of data types is not supported, or if a pattern cannot be turned
/// into a predicate.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::like;
/// let strings = StringArray::from(vec!["Arrow", "Arrow", "Arrow", "Ar"]);
/// let patterns = StringArray::from(vec!["A%", "B%", "A.", "A_"]);
///
/// let result = like(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
/// ```
pub fn like(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::Like(false), left, right)
}
88
/// Perform SQL `left ILIKE right`
///
/// # Notes
/// - This is a case-insensitive version of [`like`]
/// - See the documentation on [`like`] for more details
/// - Implements loose matching as defined by the Unicode standard. For example,
///   the `ff` ligature is not equivalent to `FF` and `ß` is not equivalent to `SS`
///
/// # Errors
///
/// Returns an error if neither input is a scalar and their lengths differ, if
/// the pair of data types is not supported, or if a pattern cannot be turned
/// into a predicate.
pub fn ilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::ILike(false), left, right)
}
99
/// Perform SQL `left NOT LIKE right`
///
/// # Notes
/// - This is the negation of [`like`]
/// - See the documentation on [`like`] for more details
///
/// # Errors
///
/// Returns an error if neither input is a scalar and their lengths differ, if
/// the pair of data types is not supported, or if a pattern cannot be turned
/// into a predicate.
pub fn nlike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::Like(true), left, right)
}
108
/// Perform SQL `left NOT ILIKE right`
///
/// # Notes
/// - This is the negation of [`ilike`]
/// - See the documentation on [`ilike`] for more details
///
/// # Errors
///
/// Returns an error if neither input is a scalar and their lengths differ, if
/// the pair of data types is not supported, or if a pattern cannot be turned
/// into a predicate.
pub fn nilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::ILike(true), left, right)
}
117
/// Perform SQL `STARTSWITH(left, right)`
///
/// Each result slot is `true` when the `left` value begins with the `right` value.
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
/// - Binary
/// - LargeBinary
/// - BinaryView
///
/// # Errors
///
/// Returns an error if neither input is a scalar and their lengths differ, or
/// if the pair of data types is not supported.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::starts_with;
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs", "arrow-rs", "Parquet"]);
/// let patterns = StringArray::from(vec!["arr", "arrow", "arrow-cpp", "p"]);
///
/// let result = starts_with(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, true, false, false]));
/// ```
pub fn starts_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::StartsWith, left, right)
}
143
/// Perform SQL `ENDSWITH(left, right)`
///
/// Each result slot is `true` when the `left` value ends with the `right` value.
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
/// - Binary
/// - LargeBinary
/// - BinaryView
///
/// # Errors
///
/// Returns an error if neither input is a scalar and their lengths differ, or
/// if the pair of data types is not supported.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::ends_with;
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs",  "Parquet"]);
/// let patterns = StringArray::from(vec!["arr", "-rs", "t"]);
///
/// let result = ends_with(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![false, true, true]));
/// ```
pub fn ends_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::EndsWith, left, right)
}
169
/// Perform SQL `CONTAINS(left, right)`
///
/// Each result slot is `true` when the `right` value occurs somewhere inside
/// the `left` value.
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
/// - Binary
/// - LargeBinary
/// - BinaryView
///
/// # Errors
///
/// Returns an error if neither input is a scalar and their lengths differ, or
/// if the pair of data types is not supported.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::contains;
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs", "arrow-rs", "Parquet"]);
/// let patterns = StringArray::from(vec!["arr", "-rs", "arrow-cpp", "X"]);
///
/// let result = contains(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, true, false, false]));
/// ```
pub fn contains(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::Contains, left, right)
}
195
/// Perform equality check on two arrays using an ASCII case-insensitive match.
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
///
/// # Errors
///
/// Returns an error if neither input is a scalar and their lengths differ, or
/// if the pair of data types is not supported.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::eq_ignore_ascii_case;
/// let strings = StringArray::from(vec!["arrow", "rs", "arrow-rS", "Parquet"]);
/// let patterns = StringArray::from(vec!["ARROW", "rS", "ARROW-rs", "arrow"]);
///
/// let result = eq_ignore_ascii_case(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, true, true, false]));
/// ```
pub fn eq_ignore_ascii_case(
    left: &dyn Datum,
    right: &dyn Datum,
) -> Result<BooleanArray, ArrowError> {
    like_op(Op::EqIgnoreAsciiCase, left, right)
}
219
220fn like_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<BooleanArray, ArrowError> {
221    use arrow_schema::DataType::*;
222    let (l, l_s) = lhs.get();
223    let (r, r_s) = rhs.get();
224
225    if l.len() != r.len() && !l_s && !r_s {
226        return Err(ArrowError::InvalidArgumentError(format!(
227            "Cannot compare arrays of different lengths, got {} vs {}",
228            l.len(),
229            r.len()
230        )));
231    }
232
233    let l_v = l.as_any_dictionary_opt();
234    let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l);
235
236    let r_v = r.as_any_dictionary_opt();
237    let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r);
238
239    match (l.data_type(), r.data_type()) {
240        (Utf8, Utf8) => string_apply::<&GenericStringArray<i32>>(
241            op,
242            l.as_string(),
243            l_s,
244            l_v,
245            r.as_string(),
246            r_s,
247            r_v,
248        ),
249        (LargeUtf8, LargeUtf8) => string_apply::<&GenericStringArray<i64>>(
250            op,
251            l.as_string(),
252            l_s,
253            l_v,
254            r.as_string(),
255            r_s,
256            r_v,
257        ),
258        (Utf8View, Utf8View) => string_apply::<&StringViewArray>(
259            op,
260            l.as_string_view(),
261            l_s,
262            l_v,
263            r.as_string_view(),
264            r_s,
265            r_v,
266        ),
267        (Binary, Binary) => binary_apply::<&GenericBinaryArray<i32>>(
268            op.try_into()?,
269            l.as_binary(),
270            l_s,
271            l_v,
272            r.as_binary(),
273            r_s,
274            r_v,
275        ),
276        (LargeBinary, LargeBinary) => binary_apply::<&GenericBinaryArray<i64>>(
277            op.try_into()?,
278            l.as_binary(),
279            l_s,
280            l_v,
281            r.as_binary(),
282            r_s,
283            r_v,
284        ),
285        (BinaryView, BinaryView) => binary_apply::<&BinaryViewArray>(
286            op.try_into()?,
287            l.as_binary_view(),
288            l_s,
289            l_v,
290            r.as_binary_view(),
291            r_s,
292            r_v,
293        ),
294        (l_t, r_t) => Err(ArrowError::InvalidArgumentError(format!(
295            "Invalid string/binary operation: {l_t} {op} {r_t}"
296        ))),
297    }
298}
299
300fn string_apply<'a, T: StringArrayType<'a> + 'a>(
301    op: Op,
302    l: T,
303    l_s: bool,
304    l_v: Option<&'a dyn AnyDictionaryArray>,
305    r: T,
306    r_s: bool,
307    r_v: Option<&'a dyn AnyDictionaryArray>,
308) -> Result<BooleanArray, ArrowError> {
309    let l_len = l_v.map(|l| l.len()).unwrap_or(l.len());
310    if r_s {
311        let idx = match r_v {
312            Some(dict) if dict.null_count() != 0 => return Ok(BooleanArray::new_null(l_len)),
313            Some(dict) => dict.normalized_keys()[0],
314            None => 0,
315        };
316        if r.is_null(idx) {
317            return Ok(BooleanArray::new_null(l_len));
318        }
319        op_scalar::<T>(op, l, l_v, r.value(idx))
320    } else {
321        match (l_s, l_v, r_v) {
322            (true, None, None) => {
323                let v = l.is_valid(0).then(|| l.value(0));
324                op_binary(op, std::iter::repeat(v), r.iter())
325            }
326            (true, Some(l_v), None) => {
327                let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
328                let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
329                op_binary(op, std::iter::repeat(v), r.iter())
330            }
331            (true, None, Some(r_v)) => {
332                let v = l.is_valid(0).then(|| l.value(0));
333                op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
334            }
335            (true, Some(l_v), Some(r_v)) => {
336                let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
337                let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
338                op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
339            }
340            (false, None, None) => op_binary(op, l.iter(), r.iter()),
341            (false, Some(l_v), None) => op_binary(op, vectored_iter(l, l_v), r.iter()),
342            (false, None, Some(r_v)) => op_binary(op, l.iter(), vectored_iter(r, r_v)),
343            (false, Some(l_v), Some(r_v)) => {
344                op_binary(op, vectored_iter(l, l_v), vectored_iter(r, r_v))
345            }
346        }
347    }
348}
349
350#[inline(never)]
351fn op_scalar<'a, T: StringArrayType<'a>>(
352    op: Op,
353    l: T,
354    l_v: Option<&dyn AnyDictionaryArray>,
355    r: &str,
356) -> Result<BooleanArray, ArrowError> {
357    let r = match op {
358        Op::Like(neg) => Predicate::like(r)?.evaluate_array(l, neg),
359        Op::ILike(neg) => Predicate::ilike(r, l.is_ascii())?.evaluate_array(l, neg),
360        Op::Contains => Predicate::contains(r).evaluate_array(l, false),
361        Op::EqIgnoreAsciiCase => Predicate::IEqAscii(r).evaluate_array(l, false),
362        Op::StartsWith => Predicate::StartsWith(r).evaluate_array(l, false),
363        Op::EndsWith => Predicate::EndsWith(r).evaluate_array(l, false),
364    };
365
366    Ok(match l_v {
367        Some(v) => take(&r, v.keys(), None)?.as_boolean().clone(),
368        None => r,
369    })
370}
371
372fn vectored_iter<'a, T: StringArrayType<'a> + 'a>(
373    a: T,
374    a_v: &'a dyn AnyDictionaryArray,
375) -> impl Iterator<Item = Option<&'a str>> + 'a {
376    let nulls = a_v.nulls();
377    let keys = a_v.normalized_keys();
378    keys.into_iter().enumerate().map(move |(idx, key)| {
379        if nulls.map(|n| n.is_null(idx)).unwrap_or_default() || a.is_null(key) {
380            return None;
381        }
382        Some(a.value(key))
383    })
384}
385
386#[inline(never)]
387fn op_binary<'a>(
388    op: Op,
389    l: impl Iterator<Item = Option<&'a str>>,
390    r: impl Iterator<Item = Option<&'a str>>,
391) -> Result<BooleanArray, ArrowError> {
392    match op {
393        Op::Like(neg) => binary_predicate(l, r, neg, Predicate::like),
394        Op::ILike(neg) => binary_predicate(l, r, neg, |s| Predicate::ilike(s, false)),
395        Op::Contains => Ok(l.zip(r).map(|(l, r)| Some(str_contains(l?, r?))).collect()),
396        Op::EqIgnoreAsciiCase => Ok(l
397            .zip(r)
398            .map(|(l, r)| Some(Predicate::IEqAscii(l?).evaluate(r?)))
399            .collect()),
400        Op::StartsWith => Ok(l
401            .zip(r)
402            .map(|(l, r)| Some(Predicate::StartsWith(r?).evaluate(l?)))
403            .collect()),
404        Op::EndsWith => Ok(l
405            .zip(r)
406            .map(|(l, r)| Some(Predicate::EndsWith(r?).evaluate(l?)))
407            .collect()),
408    }
409}
410
/// Returns `true` when `needle` occurs in `haystack`, using a byte-wise
/// substring search over the UTF-8 bytes of both strings.
fn str_contains(haystack: &str, needle: &str) -> bool {
    memchr::memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some()
}
414
/// Evaluates a per-row pattern predicate over two zipped iterators.
///
/// * `l` - the values to test.
/// * `r` - the pattern for each row.
/// * `neg` - when `true`, the match result is inverted.
/// * `f` - builds a [`Predicate`] from a pattern string; may fail.
///
/// Rows where either side is `None` produce a null. The first error returned by
/// `f` aborts the evaluation and is returned to the caller.
fn binary_predicate<'a>(
    l: impl Iterator<Item = Option<&'a str>>,
    r: impl Iterator<Item = Option<&'a str>>,
    neg: bool,
    f: impl Fn(&'a str) -> Result<Predicate<'a>, ArrowError>,
) -> Result<BooleanArray, ArrowError> {
    // Cache of the most recently built (pattern, predicate) pair, so runs of
    // identical consecutive patterns build their predicate only once.
    let mut previous = None;
    l.zip(r)
        .map(|(l, r)| match (l, r) {
            (Some(l), Some(r)) => {
                let p: &Predicate = match previous {
                    // Same pattern as the previous non-null row: reuse it.
                    Some((expr, ref predicate)) if expr == r => predicate,
                    // New pattern: build it and replace the cached entry.
                    _ => &previous.insert((r, f(r)?)).1,
                };
                Ok(Some(p.evaluate(l) != neg))
            }
            _ => Ok(None),
        })
        .collect()
}
435
436// Deprecated kernels
437
438fn make_scalar(data_type: &DataType, scalar: &str) -> Result<ArrayRef, ArrowError> {
439    match data_type {
440        DataType::Utf8 => Ok(Arc::new(StringArray::from_iter_values([scalar]))),
441        DataType::LargeUtf8 => Ok(Arc::new(LargeStringArray::from_iter_values([scalar]))),
442        DataType::Dictionary(_, v) => make_scalar(v.as_ref(), scalar),
443        d => Err(ArrowError::InvalidArgumentError(format!(
444            "Unsupported string scalar data type {d:?}",
445        ))),
446    }
447}
448
// Generates the four deprecated entry points that forward to one `Datum`-based
// kernel:
//
// * `$fn_array`      - two `GenericStringArray`s
// * `$fn_scalar`     - `GenericStringArray` against a `&str` scalar
// * `$fn_array_dyn`  - two `&dyn Array`s
// * `$fn_scalar_dyn` - `&dyn Array` against a `&str` scalar
//
// `$fn_datum` is the kernel to call and `$deprecation` the note attached to
// every generated function. All generated functions are hidden from the docs.
macro_rules! legacy_kernels {
    ($fn_datum:ident, $fn_array:ident, $fn_scalar:ident, $fn_array_dyn:ident, $fn_scalar_dyn:ident, $deprecation:expr) => {
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &GenericStringArray<O>,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(left, right)
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &str,
        ) -> Result<BooleanArray, ArrowError> {
            // Wrap the pattern in a one-element array so it can be passed as a scalar.
            let scalar = GenericStringArray::<O>::from_iter_values([right]);
            $fn_datum(left, &Scalar::new(&scalar))
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array_dyn(
            left: &dyn Array,
            right: &dyn Array,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(&left, &right)
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar_dyn(left: &dyn Array, right: &str) -> Result<BooleanArray, ArrowError> {
            // The scalar array must match the (value) type of `left`.
            let scalar = make_scalar(left.data_type(), right)?;
            $fn_datum(&left, &Scalar::new(&scalar))
        }
    };
}
487
// Deprecated wrappers, one set per kernel. Arguments are: the kernel to forward
// to, the four generated function names, and the deprecation note.
legacy_kernels!(
    like,
    like_utf8,
    like_utf8_scalar,
    like_dyn,
    like_utf8_scalar_dyn,
    "Use arrow_string::like::like"
);
legacy_kernels!(
    ilike,
    ilike_utf8,
    ilike_utf8_scalar,
    ilike_dyn,
    ilike_utf8_scalar_dyn,
    "Use arrow_string::like::ilike"
);
legacy_kernels!(
    nlike,
    nlike_utf8,
    nlike_utf8_scalar,
    nlike_dyn,
    nlike_utf8_scalar_dyn,
    "Use arrow_string::like::nlike"
);
legacy_kernels!(
    nilike,
    nilike_utf8,
    nilike_utf8_scalar,
    nilike_dyn,
    nilike_utf8_scalar_dyn,
    "Use arrow_string::like::nilike"
);
legacy_kernels!(
    contains,
    contains_utf8,
    contains_utf8_scalar,
    contains_dyn,
    contains_utf8_scalar_dyn,
    "Use arrow_string::like::contains"
);
legacy_kernels!(
    starts_with,
    starts_with_utf8,
    starts_with_utf8_scalar,
    starts_with_dyn,
    starts_with_utf8_scalar_dyn,
    "Use arrow_string::like::starts_with"
);

legacy_kernels!(
    ends_with,
    ends_with_utf8,
    ends_with_utf8_scalar,
    ends_with_dyn,
    ends_with_utf8_scalar_dyn,
    "Use arrow_string::like::ends_with"
);
545
546#[cfg(test)]
547#[allow(deprecated)]
548mod tests {
549    use super::*;
550    use arrow_array::builder::BinaryDictionaryBuilder;
551    use arrow_array::types::{ArrowDictionaryKeyType, Int8Type};
552    use std::iter::zip;
553
554    fn convert_binary_iterator_to_binary_dictionary<
555        'a,
556        K: ArrowDictionaryKeyType,
557        I: IntoIterator<Item = &'a [u8]>,
558    >(
559        iter: I,
560    ) -> DictionaryArray<K> {
561        let it = iter.into_iter();
562        let (lower, _) = it.size_hint();
563        let mut builder = BinaryDictionaryBuilder::with_capacity(lower, 256, 1024);
564        it.for_each(|i| {
565            builder
566                .append(i)
567                .expect("Unable to append a value to a dictionary array.");
568        });
569
570        builder.finish()
571    }
572
    /// Generates a `#[test]` named `$test_name` asserting that
    /// `$op(left, right)` equals `$expected`, where both sides are arrays.
    ///
    /// The same inputs are checked with four array implementations:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray` (string values, `Int8Type` keys)
    ///
    /// `$left` and `$right` are expanded once per array type, so they should be
    /// cheap, side-effect free expressions such as `vec![...]` literals.
    macro_rules! test_utf8 {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
607
    /// Generates a `#[test]` named `$test_name` asserting that
    /// `$op(left, right)` equals `$expected`, where both sides are arrays.
    ///
    /// The same inputs are checked as strings and as their UTF-8 bytes:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray` with string values
    /// - `BinaryArray`
    /// - `LargeBinaryArray`
    /// - `DictionaryArray` with binary values
    macro_rules! test_utf8_and_binary {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                // Binary variants: the same values viewed as raw bytes.
                let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
                let right_binary = $right.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();

                let left = BinaryArray::from(left_binary.clone());
                let right = BinaryArray::from(right_binary.clone());
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = LargeBinaryArray::from(left_binary.clone());
                let right = LargeBinaryArray::from(right_binary.clone());
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(left_binary);
                let right: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(right_binary);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
662
    /// Generates a `#[test]` named `$test_name` asserting that
    /// `$op(left, right)` equals `$expected`, where the left side is an array
    /// and the right side is a scalar (a one-element array wrapped in `Scalar`).
    ///
    /// The same inputs are checked with four array implementations:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray` (string values, `Int8Type` keys)
    macro_rules! test_utf8_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
697
    /// Generates a `#[test]` named `$test_name` asserting that
    /// `$op(left, right)` equals `$expected`, where the left side is an array
    /// and the right side is a scalar (a one-element array wrapped in `Scalar`).
    ///
    /// The same inputs are checked as strings and as their UTF-8 bytes:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray` with string values
    /// - `BinaryArray`
    /// - `LargeBinaryArray`
    /// - `DictionaryArray` with binary values
    macro_rules! test_utf8_and_binary_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // Binary variants: the same values viewed as raw bytes.
                let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
                let right_binary = $right.as_bytes();

                let left = BinaryArray::from(left_binary.clone());
                let right = BinaryArray::from_iter_values([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = LargeBinaryArray::from(left_binary.clone());
                let right = LargeBinaryArray::from_iter_values([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(left_binary);
                let right: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
752
    // Row-wise `like` with a different pattern per row: exact match, `%` and `_`
    // wildcards, and `.*`, which is expected not to act as a regex wildcard.
    test_utf8!(
        test_utf8_array_like,
        vec![
            "arrow",
            "arrow_long_string_more than 12 bytes",
            "arrow",
            "arrow",
            "arrow",
            "arrows",
            "arrow",
            "arrow"
        ],
        vec![
            "arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_", ".*"
        ],
        like,
        vec![true, true, true, false, false, true, false, false]
    );
771
    // Parentheses in a pattern are matched literally.
    test_utf8_scalar!(
        test_utf8_array_like_scalar_escape_testing,
        vec![
            "varchar(255)",
            "int(255)longer than 12 bytes",
            "varchar",
            "int"
        ],
        "%(%)%",
        like,
        vec![true, true, false, false]
    );
784
    // The pattern `.*` only matches the literal string `.*`.
    test_utf8_scalar!(
        test_utf8_array_like_scalar_escape_regex,
        vec![".*", "a", "*"],
        ".*",
        like,
        vec![true, false, false]
    );
792
    // The pattern `.` only matches a literal dot, not an arbitrary character.
    test_utf8_scalar!(
        test_utf8_array_like_scalar_escape_regex_dot,
        vec![".", "a", "*"],
        ".",
        like,
        vec![true, false, false]
    );
800
    // `%ar%` matches any value containing `ar`.
    test_utf8_scalar!(
        test_utf8_array_like_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "long string arrow test 12 bytes"
        ],
        "%ar%",
        like,
        vec![true, true, false, false, true]
    );
814
    // `arrow%` matches any value starting with `arrow`.
    test_utf8_scalar!(
        test_utf8_array_like_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow%",
        like,
        vec![true, false, true, false, true]
    );
828
    // Replicates `test_utf8_array_like_scalar_start` and
    // `test_utf8_array_like_scalar_dyn_start` to demonstrate that SQL
    // `STARTSWITH` works as expected.
    test_utf8_and_binary_scalar!(
        test_utf8_and_binary_array_starts_with_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow",
        starts_with,
        vec![true, false, true, false, true]
    );
844
    // Row-wise `starts_with`: `%`, `_` and `.*` carry no wildcard meaning here,
    // so e.g. `ar%` and `arrow_` are expected not to match.
    test_utf8_and_binary!(
        test_utf8_and_binary_array_starts_with,
        vec![
            "arrow",
            "arrow_long_string_more than 12 bytes",
            "arrow",
            "arrow",
            "arrow",
            "arrows",
            "arrow",
            "arrow"
        ],
        vec![
            "arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"
        ],
        starts_with,
        vec![true, false, false, false, true, false, false, false]
    );
863
    // `%arrow` matches any value ending with `arrow`.
    test_utf8_scalar!(
        test_utf8_array_like_scalar_end,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        like,
        vec![true, true, false, false, false]
    );
877
    // Replicates `test_utf8_array_like_scalar_end` and
    // `test_utf8_array_like_scalar_dyn_end` to demonstrate that SQL
    // `ENDSWITH` works as expected.
    test_utf8_and_binary_scalar!(
        test_utf8_and_binary_array_ends_with_scalar_end,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow",
        ends_with,
        vec![true, true, false, false, false]
    );
893
    // Row-wise `ends_with`: only literal suffixes (`arrow`, `row`) are expected
    // to match; `%`, `_` and `.*` carry no wildcard meaning here.
    test_utf8_and_binary!(
        test_utf8_and_binary_array_ends_with,
        vec![
            "arrow",
            "arrow_long_string_more than 12 bytes",
            "arrow",
            "arrow",
            "arrow",
            "arrows",
            "arrow",
            "arrow"
        ],
        vec![
            "arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"
        ],
        ends_with,
        vec![true, false, true, false, false, false, false, false]
    );
912
914    test_utf8_scalar!(
        // Scalar `like` with a wildcard-free pattern: only the value equal to "arrow" is
        // expected to match; longer or shorter values are not.
915        test_utf8_array_like_scalar_equals,
916        vec![
917            "arrow",
918            "parrow",
919            "arrows",
920            "arr",
921            "arrow long string longer than 12 bytes"
922        ],
923        "arrow",
924        like,
925        vec![true, false, false, false, false]
926    );
926
928    test_utf8_scalar!(
        // Scalar `like` with a trailing `_`: only "arrows" ("arrow" plus exactly one more
        // character) is expected to match.
929        test_utf8_array_like_scalar_one,
930        vec![
931            "arrow",
932            "arrows",
933            "parrow",
934            "arr",
935            "arrow long string longer than 12 bytes"
936        ],
937        "arrow_",
938        like,
939        vec![false, true, false, false, false]
940    );
940
942    test_utf8_scalar!(
        // Escaping: the Rust literal "a\\%" is the pattern `a\%`. Its `\%` stands for a literal
        // percent sign, so only the value `a%` is expected to match (not `a\x`).
943        test_utf8_scalar_like_escape,
944        vec!["a%", "a\\x", "arrow long string longer than 12 bytes"],
945        "a\\%",
946        like,
947        vec![true, false, false]
948    );
948
950    test_utf8_scalar!(
        // Escaping combined with a wildcard: the pattern is `%a\%`, i.e. any leading text
        // followed by the literal text `a%`. Only `ba%` is expected to match.
951        test_utf8_scalar_like_escape_contains,
952        vec!["ba%", "ba\\x", "arrow long string longer than 12 bytes"],
953        "%a\\%",
954        like,
955        vec![true, false, false]
956    );
956
958    test_utf8!(
        // `ilike` with the pattern `\%_\%`: an escaped `%`, one arbitrary character, then another
        // escaped `%`. The value `%%%` is expected to match.
        // NOTE(review): despite "scalar" in the name, this case passes the pattern as a
        // one-element vector.
959        test_utf8_scalar_ilike_regex,
960        vec!["%%%"],
961        vec![r"\%_\%"],
962        ilike,
963        vec![true]
964    );
964
966    test_utf8!(
        // Array-vs-array `nlike`: values and patterns appear to be paired by position; a flag is
        // true where the value does NOT match its pattern.
967        test_utf8_array_nlike,
968        vec![
969            "arrow",
970            "arrow",
971            "arrow long string longer than 12 bytes",
972            "arrow",
973            "arrow",
974            "arrows",
975            "arrow"
976        ],
977        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
978        nlike,
979        vec![false, false, false, true, true, false, true]
980    );
980
982    test_utf8_scalar!(
        // `nlike` with literal parentheses in the pattern: values containing `(`...`)` match the
        // pattern (flag false); values without parentheses do not (flag true).
983        test_utf8_array_nlike_escape_testing,
984        vec![
985            "varchar(255)",
986            "int(255) arrow long string longer than 12 bytes",
987            "varchar",
988            "int"
989        ],
990        "%(%)%",
991        nlike,
992        vec![false, false, true, true]
993    );
993
995    test_utf8_scalar!(
        // The pattern `.*` must be taken literally, not as a regular expression: only the value
        // ".*" matches it (flag false); "a" and "*" do not (flag true).
996        test_utf8_array_nlike_scalar_escape_regex,
997        vec![".*", "a", "*"],
998        ".*",
999        nlike,
1000        vec![false, true, true]
1001    );
1001
1003    test_utf8_scalar!(
        // The pattern `.` must be taken literally, not as the regex "any character": only the
        // value "." matches it (flag false); "a" and "*" do not (flag true).
1004        test_utf8_array_nlike_scalar_escape_regex_dot,
1005        vec![".", "a", "*"],
1006        ".",
1007        nlike,
1008        vec![false, true, true]
1009    );
1010    test_utf8_scalar!(
        // Scalar `nlike` with `%ar%`: the flag is true only for values that do NOT contain "ar".
1011        test_utf8_array_nlike_scalar,
1012        vec![
1013            "arrow",
1014            "parquet",
1015            "datafusion",
1016            "flight",
1017            "arrow long string longer than 12 bytes"
1018        ],
1019        "%ar%",
1020        nlike,
1021        vec![false, false, true, true, false]
1022    );
1022
1024    test_utf8_scalar!(
        // Scalar `nlike` with `arrow%`: the flag is true only for values that do NOT start with
        // "arrow".
1025        test_utf8_array_nlike_scalar_start,
1026        vec![
1027            "arrow",
1028            "parrow",
1029            "arrows",
1030            "arr",
1031            "arrow long string longer than 12 bytes"
1032        ],
1033        "arrow%",
1034        nlike,
1035        vec![false, true, false, true, false]
1036    );
1036
1038    test_utf8_scalar!(
        // Scalar `nlike` with `%arrow`: the flag is true only for values that do NOT end with
        // "arrow".
1039        test_utf8_array_nlike_scalar_end,
1040        vec![
1041            "arrow",
1042            "parrow",
1043            "arrows",
1044            "arr",
1045            "arrow long string longer than 12 bytes"
1046        ],
1047        "%arrow",
1048        nlike,
1049        vec![false, false, true, true, true]
1050    );
1050
1052    test_utf8_scalar!(
        // Scalar `nlike` with a wildcard-free pattern: the flag is false only for the value
        // equal to "arrow".
1053        test_utf8_array_nlike_scalar_equals,
1054        vec![
1055            "arrow",
1056            "parrow",
1057            "arrows",
1058            "arr",
1059            "arrow long string longer than 12 bytes"
1060        ],
1061        "arrow",
1062        nlike,
1063        vec![false, true, true, true, true]
1064    );
1064
1066    test_utf8_scalar!(
        // Scalar `nlike` with a trailing `_`: the flag is false only for "arrows" ("arrow" plus
        // exactly one more character).
1067        test_utf8_array_nlike_scalar_one,
1068        vec![
1069            "arrow",
1070            "arrows",
1071            "parrow",
1072            "arr",
1073            "arrow long string longer than 12 bytes"
1074        ],
1075        "arrow_",
1076        nlike,
1077        vec![true, false, true, true, true]
1078    );
1078
1080    test_utf8!(
        // Array-vs-array `ilike`: values and patterns appear to be paired by position, and ASCII
        // case is ignored (e.g. "ARROWS" is expected to match `arrow_`). "ARROW" vs `ar%r` is
        // false because the value does not end in "r"; "arROw" vs `arrow_` is false because
        // there is no extra trailing character.
1081        test_utf8_array_ilike,
1082        vec![
1083            "arrow",
1084            "arrow",
1085            "ARROW long string longer than 12 bytes",
1086            "arrow",
1087            "ARROW",
1088            "ARROWS",
1089            "arROw"
1090        ],
1091        vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
1092        ilike,
1093        vec![true, true, true, false, false, true, false]
1094    );
1094
1096    test_utf8_scalar!(
        // `ilike` with literal parentheses in the pattern: only values containing `(`...`)` are
        // expected to match.
1097        ilike_utf8_scalar_escape_testing,
1098        vec![
1099            "varchar(255)",
1100            "int(255) long string longer than 12 bytes",
1101            "varchar",
1102            "int"
1103        ],
1104        "%(%)%",
1105        ilike,
1106        vec![true, true, false, false]
1107    );
1107
1109    test_utf8_scalar!(
        // Scalar `ilike` with an upper-case `%AR%` against lower-case values: the flag is true
        // wherever the value contains "ar" in any letter case.
1110        test_utf8_array_ilike_scalar,
1111        vec![
1112            "arrow",
1113            "parquet",
1114            "datafusion",
1115            "flight",
1116            "arrow long string longer than 12 bytes"
1117        ],
1118        "%AR%",
1119        ilike,
1120        vec![true, true, false, false, true]
1121    );
1121
1123    test_utf8_scalar!(
        // Scalar `ilike` with the mixed-case prefix pattern `aRRow%`: the flag is true for values
        // starting with "arrow" regardless of letter case; "ARR" is too short to match.
1124        test_utf8_array_ilike_scalar_start,
1125        vec![
1126            "arrow",
1127            "parrow",
1128            "arrows",
1129            "ARR",
1130            "arrow long string longer than 12 bytes"
1131        ],
1132        "aRRow%",
1133        ilike,
1134        vec![true, false, true, false, true]
1135    );
1135
1137    test_utf8_scalar!(
        // Scalar `ilike` with the suffix pattern `%arrow` against mixed-case values: the flag is
        // true for values ending in "arrow" regardless of letter case.
1138        test_utf8_array_ilike_scalar_end,
1139        vec![
1140            "ArroW",
1141            "parrow",
1142            "ARRowS",
1143            "arr",
1144            "arrow long string longer than 12 bytes"
1145        ],
1146        "%arrow",
1147        ilike,
1148        vec![true, true, false, false, false]
1149    );
1149
1151    test_utf8_scalar!(
        // Scalar `ilike` with a wildcard-free pattern: only the value equal to "Arrow" up to
        // letter case is expected to match.
1152        test_utf8_array_ilike_scalar_equals,
1153        vec![
1154            "arrow",
1155            "parrow",
1156            "arrows",
1157            "arr",
1158            "arrow long string longer than 12 bytes"
1159        ],
1160        "Arrow",
1161        ilike,
1162        vec![true, false, false, false, false]
1163    );
1163
1164    // We only implement loose matching: plain letter-case differences match, but `ß` is not
        // treated as `SS` and the `ﬀ` ligature (U+FB00) is not treated as `FF`.
1165    test_utf8_scalar!(
1166        test_utf8_array_ilike_unicode,
1167        vec![
1168            "FFkoß",
1169            "FFkoSS",
1170            "FFkoss",
1171            "FFkoS",
1172            "FFkos",
1173            "ﬀkoSS", // U+FB00 ligature; ASCII "ff" here would match and contradict `false`
1174            "ffkoß", // NOTE(review): possibly also `ﬀkoß` originally; false either way
1175            "FFKoSS",
1176            "longer than 12 bytes FFKoSS"
1177        ],
1178        "FFkoSS", // no wildcard, so the last value (extra leading text) must not match
1179        ilike,
1180        vec![false, true, true, false, false, false, false, true, false]
1181    );
1182
1183    test_utf8_scalar!(
        // Prefix form of the loose-matching check: `ilike` with `FFkoSS%`. Plain letter-case
        // differences match; `ß` is not treated as `SS` and the `ﬀ` ligature (U+FB00) is not
        // treated as `FF`.
1184        test_utf8_array_ilike_unicode_starts,
1185        vec![
1186            "FFkoßsdlkdf",
1187            "FFkoSSsdlkdf",
1188            "FFkosssdlkdf",
1189            "FFkoS",
1190            "FFkos",
1191            "ﬀkoSS", // U+FB00 ligature; ASCII "ff" here would match and contradict `false`
1192            "ffkoß", // NOTE(review): possibly also `ﬀkoß` originally; false either way
1193            "FfkosSsdfd",
1194            "FFKoSS",
1195            "longer than 12 bytes FFKoSS",
1196        ],
1197        "FFkoSS%",
1198        ilike,
1199        vec![
1200            false, true, true, false, false, false, false, true, true, false
1201        ]
1202    );
1203
1204    test_utf8_scalar!(
        // Suffix form of the loose-matching check: `ilike` with `%FFkoSS`. Plain letter-case
        // differences match; `ß` is not treated as `SS` and the `ﬀ` ligature (U+FB00) is not
        // treated as `FF`.
1205        test_utf8_array_ilike_unicode_ends,
1206        vec![
1207            "sdlkdfFFkoß",
1208            "sdlkdfFFkoSS",
1209            "sdlkdfFFkoss",
1210            "FFkoS",
1211            "FFkos",
1212            "ﬀkoSS", // U+FB00 ligature; ASCII "ff" here would match and contradict `false`
1213            "ffkoß", // NOTE(review): possibly also `ﬀkoß` originally; false either way
1214            "h😃klFfkosS",
1215            "FFKoSS",
1216            "longer than 12 bytes FFKoSS",
1217        ],
1218        "%FFkoSS",
1219        ilike,
1220        vec![
1221            false, true, true, false, false, false, false, true, true, true
1222        ]
1223    );
1224
1226    test_utf8_scalar!(
        // Substring form of the loose-matching check: `ilike` with `%FFkoSS%`. Plain letter-case
        // differences match (e.g. "fFkoSS" inside the second value); `ß` is not treated as `SS`
        // and the `ﬀ` ligature (U+FB00) is not treated as `FF`.
1227        test_utf8_array_ilike_unicode_contains,
1228        vec![
1229            "sdlkdfFkoßsdfs",
1230            "sdlkdfFkoSSdggs",
1231            "sdlkdfFkosssdsd",
1232            "FkoS",
1233            "Fkos",
1234            "ﬀkoSS", // U+FB00 ligature; ASCII "ff" here would match and contradict `false`
1235            "ffkoß", // NOTE(review): possibly also `ﬀkoß` originally; false either way
1236            "😃sadlksffkosSsh😃klF",
1237            "😱slgffkosSsh😃klF",
1238            "FFKoSS",
1239            "longer than 12 bytes FFKoSS",
1240        ],
1241        "%FFkoSS%",
1242        ilike,
1243        vec![
1244            false, true, true, false, false, false, false, true, true, true, true
1245        ]
1246    );
1246
1247    // Replicates `test_utf8_array_ilike_unicode_contains` and
1248    // `test_utf8_array_ilike_unicode_contains_dyn` to
1249    // demonstrate that the `contains` kernel (SQL `CONTAINS`) works as expected.
1250    //
1251    // NOTE: 5 of the values were changed (marked inline) because the original test used the
        // case-insensitive `ilike`, whereas `contains` is expected to be case-sensitive here.
1252    test_utf8_and_binary_scalar!(
1253        test_utf8_and_binary_array_contains_unicode_contains,
1254        vec![
1255            "sdlkdfFkoßsdfs",
1256            "sdlkdFFkoSSdggs", // Original was case insensitive "sdlkdfFkoSSdggs"
1257            "sdlkdFFkoSSsdsd", // Original was case insensitive "sdlkdfFkosssdsd"
1258            "FkoS",
1259            "Fkos",
1260            // NOTE(review): the next value may have been `ﬀkoSS` (U+FB00 ligature) before text
            // extraction; the expected `false` holds for either spelling. Confirm.
            "ffkoSS",
1261            "ffkoß",
1262            "😃sadlksFFkoSSsh😃klF", // Original was case insensitive "😃sadlksffkosSsh😃klF"
1263            "😱slgFFkoSSsh😃klF",    // Original was case insensitive "😱slgffkosSsh😃klF"
1264            "FFkoSS",                // Original was case insensitive "FFKoSS"
1265            "longer than 12 bytes FFKoSS", // "FFKoSS" differs in case from the needle: no match
1266        ],
1267        "FFkoSS", // plain substring, no `%` wildcards
1268        contains,
1269        vec![
1270            false, true, true, false, false, false, false, true, true, true, false
1271        ]
1272    );
1273
1274    test_utf8_scalar!(
        // Loose matching combined with `_`: `ilike` with `%FF__SS%` needs "ff", any two
        // characters, then "ss", in any letter case. `ß` is not treated as `SS` and the `ﬀ`
        // ligature (U+FB00) is not treated as `FF`.
1275        test_utf8_array_ilike_unicode_complex,
1276        vec![
1277            "sdlkdfFooßsdfs",
1278            "sdlkdfFooSSdggs",
1279            "sdlkdfFoosssdsd",
1280            "FooS",
1281            "Foos",
1282            "ﬀooSS", // U+FB00 ligature; ASCII "ff" here would match and contradict `false`
1283            "ffooß", // NOTE(review): possibly also `ﬀooß` originally; false either way
1284            "😃sadlksffofsSsh😃klF",
1285            "😱slgffoesSsh😃klF",
1286            "FFKoSS",
1287            "longer than 12 bytes FFKoSS",
1288        ],
1289        "%FF__SS%",
1290        ilike,
1291        vec![
1292            false, true, true, false, false, false, false, true, true, true, true
1293        ]
1294    );
1295
1296    // 😈 is four bytes long, so this case puts a multi-byte character inside a suffix pattern.
        // NOTE(review): the test name says `uff8`; this looks like a typo for `utf8`.
1297    test_utf8_scalar!(
1298        test_uff8_array_like_multibyte,
1299        vec![
1300            "sdlkdfFooßsdfs",
1301            "sdlkdfFooSSdggs",
1302            "sdlkdfFoosssdsd",
1303            "FooS",
1304            "Foos",
1305            "ffooSS",
1306            "ffooß",
1307            "😃sadlksffofsSsh😈klF",
1308            "😱slgffoesSsh😈klF",
1309            "FFKoSS",
1310            "longer than 12 bytes FFKoSS",
1311        ],
1312        "%Ssh😈klF", // case-sensitive `like`: only the two values ending in this text match
1313        like,
1314        vec![
1315            false, false, false, false, false, false, false, true, true, false, false
1316        ]
1317    );
1318
1319    test_utf8_scalar!(
        // Scalar `ilike` with a trailing `_`: only "arrows" ("arrow" plus exactly one more
        // character) is expected to match.
1320        test_utf8_array_ilike_scalar_one,
1321        vec![
1322            "arrow",
1323            "arrows",
1324            "parrow",
1325            "arr",
1326            "arrow long string longer than 12 bytes"
1327        ],
1328        "arrow_",
1329        ilike,
1330        vec![false, true, false, false, false]
1331    );
1332
1333    test_utf8!(
        // Array-vs-array `nilike`: values and patterns appear to be paired by position; a flag
        // is true where the value does NOT match its pattern when letter case is ignored.
1334        test_utf8_array_nilike,
1335        vec![
1336            "arrow",
1337            "arrow",
1338            "ARROW longer than 12 bytes string",
1339            "arrow",
1340            "ARROW",
1341            "ARROWS",
1342            "arROw"
1343        ],
1344        vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
1345        nilike,
1346        vec![false, false, false, true, true, false, true]
1347    );
1348
1349    test_utf8_scalar!(
        // `nilike` with literal parentheses in the pattern: values containing `(`...`)` match the
        // pattern (flag false); values without parentheses do not (flag true).
1350        nilike_utf8_scalar_escape_testing,
1351        vec![
1352            "varchar(255)",
1353            "int(255) longer than 12 bytes string",
1354            "varchar",
1355            "int"
1356        ],
1357        "%(%)%",
1358        nilike,
1359        vec![false, false, true, true]
1360    );
1361
1362    test_utf8_scalar!(
        // Scalar `nilike` with an upper-case `%AR%`: the flag is true only for values that do
        // NOT contain "ar" in any letter case.
1363        test_utf8_array_nilike_scalar,
1364        vec![
1365            "arrow",
1366            "parquet",
1367            "datafusion",
1368            "flight",
1369            "arrow long string longer than 12 bytes"
1370        ],
1371        "%AR%",
1372        nilike,
1373        vec![false, false, true, true, false]
1374    );
1375
1376    test_utf8_scalar!(
        // Scalar `nilike` with the mixed-case prefix pattern `aRRow%`: the flag is true only for
        // values that do NOT start with "arrow" when letter case is ignored.
1377        test_utf8_array_nilike_scalar_start,
1378        vec![
1379            "arrow",
1380            "parrow",
1381            "arrows",
1382            "ARR",
1383            "arrow long string longer than 12 bytes"
1384        ],
1385        "aRRow%",
1386        nilike,
1387        vec![false, true, false, true, false]
1388    );
1389
1391    test_utf8_scalar!(
        // Scalar `nilike` with the suffix pattern `%arrow`: the flag is true only for values
        // that do NOT end with "arrow" when letter case is ignored.
1392        test_utf8_array_nilike_scalar_end,
1393        vec![
1394            "ArroW",
1395            "parrow",
1396            "ARRowS",
1397            "arr",
1398            "arrow long string longer than 12 bytes"
1399        ],
1400        "%arrow",
1401        nilike,
1402        vec![false, false, true, true, true]
1403    );
1403
1405    test_utf8_scalar!(
        // Scalar `nilike` with a wildcard-free pattern: the flag is false only for "arRow",
        // which equals "Arrow" up to letter case.
1406        test_utf8_array_nilike_scalar_equals,
1407        vec![
1408            "arRow",
1409            "parrow",
1410            "arrows",
1411            "arr",
1412            "arrow long string longer than 12 bytes"
1413        ],
1414        "Arrow",
1415        nilike,
1416        vec![false, true, true, true, true]
1417    );
1417
1419    test_utf8_scalar!(
        // Scalar `nilike` with a trailing `_`: the flag is false only for "arrows" ("arrow" plus
        // exactly one more character).
1420        test_utf8_array_nilike_scalar_one,
1421        vec![
1422            "arrow",
1423            "arrows",
1424            "parrow",
1425            "arr",
1426            "arrow long string longer than 12 bytes"
1427        ],
1428        "arrow_",
1429        nilike,
1430        vec![true, false, true, true, true]
1431    );
1431
1433    test_utf8!(
        // Array-vs-array `eq_ignore_ascii_case`: left and right values appear to be paired by
        // position; the flag is true where the two strings are equal up to ASCII letter case
        // ("arrow" vs "arro" differs in length, "parquet" vs "arrow" differs in content).
1434        test_utf8_array_eq_ignore_ascii_case,
1435        vec!["arrow", "arrow", "arrow", "arrow", "parquet", "parquet"],
1436        vec!["arrow", "ARROW", "arro", "aRrOw", "arrow", "ARROW"],
1437        eq_ignore_ascii_case,
1438        vec![true, true, false, true, false, false]
1439    );
1439
1441    test_utf8_scalar!(
        // Scalar `eq_ignore_ascii_case` against "arrow": the flag is true only for values equal
        // to "arrow" up to ASCII letter case.
1442        test_utf8_array_eq_ignore_ascii_case_scalar,
1443        vec!["arrow", "aRrOW", "arro", "ARROW", "parquet", "PARQUET"],
1444        "arrow",
1445        eq_ignore_ascii_case,
1446        vec![true, true, false, true, false, false]
1447    );
1447
1448    #[test]
1449    fn test_dict_like_kernels() {
        // Checks `like_utf8_scalar_dyn` on an `Int8Type`-keyed dictionary array, with one
        // assertion per pattern shape (exact, prefix, suffix, substring, multi-wildcard).
        // The null entry is expected to stay null for every pattern.
1450        let data = vec![
1451            Some("Earth"),
1452            Some("Fire"),
1453            Some("Water"),
1454            Some("Air"),
1455            None,
1456            Some("Air"),
1457            Some("bbbbb\nAir"), // contains a newline: `%` is expected to span it
1458        ];
1459
1460        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1461
        // No wildcard: only values equal to "Air" match (not the multi-line value).
1462        assert_eq!(
1463            like_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1464            BooleanArray::from(vec![
1465                Some(false),
1466                Some(false),
1467                Some(false),
1468                Some(true),
1469                None,
1470                Some(true),
1471                Some(false),
1472            ]),
1473        );
1474
        // Prefix pattern.
1488        assert_eq!(
1489            like_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1490            BooleanArray::from(vec![
1491                Some(false),
1492                Some(false),
1493                Some(true),
1494                Some(false),
1495                None,
1496                Some(false),
1497                Some(false),
1498            ]),
1499        );
1500
        // Suffix pattern: also matches the multi-line value ending in "r".
1514        assert_eq!(
1515            like_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1516            BooleanArray::from(vec![
1517                Some(false),
1518                Some(false),
1519                Some(true),
1520                Some(true),
1521                None,
1522                Some(true),
1523                Some(true),
1524            ]),
1525        );
1526
        // Substring pattern.
1540        assert_eq!(
1541            like_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1542            BooleanArray::from(vec![
1543                Some(false),
1544                Some(true),
1545                Some(false),
1546                Some(true),
1547                None,
1548                Some(true),
1549                Some(true),
1550            ]),
1551        );
1552
        // Two ordered fragments; case-sensitive, so "Air" (capital A) does not match.
1566        assert_eq!(
1567            like_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1568            BooleanArray::from(vec![
1569                Some(true),
1570                Some(false),
1571                Some(true),
1572                Some(false),
1573                None,
1574                Some(false),
1575                Some(false),
1576            ]),
1577        );
1591    }
1592
1593    #[test]
1594    fn test_dict_nlike_kernels() {
        // Checks `nlike_utf8_scalar_dyn` on an `Int8Type`-keyed dictionary array, with one
        // assertion per pattern shape (exact, prefix, suffix, substring, multi-wildcard).
        // A flag is true where the value does NOT match; the null entry is expected to stay null.
1595        let data = vec![
1596            Some("Earth"),
1597            Some("Fire"),
1598            Some("Water"),
1599            Some("Air"),
1600            None,
1601            Some("Air"),
1602            Some("bbbbb\nAir"), // contains a newline: `%` is expected to span it
1603        ];
1604
1605        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1606
        // No wildcard: only values equal to "Air" are rejected.
1607        assert_eq!(
1608            nlike_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1609            BooleanArray::from(vec![
1610                Some(true),
1611                Some(true),
1612                Some(true),
1613                Some(false),
1614                None,
1615                Some(false),
1616                Some(true),
1617            ]),
1618        );
1619
        // Prefix pattern.
1633        assert_eq!(
1634            nlike_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1635            BooleanArray::from(vec![
1636                Some(true),
1637                Some(true),
1638                Some(false),
1639                Some(true),
1640                None,
1641                Some(true),
1642                Some(true),
1643            ]),
1644        );
1645
        // Suffix pattern: the multi-line value ending in "r" is rejected too.
1659        assert_eq!(
1660            nlike_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1661            BooleanArray::from(vec![
1662                Some(true),
1663                Some(true),
1664                Some(false),
1665                Some(false),
1666                None,
1667                Some(false),
1668                Some(false),
1669            ]),
1670        );
1671
        // Substring pattern.
1685        assert_eq!(
1686            nlike_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1687            BooleanArray::from(vec![
1688                Some(true),
1689                Some(false),
1690                Some(true),
1691                Some(false),
1692                None,
1693                Some(false),
1694                Some(false),
1695            ]),
1696        );
1697
        // Two ordered fragments; case-sensitive, so "Air" (capital A) is not a match.
1711        assert_eq!(
1712            nlike_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1713            BooleanArray::from(vec![
1714                Some(false),
1715                Some(true),
1716                Some(false),
1717                Some(true),
1718                None,
1719                Some(true),
1720                Some(true),
1721            ]),
1722        );
1736    }
1737
1738    #[test]
1739    fn test_dict_ilike_kernels() {
        // Checks `ilike_utf8_scalar_dyn` on an `Int8Type`-keyed dictionary array, with one
        // assertion per pattern shape (exact, prefix, suffix, substring, multi-wildcard).
        // Pattern letters deliberately differ in case from the data; the null entry is expected
        // to stay null for every pattern.
1740        let data = vec![
1741            Some("Earth"),
1742            Some("Fire"),
1743            Some("Water"),
1744            Some("Air"),
1745            None,
1746            Some("Air"),
1747            Some("bbbbb\nAir"), // contains a newline: `%` is expected to span it
1748        ];
1749
1750        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1751
        // No wildcard: only values equal to "air" up to letter case match.
1752        assert_eq!(
1753            ilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1754            BooleanArray::from(vec![
1755                Some(false),
1756                Some(false),
1757                Some(false),
1758                Some(true),
1759                None,
1760                Some(true),
1761                Some(false),
1762            ]),
1763        );
1764
        // Prefix pattern.
1778        assert_eq!(
1779            ilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1780            BooleanArray::from(vec![
1781                Some(false),
1782                Some(false),
1783                Some(true),
1784                Some(false),
1785                None,
1786                Some(false),
1787                Some(false),
1788            ]),
1789        );
1790
        // Suffix pattern: also matches the multi-line value ending in "r".
1804        assert_eq!(
1805            ilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1806            BooleanArray::from(vec![
1807                Some(false),
1808                Some(false),
1809                Some(true),
1810                Some(true),
1811                None,
1812                Some(true),
1813                Some(true),
1814            ]),
1815        );
1816
        // Substring pattern.
1830        assert_eq!(
1831            ilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1832            BooleanArray::from(vec![
1833                Some(false),
1834                Some(true),
1835                Some(false),
1836                Some(true),
1837                None,
1838                Some(true),
1839                Some(true),
1840            ]),
1841        );
1842
        // Two ordered fragments; with case ignored, "Air" matches as well.
1856        assert_eq!(
1857            ilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1858            BooleanArray::from(vec![
1859                Some(true),
1860                Some(false),
1861                Some(true),
1862                Some(true),
1863                None,
1864                Some(true),
1865                Some(true),
1866            ]),
1867        );
1881    }
1882
1883    #[test]
1884    fn test_dict_nilike_kernels() {
        // Checks `nilike_utf8_scalar_dyn` on an `Int8Type`-keyed dictionary array, with one
        // assertion per pattern shape (exact, prefix, suffix, substring, multi-wildcard).
        // A flag is true where the value does NOT match when letter case is ignored; the null
        // entry is expected to stay null for every pattern.
1885        let data = vec![
1886            Some("Earth"),
1887            Some("Fire"),
1888            Some("Water"),
1889            Some("Air"),
1890            None,
1891            Some("Air"),
1892            Some("bbbbb\nAir"), // contains a newline: `%` is expected to span it
1893        ];
1894
1895        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1896
        // No wildcard: only values equal to "air" up to letter case are rejected.
1897        assert_eq!(
1898            nilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1899            BooleanArray::from(vec![
1900                Some(true),
1901                Some(true),
1902                Some(true),
1903                Some(false),
1904                None,
1905                Some(false),
1906                Some(true),
1907            ]),
1908        );
1909
        // Prefix pattern.
1923        assert_eq!(
1924            nilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1925            BooleanArray::from(vec![
1926                Some(true),
1927                Some(true),
1928                Some(false),
1929                Some(true),
1930                None,
1931                Some(true),
1932                Some(true),
1933            ]),
1934        );
1935
        // Suffix pattern: the multi-line value ending in "r" is rejected too.
1949        assert_eq!(
1950            nilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1951            BooleanArray::from(vec![
1952                Some(true),
1953                Some(true),
1954                Some(false),
1955                Some(false),
1956                None,
1957                Some(false),
1958                Some(false),
1959            ]),
1960        );
1961
        // Substring pattern.
1975        assert_eq!(
1976            nilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1977            BooleanArray::from(vec![
1978                Some(true),
1979                Some(false),
1980                Some(true),
1981                Some(false),
1982                None,
1983                Some(false),
1984                Some(false),
1985            ]),
1986        );
1987
        // Two ordered fragments; with case ignored, only "Fire" fails to match.
2001        assert_eq!(
2002            nilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
2003            BooleanArray::from(vec![
2004                Some(false),
2005                Some(true),
2006                Some(false),
2007                Some(false),
2008                None,
2009                Some(false),
2010                Some(false),
2011            ]),
2012        );
2026    }
2027
#[test]
fn string_null_like_pattern() {
    // Checks that `result` has exactly one slot and that this slot is null.
    fn assert_single_null(result: &BooleanArray, pattern: &str) {
        assert_eq!(result.len(), 1, "With pattern {pattern}");
        assert_eq!(result.null_count(), 1, "With pattern {pattern}");
        assert!(result.is_null(0), "With pattern {pattern}");
    }

    // Pattern shapes chosen so that each one may be evaluated through a
    // different specialised code path.
    let patterns = [
        "",           // may run as an equality check
        "_",          // may run as a length check
        "%",          // may run as starts_with("") or a non-null check
        "a%",         // may run as starts_with("a")
        "%a",         // may run as ends_with("a")
        "a%b",        // may run as starts_with("a") && ends_with("b")
        "%a%",        // may run as contains("a")
        "%a%b_c_d%e", // may run as a regular expression
    ];
    // The focus is null handling, so the case-insensitive and negated
    // kernels are exercised alongside plain `like`.
    let kernels = [like, ilike, nlike, nilike];

    for pattern in &patterns {
        for like_f in kernels {
            // Null scalar value against a scalar pattern.
            let result = like_f(
                &Scalar::new(StringArray::new_null(1)),
                &StringArray::new_scalar(pattern),
            )
            .unwrap();
            assert_single_null(&result, pattern);

            // Null scalar value against an array pattern.
            let result = like_f(
                &Scalar::new(StringArray::new_null(1)),
                &StringArray::from_iter_values([pattern]),
            )
            .unwrap();
            assert_single_null(&result, pattern);

            // Null array value against an array pattern.
            let result = like_f(
                &StringArray::new_null(1),
                &StringArray::from_iter_values([pattern]),
            )
            .unwrap();
            assert_single_null(&result, pattern);

            // Null array value against a scalar pattern.
            let result = like_f(
                &StringArray::new_null(1),
                &StringArray::new_scalar(pattern),
            )
            .unwrap();
            assert_single_null(&result, pattern);
        }
    }
}
2073
#[test]
fn string_view_null_like_pattern() {
    // Checks that `result` has exactly one slot and that this slot is null.
    fn assert_single_null(result: &BooleanArray, pattern: &str) {
        assert_eq!(result.len(), 1, "With pattern {pattern}");
        assert_eq!(result.null_count(), 1, "With pattern {pattern}");
        assert!(result.is_null(0), "With pattern {pattern}");
    }

    // Pattern shapes chosen so that each one may be evaluated through a
    // different specialised code path.
    let patterns = [
        "",           // may run as an equality check
        "_",          // may run as a length check
        "%",          // may run as starts_with("") or a non-null check
        "a%",         // may run as starts_with("a")
        "%a",         // may run as ends_with("a")
        "a%b",        // may run as starts_with("a") && ends_with("b")
        "%a%",        // may run as contains("a")
        "%a%b_c_d%e", // may run as a regular expression
    ];
    // The focus is null handling, so the case-insensitive and negated
    // kernels are exercised alongside plain `like`.
    let kernels = [like, ilike, nlike, nilike];

    for pattern in &patterns {
        for like_f in kernels {
            // Null scalar value against a scalar pattern.
            let result = like_f(
                &Scalar::new(StringViewArray::new_null(1)),
                &StringViewArray::new_scalar(pattern),
            )
            .unwrap();
            assert_single_null(&result, pattern);

            // Null scalar value against an array pattern.
            let result = like_f(
                &Scalar::new(StringViewArray::new_null(1)),
                &StringViewArray::from_iter_values([pattern]),
            )
            .unwrap();
            assert_single_null(&result, pattern);

            // Null array value against an array pattern.
            let result = like_f(
                &StringViewArray::new_null(1),
                &StringViewArray::from_iter_values([pattern]),
            )
            .unwrap();
            assert_single_null(&result, pattern);

            // Null array value against a scalar pattern.
            let result = like_f(
                &StringViewArray::new_null(1),
                &StringViewArray::new_scalar(pattern),
            )
            .unwrap();
            assert_single_null(&result, pattern);
        }
    }
}
2119
#[test]
fn string_like_scalar_null() {
    // Checks that `result` has exactly one slot and that this slot is null.
    fn assert_single_null(result: &BooleanArray) {
        assert_eq!(result.len(), 1);
        assert_eq!(result.null_count(), 1);
        assert!(result.is_null(0));
    }

    // A null pattern must produce a null result for every kernel, whichever
    // side is passed as a scalar and whichever as an array.
    for like_f in [like, ilike, nlike, nilike] {
        // Scalar value against a null scalar pattern.
        let result = like_f(
            &StringArray::new_scalar("a"),
            &Scalar::new(StringArray::new_null(1)),
        )
        .unwrap();
        assert_single_null(&result);

        // Array value against a null scalar pattern.
        let result = like_f(
            &StringArray::from_iter_values(["a"]),
            &Scalar::new(StringArray::new_null(1)),
        )
        .unwrap();
        assert_single_null(&result);

        // Array value against a null array pattern.
        let result = like_f(
            &StringArray::from_iter_values(["a"]),
            &StringArray::new_null(1),
        )
        .unwrap();
        assert_single_null(&result);

        // Scalar value against a null array pattern.
        let result = like_f(
            &StringArray::new_scalar("a"),
            &StringArray::new_null(1),
        )
        .unwrap();
        assert_single_null(&result);
    }
}
2152
#[test]
fn string_view_like_scalar_null() {
    // Checks that `result` has exactly one slot and that this slot is null.
    fn assert_single_null(result: &BooleanArray) {
        assert_eq!(result.len(), 1);
        assert_eq!(result.null_count(), 1);
        assert!(result.is_null(0));
    }

    // A null pattern must produce a null result for every kernel, whichever
    // side is passed as a scalar and whichever as an array.
    for like_f in [like, ilike, nlike, nilike] {
        // Scalar value against a null scalar pattern.
        let result = like_f(
            &StringViewArray::new_scalar("a"),
            &Scalar::new(StringViewArray::new_null(1)),
        )
        .unwrap();
        assert_single_null(&result);

        // Array value against a null scalar pattern.
        let result = like_f(
            &StringViewArray::from_iter_values(["a"]),
            &Scalar::new(StringViewArray::new_null(1)),
        )
        .unwrap();
        assert_single_null(&result);

        // Array value against a null array pattern.
        let result = like_f(
            &StringViewArray::from_iter_values(["a"]),
            &StringViewArray::new_null(1),
        )
        .unwrap();
        assert_single_null(&result);

        // Scalar value against a null array pattern.
        let result = like_f(
            &StringViewArray::new_scalar("a"),
            &StringViewArray::new_null(1),
        )
        .unwrap();
        assert_single_null(&result);
    }
}
2185
#[test]
fn like_escape() {
    // Table of (value, pattern, whether `value LIKE pattern` should hold).
    // Backslash is the escape character throughout.
    let cases = vec![
        // Empty pattern
        (r"", r"", true),
        (r"\", r"", false),
        // Lone (dangling) escape (some engines consider this invalid pattern)
        (r"", r"\", false),
        (r"\", r"\", true),
        (r"\\", r"\", false),
        (r"a", r"\", false),
        (r"\a", r"\", false),
        (r"\\a", r"\", false),
        // One escaped backslash
        (r"", r"\\", false),
        (r"\", r"\\", true),
        (r"\\", r"\\", false),
        (r"a", r"\\", false),
        (r"\a", r"\\", false),
        (r"\\a", r"\\", false),
        // One escaped backslash followed by a dangling escape
        (r"", r"\\\", false),
        (r"\", r"\\\", false),
        (r"\\", r"\\\", true),
        (r"\\\", r"\\\", false),
        (r"\\\\", r"\\\", false),
        (r"a", r"\\\", false),
        (r"\a", r"\\\", false),
        (r"\\a", r"\\\", false),
        // Two escaped backslashes
        (r"", r"\\\\", false),
        (r"\", r"\\\\", false),
        (r"\\", r"\\\\", true),
        (r"\\\", r"\\\\", false),
        (r"\\\\", r"\\\\", false),
        (r"\\\\\", r"\\\\", false),
        (r"a", r"\\\\", false),
        (r"\a", r"\\\\", false),
        (r"\\a", r"\\\\", false),
        // Escaped non-wildcard
        (r"", r"\a", false),
        (r"\", r"\a", false),
        (r"\\", r"\a", false),
        (r"a", r"\a", true),
        (r"\a", r"\a", false),
        (r"\\a", r"\a", false),
        // Escaped _ wildcard
        (r"", r"\_", false),
        (r"\", r"\_", false),
        (r"\\", r"\_", false),
        (r"a", r"\_", false),
        (r"_", r"\_", true),
        (r"%", r"\_", false),
        (r"\a", r"\_", false),
        (r"\\a", r"\_", false),
        (r"\_", r"\_", false),
        (r"\\_", r"\_", false),
        // Escaped % wildcard
        (r"", r"\%", false),
        (r"\", r"\%", false),
        (r"\\", r"\%", false),
        (r"a", r"\%", false),
        (r"_", r"\%", false),
        (r"%", r"\%", true),
        (r"\a", r"\%", false),
        (r"\\a", r"\%", false),
        (r"\%", r"\%", false),
        (r"\\%", r"\%", false),
        // Escaped backslash and non-wildcard
        (r"", r"\\a", false),
        (r"\", r"\\a", false),
        (r"\\", r"\\a", false),
        (r"a", r"\\a", false),
        (r"\a", r"\\a", true),
        (r"\\a", r"\\a", false),
        (r"\\\a", r"\\a", false),
        // Escaped backslash and _ wildcard
        (r"", r"\\_", false),
        (r"\", r"\\_", false),
        (r"\\", r"\\_", true),
        (r"a", r"\\_", false),
        (r"_", r"\\_", false),
        (r"%", r"\\_", false),
        (r"\a", r"\\_", true),
        (r"\\a", r"\\_", false),
        (r"\_", r"\\_", true),
        (r"\\_", r"\\_", false),
        (r"\\\_", r"\\_", false),
        // Escaped backslash and % wildcard
        (r"", r"\\%", false),
        (r"\", r"\\%", true),
        (r"\\", r"\\%", true),
        (r"a", r"\\%", false),
        (r"ab", r"\\%", false),
        (r"a%", r"\\%", false),
        (r"_", r"\\%", false),
        (r"%", r"\\%", false),
        (r"\a", r"\\%", true),
        (r"\\a", r"\\%", true),
        (r"\%", r"\\%", true),
        (r"\\%", r"\\%", true),
        (r"\\\%", r"\\%", true),
        // %... pattern ending in a dangling escape
        (r"\", r"%\", true),
        (r"\\", r"%\", true),
        (r"%\", r"%\", true),
        (r"%\\", r"%\", true),
        (r"abc\", r"%\", true),
        (r"abc", r"%\", false),
        // %... pattern ending in an escaped backslash
        (r"\", r"%\\", true),
        (r"\\", r"%\\", true),
        (r"%\\", r"%\\", true),
        (r"%\\\", r"%\\", true),
        (r"abc\", r"%\\", true),
        (r"abc", r"%\\", false),
        // %... pattern including escaped non-wildcard
        (r"ac", r"%a\c", true),
        (r"xyzac", r"%a\c", true),
        (r"abc", r"%a\c", false),
        (r"a\c", r"%a\c", false),
        (r"%a\c", r"%a\c", false),
        // %... pattern including escaped backslash
        (r"\", r"%a\\c", false),
        (r"\\", r"%a\\c", false),
        (r"ac", r"%a\\c", false),
        (r"a\c", r"%a\\c", true),
        (r"a\\c", r"%a\\c", false),
        (r"abc", r"%a\\c", false),
        (r"xyza\c", r"%a\\c", true),
        (r"xyza\\c", r"%a\\c", false),
        (r"%a\\c", r"%a\\c", false),
        // ...% pattern starting with an escaped backslash
        (r"\", r"\\%", true),
        (r"\\", r"\\%", true),
        (r"\\%", r"\\%", true),
        (r"\\\%", r"\\%", true),
        (r"\abc", r"\\%", true),
        (r"a", r"\\%", false),
        (r"abc", r"\\%", false),
        // ...% pattern including escaped non-wildcard
        (r"ac", r"a\c%", true),
        (r"acxyz", r"a\c%", true),
        (r"abc", r"a\c%", false),
        (r"a\c", r"a\c%", false),
        (r"a\c%", r"a\c%", false),
        (r"a\\c%", r"a\c%", false),
        // ...% pattern including escaped backslash
        (r"ac", r"a\\c%", false),
        (r"a\c", r"a\\c%", true),
        (r"a\cxyz", r"a\\c%", true),
        (r"a\\c", r"a\\c%", false),
        (r"a\\cxyz", r"a\\c%", false),
        (r"abc", r"a\\c%", false),
        (r"abcxyz", r"a\\c%", false),
        (r"a\\c%", r"a\\c%", false),
        // %...% pattern including escaped non-wildcard
        (r"ac", r"%a\c%", true),
        (r"xyzacxyz", r"%a\c%", true),
        (r"abc", r"%a\c%", false),
        (r"a\c", r"%a\c%", false),
        (r"xyza\cxyz", r"%a\c%", false),
        (r"%a\c%", r"%a\c%", false),
        (r"%a\\c%", r"%a\c%", false),
        // %...% pattern including escaped backslash
        (r"ac", r"%a\\c%", false),
        (r"a\c", r"%a\\c%", true),
        (r"xyza\cxyz", r"%a\\c%", true),
        (r"a\\c", r"%a\\c%", false),
        (r"xyza\\cxyz", r"%a\\c%", false),
        (r"abc", r"%a\\c%", false),
        (r"xyzabcxyz", r"%a\\c%", false),
        (r"%a\\c%", r"%a\\c%", false),
        // Odd (7) backslashes and % wildcard
        (r"\\%", r"\\\\\\\%", false),
        (r"\\\", r"\\\\\\\%", false),
        (r"\\\%", r"\\\\\\\%", true),
        (r"\\\\", r"\\\\\\\%", false),
        (r"\\\\%", r"\\\\\\\%", false),
        (r"\\\\\\\%", r"\\\\\\\%", false),
        // Odd (7) backslashes and _ wildcard
        (r"\\\", r"\\\\\\\_", false),
        (r"\\\\", r"\\\\\\\_", false),
        (r"\\\_", r"\\\\\\\_", true),
        (r"\\\\", r"\\\\\\\_", false),
        (r"\\\a", r"\\\\\\\_", false),
        (r"\\\\_", r"\\\\\\\_", false),
        (r"\\\\\\\_", r"\\\\\\\_", false),
        // Even (8) backslashes and % wildcard
        (r"\\\", r"\\\\\\\\%", false),
        (r"\\\\", r"\\\\\\\\%", true),
        (r"\\\\\", r"\\\\\\\\%", true),
        (r"\\\\xyz", r"\\\\\\\\%", true),
        (r"\\\\\\\\%", r"\\\\\\\\%", true),
        // Even (8) backslashes and _ wildcard
        (r"\\\", r"\\\\\\\\_", false),
        (r"\\\\", r"\\\\\\\\_", false),
        (r"\\\\\", r"\\\\\\\\_", true),
        (r"\\\\a", r"\\\\\\\\_", true),
        (r"\\\\\a", r"\\\\\\\\_", false),
        (r"\\\\ab", r"\\\\\\\\_", false),
        (r"\\\\\\\\_", r"\\\\\\\\_", false),
    ];

    // Every case is run against each of these string encodings.
    let string_types = [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View];

    for (value, pattern, is_match) in cases {
        // Single-element results: what the positive kernels and the negated
        // kernels, respectively, must return for this case.
        let matched = BooleanArray::from(vec![is_match]);
        let not_matched = BooleanArray::from(vec![!is_match]);

        for string_type in &string_types {
            // Pair up the value datums with the pattern datums built for the
            // same string type.
            let value_datums = make_datums(value, string_type);
            let pattern_datums = make_datums(pattern, string_type);

            for ((value_datum, value_type), (pattern_datum, pattern_type)) in
                zip(value_datums, pattern_datums)
            {
                let lhs = value_datum.as_ref();
                let rhs = pattern_datum.as_ref();

                assert_eq!(
                    like(lhs, rhs).unwrap(),
                    matched,
                    "{value_type:?} «{value}» like {pattern_type:?} «{pattern}»"
                );
                assert_eq!(
                    ilike(lhs, rhs).unwrap(),
                    matched,
                    "{value_type:?} «{value}» ilike {pattern_type:?} «{pattern}»"
                );
                assert_eq!(
                    nlike(lhs, rhs).unwrap(),
                    not_matched,
                    "{value_type:?} «{value}» nlike {pattern_type:?} «{pattern}»"
                );
                assert_eq!(
                    nilike(lhs, rhs).unwrap(),
                    not_matched,
                    "{value_type:?} «{value}» nilike {pattern_type:?} «{pattern}»"
                );
            }
        }
    }
}
2426
2427    #[test]
2428    fn like_escape_many() {
2429        // (value, pattern, expected)
2430        let test_cases = vec![
2431            (r"", r"", true),
2432            (r"\", r"", false),
2433            (r"\\", r"", false),
2434            (r"\\\", r"", false),
2435            (r"\\\\", r"", false),
2436            (r"a", r"", false),
2437            (r"\a", r"", false),
2438            (r"\\a", r"", false),
2439            (r"%", r"", false),
2440            (r"\%", r"", false),
2441            (r"\\%", r"", false),
2442            (r"%%", r"", false),
2443            (r"\%%", r"", false),
2444            (r"\\%%", r"", false),
2445            (r"_", r"", false),
2446            (r"\_", r"", false),
2447            (r"\\_", r"", false),
2448            (r"__", r"", false),
2449            (r"\__", r"", false),
2450            (r"\\__", r"", false),
2451            (r"abc", r"", false),
2452            (r"a_c", r"", false),
2453            (r"a\bc", r"", false),
2454            (r"a\_c", r"", false),
2455            (r"%abc", r"", false),
2456            (r"\%abc", r"", false),
2457            (r"a\\_c%", r"", false),
2458            (r"", r"\", false),
2459            (r"\", r"\", true),
2460            (r"\\", r"\", false),
2461            (r"\\\", r"\", false),
2462            (r"\\\\", r"\", false),
2463            (r"a", r"\", false),
2464            (r"\a", r"\", false),
2465            (r"\\a", r"\", false),
2466            (r"%", r"\", false),
2467            (r"\%", r"\", false),
2468            (r"\\%", r"\", false),
2469            (r"%%", r"\", false),
2470            (r"\%%", r"\", false),
2471            (r"\\%%", r"\", false),
2472            (r"_", r"\", false),
2473            (r"\_", r"\", false),
2474            (r"\\_", r"\", false),
2475            (r"__", r"\", false),
2476            (r"\__", r"\", false),
2477            (r"\\__", r"\", false),
2478            (r"abc", r"\", false),
2479            (r"a_c", r"\", false),
2480            (r"a\bc", r"\", false),
2481            (r"a\_c", r"\", false),
2482            (r"%abc", r"\", false),
2483            (r"\%abc", r"\", false),
2484            (r"a\\_c%", r"\", false),
2485            (r"", r"\\", false),
2486            (r"\", r"\\", true),
2487            (r"\\", r"\\", false),
2488            (r"\\\", r"\\", false),
2489            (r"\\\\", r"\\", false),
2490            (r"a", r"\\", false),
2491            (r"\a", r"\\", false),
2492            (r"\\a", r"\\", false),
2493            (r"%", r"\\", false),
2494            (r"\%", r"\\", false),
2495            (r"\\%", r"\\", false),
2496            (r"%%", r"\\", false),
2497            (r"\%%", r"\\", false),
2498            (r"\\%%", r"\\", false),
2499            (r"_", r"\\", false),
2500            (r"\_", r"\\", false),
2501            (r"\\_", r"\\", false),
2502            (r"__", r"\\", false),
2503            (r"\__", r"\\", false),
2504            (r"\\__", r"\\", false),
2505            (r"abc", r"\\", false),
2506            (r"a_c", r"\\", false),
2507            (r"a\bc", r"\\", false),
2508            (r"a\_c", r"\\", false),
2509            (r"%abc", r"\\", false),
2510            (r"\%abc", r"\\", false),
2511            (r"a\\_c%", r"\\", false),
2512            (r"", r"\\\", false),
2513            (r"\", r"\\\", false),
2514            (r"\\", r"\\\", true),
2515            (r"\\\", r"\\\", false),
2516            (r"\\\\", r"\\\", false),
2517            (r"a", r"\\\", false),
2518            (r"\a", r"\\\", false),
2519            (r"\\a", r"\\\", false),
2520            (r"%", r"\\\", false),
2521            (r"\%", r"\\\", false),
2522            (r"\\%", r"\\\", false),
2523            (r"%%", r"\\\", false),
2524            (r"\%%", r"\\\", false),
2525            (r"\\%%", r"\\\", false),
2526            (r"_", r"\\\", false),
2527            (r"\_", r"\\\", false),
2528            (r"\\_", r"\\\", false),
2529            (r"__", r"\\\", false),
2530            (r"\__", r"\\\", false),
2531            (r"\\__", r"\\\", false),
2532            (r"abc", r"\\\", false),
2533            (r"a_c", r"\\\", false),
2534            (r"a\bc", r"\\\", false),
2535            (r"a\_c", r"\\\", false),
2536            (r"%abc", r"\\\", false),
2537            (r"\%abc", r"\\\", false),
2538            (r"a\\_c%", r"\\\", false),
2539            (r"", r"\\\\", false),
2540            (r"\", r"\\\\", false),
2541            (r"\\", r"\\\\", true),
2542            (r"\\\", r"\\\\", false),
2543            (r"\\\\", r"\\\\", false),
2544            (r"a", r"\\\\", false),
2545            (r"\a", r"\\\\", false),
2546            (r"\\a", r"\\\\", false),
2547            (r"%", r"\\\\", false),
2548            (r"\%", r"\\\\", false),
2549            (r"\\%", r"\\\\", false),
2550            (r"%%", r"\\\\", false),
2551            (r"\%%", r"\\\\", false),
2552            (r"\\%%", r"\\\\", false),
2553            (r"_", r"\\\\", false),
2554            (r"\_", r"\\\\", false),
2555            (r"\\_", r"\\\\", false),
2556            (r"__", r"\\\\", false),
2557            (r"\__", r"\\\\", false),
2558            (r"\\__", r"\\\\", false),
2559            (r"abc", r"\\\\", false),
2560            (r"a_c", r"\\\\", false),
2561            (r"a\bc", r"\\\\", false),
2562            (r"a\_c", r"\\\\", false),
2563            (r"%abc", r"\\\\", false),
2564            (r"\%abc", r"\\\\", false),
2565            (r"a\\_c%", r"\\\\", false),
2566            (r"", r"a", false),
2567            (r"\", r"a", false),
2568            (r"\\", r"a", false),
2569            (r"\\\", r"a", false),
2570            (r"\\\\", r"a", false),
2571            (r"a", r"a", true),
2572            (r"\a", r"a", false),
2573            (r"\\a", r"a", false),
2574            (r"%", r"a", false),
2575            (r"\%", r"a", false),
2576            (r"\\%", r"a", false),
2577            (r"%%", r"a", false),
2578            (r"\%%", r"a", false),
2579            (r"\\%%", r"a", false),
2580            (r"_", r"a", false),
2581            (r"\_", r"a", false),
2582            (r"\\_", r"a", false),
2583            (r"__", r"a", false),
2584            (r"\__", r"a", false),
2585            (r"\\__", r"a", false),
2586            (r"abc", r"a", false),
2587            (r"a_c", r"a", false),
2588            (r"a\bc", r"a", false),
2589            (r"a\_c", r"a", false),
2590            (r"%abc", r"a", false),
2591            (r"\%abc", r"a", false),
2592            (r"a\\_c%", r"a", false),
2593            (r"", r"\a", false),
2594            (r"\", r"\a", false),
2595            (r"\\", r"\a", false),
2596            (r"\\\", r"\a", false),
2597            (r"\\\\", r"\a", false),
2598            (r"a", r"\a", true),
2599            (r"\a", r"\a", false),
2600            (r"\\a", r"\a", false),
2601            (r"%", r"\a", false),
2602            (r"\%", r"\a", false),
2603            (r"\\%", r"\a", false),
2604            (r"%%", r"\a", false),
2605            (r"\%%", r"\a", false),
2606            (r"\\%%", r"\a", false),
2607            (r"_", r"\a", false),
2608            (r"\_", r"\a", false),
2609            (r"\\_", r"\a", false),
2610            (r"__", r"\a", false),
2611            (r"\__", r"\a", false),
2612            (r"\\__", r"\a", false),
2613            (r"abc", r"\a", false),
2614            (r"a_c", r"\a", false),
2615            (r"a\bc", r"\a", false),
2616            (r"a\_c", r"\a", false),
2617            (r"%abc", r"\a", false),
2618            (r"\%abc", r"\a", false),
2619            (r"a\\_c%", r"\a", false),
2620            (r"", r"\\a", false),
2621            (r"\", r"\\a", false),
2622            (r"\\", r"\\a", false),
2623            (r"\\\", r"\\a", false),
2624            (r"\\\\", r"\\a", false),
2625            (r"a", r"\\a", false),
2626            (r"\a", r"\\a", true),
2627            (r"\\a", r"\\a", false),
2628            (r"%", r"\\a", false),
2629            (r"\%", r"\\a", false),
2630            (r"\\%", r"\\a", false),
2631            (r"%%", r"\\a", false),
2632            (r"\%%", r"\\a", false),
2633            (r"\\%%", r"\\a", false),
2634            (r"_", r"\\a", false),
2635            (r"\_", r"\\a", false),
2636            (r"\\_", r"\\a", false),
2637            (r"__", r"\\a", false),
2638            (r"\__", r"\\a", false),
2639            (r"\\__", r"\\a", false),
2640            (r"abc", r"\\a", false),
2641            (r"a_c", r"\\a", false),
2642            (r"a\bc", r"\\a", false),
2643            (r"a\_c", r"\\a", false),
2644            (r"%abc", r"\\a", false),
2645            (r"\%abc", r"\\a", false),
2646            (r"a\\_c%", r"\\a", false),
2647            (r"", r"%", true),
2648            (r"\", r"%", true),
2649            (r"\\", r"%", true),
2650            (r"\\\", r"%", true),
2651            (r"\\\\", r"%", true),
2652            (r"a", r"%", true),
2653            (r"\a", r"%", true),
2654            (r"\\a", r"%", true),
2655            (r"%", r"%", true),
2656            (r"\%", r"%", true),
2657            (r"\\%", r"%", true),
2658            (r"%%", r"%", true),
2659            (r"\%%", r"%", true),
2660            (r"\\%%", r"%", true),
2661            (r"_", r"%", true),
2662            (r"\_", r"%", true),
2663            (r"\\_", r"%", true),
2664            (r"__", r"%", true),
2665            (r"\__", r"%", true),
2666            (r"\\__", r"%", true),
2667            (r"abc", r"%", true),
2668            (r"a_c", r"%", true),
2669            (r"a\bc", r"%", true),
2670            (r"a\_c", r"%", true),
2671            (r"%abc", r"%", true),
2672            (r"\%abc", r"%", true),
2673            (r"a\\_c%", r"%", true),
2674            (r"", r"\%", false),
2675            (r"\", r"\%", false),
2676            (r"\\", r"\%", false),
2677            (r"\\\", r"\%", false),
2678            (r"\\\\", r"\%", false),
2679            (r"a", r"\%", false),
2680            (r"\a", r"\%", false),
2681            (r"\\a", r"\%", false),
2682            (r"%", r"\%", true),
2683            (r"\%", r"\%", false),
2684            (r"\\%", r"\%", false),
2685            (r"%%", r"\%", false),
2686            (r"\%%", r"\%", false),
2687            (r"\\%%", r"\%", false),
2688            (r"_", r"\%", false),
2689            (r"\_", r"\%", false),
2690            (r"\\_", r"\%", false),
2691            (r"__", r"\%", false),
2692            (r"\__", r"\%", false),
2693            (r"\\__", r"\%", false),
2694            (r"abc", r"\%", false),
2695            (r"a_c", r"\%", false),
2696            (r"a\bc", r"\%", false),
2697            (r"a\_c", r"\%", false),
2698            (r"%abc", r"\%", false),
2699            (r"\%abc", r"\%", false),
2700            (r"a\\_c%", r"\%", false),
2701            (r"", r"\\%", false),
2702            (r"\", r"\\%", true),
2703            (r"\\", r"\\%", true),
2704            (r"\\\", r"\\%", true),
2705            (r"\\\\", r"\\%", true),
2706            (r"a", r"\\%", false),
2707            (r"\a", r"\\%", true),
2708            (r"\\a", r"\\%", true),
2709            (r"%", r"\\%", false),
2710            (r"\%", r"\\%", true),
2711            (r"\\%", r"\\%", true),
2712            (r"%%", r"\\%", false),
2713            (r"\%%", r"\\%", true),
2714            (r"\\%%", r"\\%", true),
2715            (r"_", r"\\%", false),
2716            (r"\_", r"\\%", true),
2717            (r"\\_", r"\\%", true),
2718            (r"__", r"\\%", false),
2719            (r"\__", r"\\%", true),
2720            (r"\\__", r"\\%", true),
2721            (r"abc", r"\\%", false),
2722            (r"a_c", r"\\%", false),
2723            (r"a\bc", r"\\%", false),
2724            (r"a\_c", r"\\%", false),
2725            (r"%abc", r"\\%", false),
2726            (r"\%abc", r"\\%", true),
2727            (r"a\\_c%", r"\\%", false),
2728            (r"", r"%%", true),
2729            (r"\", r"%%", true),
2730            (r"\\", r"%%", true),
2731            (r"\\\", r"%%", true),
2732            (r"\\\\", r"%%", true),
2733            (r"a", r"%%", true),
2734            (r"\a", r"%%", true),
2735            (r"\\a", r"%%", true),
2736            (r"%", r"%%", true),
2737            (r"\%", r"%%", true),
2738            (r"\\%", r"%%", true),
2739            (r"%%", r"%%", true),
2740            (r"\%%", r"%%", true),
2741            (r"\\%%", r"%%", true),
2742            (r"_", r"%%", true),
2743            (r"\_", r"%%", true),
2744            (r"\\_", r"%%", true),
2745            (r"__", r"%%", true),
2746            (r"\__", r"%%", true),
2747            (r"\\__", r"%%", true),
2748            (r"abc", r"%%", true),
2749            (r"a_c", r"%%", true),
2750            (r"a\bc", r"%%", true),
2751            (r"a\_c", r"%%", true),
2752            (r"%abc", r"%%", true),
2753            (r"\%abc", r"%%", true),
2754            (r"a\\_c%", r"%%", true),
2755            (r"", r"\%%", false),
2756            (r"\", r"\%%", false),
2757            (r"\\", r"\%%", false),
2758            (r"\\\", r"\%%", false),
2759            (r"\\\\", r"\%%", false),
2760            (r"a", r"\%%", false),
2761            (r"\a", r"\%%", false),
2762            (r"\\a", r"\%%", false),
2763            (r"%", r"\%%", true),
2764            (r"\%", r"\%%", false),
2765            (r"\\%", r"\%%", false),
2766            (r"%%", r"\%%", true),
2767            (r"\%%", r"\%%", false),
2768            (r"\\%%", r"\%%", false),
2769            (r"_", r"\%%", false),
2770            (r"\_", r"\%%", false),
2771            (r"\\_", r"\%%", false),
2772            (r"__", r"\%%", false),
2773            (r"\__", r"\%%", false),
2774            (r"\\__", r"\%%", false),
2775            (r"abc", r"\%%", false),
2776            (r"a_c", r"\%%", false),
2777            (r"a\bc", r"\%%", false),
2778            (r"a\_c", r"\%%", false),
2779            (r"%abc", r"\%%", true),
2780            (r"\%abc", r"\%%", false),
2781            (r"a\\_c%", r"\%%", false),
2782            (r"", r"\\%%", false),
2783            (r"\", r"\\%%", true),
2784            (r"\\", r"\\%%", true),
2785            (r"\\\", r"\\%%", true),
2786            (r"\\\\", r"\\%%", true),
2787            (r"a", r"\\%%", false),
2788            (r"\a", r"\\%%", true),
2789            (r"\\a", r"\\%%", true),
2790            (r"%", r"\\%%", false),
2791            (r"\%", r"\\%%", true),
2792            (r"\\%", r"\\%%", true),
2793            (r"%%", r"\\%%", false),
2794            (r"\%%", r"\\%%", true),
2795            (r"\\%%", r"\\%%", true),
2796            (r"_", r"\\%%", false),
2797            (r"\_", r"\\%%", true),
2798            (r"\\_", r"\\%%", true),
2799            (r"__", r"\\%%", false),
2800            (r"\__", r"\\%%", true),
2801            (r"\\__", r"\\%%", true),
2802            (r"abc", r"\\%%", false),
2803            (r"a_c", r"\\%%", false),
2804            (r"a\bc", r"\\%%", false),
2805            (r"a\_c", r"\\%%", false),
2806            (r"%abc", r"\\%%", false),
2807            (r"\%abc", r"\\%%", true),
2808            (r"a\\_c%", r"\\%%", false),
2809            (r"", r"_", false),
2810            (r"\", r"_", true),
2811            (r"\\", r"_", false),
2812            (r"\\\", r"_", false),
2813            (r"\\\\", r"_", false),
2814            (r"a", r"_", true),
2815            (r"\a", r"_", false),
2816            (r"\\a", r"_", false),
2817            (r"%", r"_", true),
2818            (r"\%", r"_", false),
2819            (r"\\%", r"_", false),
2820            (r"%%", r"_", false),
2821            (r"\%%", r"_", false),
2822            (r"\\%%", r"_", false),
2823            (r"_", r"_", true),
2824            (r"\_", r"_", false),
2825            (r"\\_", r"_", false),
2826            (r"__", r"_", false),
2827            (r"\__", r"_", false),
2828            (r"\\__", r"_", false),
2829            (r"abc", r"_", false),
2830            (r"a_c", r"_", false),
2831            (r"a\bc", r"_", false),
2832            (r"a\_c", r"_", false),
2833            (r"%abc", r"_", false),
2834            (r"\%abc", r"_", false),
2835            (r"a\\_c%", r"_", false),
2836            (r"", r"\_", false),
2837            (r"\", r"\_", false),
2838            (r"\\", r"\_", false),
2839            (r"\\\", r"\_", false),
2840            (r"\\\\", r"\_", false),
2841            (r"a", r"\_", false),
2842            (r"\a", r"\_", false),
2843            (r"\\a", r"\_", false),
2844            (r"%", r"\_", false),
2845            (r"\%", r"\_", false),
2846            (r"\\%", r"\_", false),
2847            (r"%%", r"\_", false),
2848            (r"\%%", r"\_", false),
2849            (r"\\%%", r"\_", false),
2850            (r"_", r"\_", true),
2851            (r"\_", r"\_", false),
2852            (r"\\_", r"\_", false),
2853            (r"__", r"\_", false),
2854            (r"\__", r"\_", false),
2855            (r"\\__", r"\_", false),
2856            (r"abc", r"\_", false),
2857            (r"a_c", r"\_", false),
2858            (r"a\bc", r"\_", false),
2859            (r"a\_c", r"\_", false),
2860            (r"%abc", r"\_", false),
2861            (r"\%abc", r"\_", false),
2862            (r"a\\_c%", r"\_", false),
2863            (r"", r"\\_", false),
2864            (r"\", r"\\_", false),
2865            (r"\\", r"\\_", true),
2866            (r"\\\", r"\\_", false),
2867            (r"\\\\", r"\\_", false),
2868            (r"a", r"\\_", false),
2869            (r"\a", r"\\_", true),
2870            (r"\\a", r"\\_", false),
2871            (r"%", r"\\_", false),
2872            (r"\%", r"\\_", true),
2873            (r"\\%", r"\\_", false),
2874            (r"%%", r"\\_", false),
2875            (r"\%%", r"\\_", false),
2876            (r"\\%%", r"\\_", false),
2877            (r"_", r"\\_", false),
2878            (r"\_", r"\\_", true),
2879            (r"\\_", r"\\_", false),
2880            (r"__", r"\\_", false),
2881            (r"\__", r"\\_", false),
2882            (r"\\__", r"\\_", false),
2883            (r"abc", r"\\_", false),
2884            (r"a_c", r"\\_", false),
2885            (r"a\bc", r"\\_", false),
2886            (r"a\_c", r"\\_", false),
2887            (r"%abc", r"\\_", false),
2888            (r"\%abc", r"\\_", false),
2889            (r"a\\_c%", r"\\_", false),
2890            (r"", r"__", false),
2891            (r"\", r"__", false),
2892            (r"\\", r"__", true),
2893            (r"\\\", r"__", false),
2894            (r"\\\\", r"__", false),
2895            (r"a", r"__", false),
2896            (r"\a", r"__", true),
2897            (r"\\a", r"__", false),
2898            (r"%", r"__", false),
2899            (r"\%", r"__", true),
2900            (r"\\%", r"__", false),
2901            (r"%%", r"__", true),
2902            (r"\%%", r"__", false),
2903            (r"\\%%", r"__", false),
2904            (r"_", r"__", false),
2905            (r"\_", r"__", true),
2906            (r"\\_", r"__", false),
2907            (r"__", r"__", true),
2908            (r"\__", r"__", false),
2909            (r"\\__", r"__", false),
2910            (r"abc", r"__", false),
2911            (r"a_c", r"__", false),
2912            (r"a\bc", r"__", false),
2913            (r"a\_c", r"__", false),
2914            (r"%abc", r"__", false),
2915            (r"\%abc", r"__", false),
2916            (r"a\\_c%", r"__", false),
2917            (r"", r"\__", false),
2918            (r"\", r"\__", false),
2919            (r"\\", r"\__", false),
2920            (r"\\\", r"\__", false),
2921            (r"\\\\", r"\__", false),
2922            (r"a", r"\__", false),
2923            (r"\a", r"\__", false),
2924            (r"\\a", r"\__", false),
2925            (r"%", r"\__", false),
2926            (r"\%", r"\__", false),
2927            (r"\\%", r"\__", false),
2928            (r"%%", r"\__", false),
2929            (r"\%%", r"\__", false),
2930            (r"\\%%", r"\__", false),
2931            (r"_", r"\__", false),
2932            (r"\_", r"\__", false),
2933            (r"\\_", r"\__", false),
2934            (r"__", r"\__", true),
2935            (r"\__", r"\__", false),
2936            (r"\\__", r"\__", false),
2937            (r"abc", r"\__", false),
2938            (r"a_c", r"\__", false),
2939            (r"a\bc", r"\__", false),
2940            (r"a\_c", r"\__", false),
2941            (r"%abc", r"\__", false),
2942            (r"\%abc", r"\__", false),
2943            (r"a\\_c%", r"\__", false),
2944            (r"", r"\\__", false),
2945            (r"\", r"\\__", false),
2946            (r"\\", r"\\__", false),
2947            (r"\\\", r"\\__", true),
2948            (r"\\\\", r"\\__", false),
2949            (r"a", r"\\__", false),
2950            (r"\a", r"\\__", false),
2951            (r"\\a", r"\\__", true),
2952            (r"%", r"\\__", false),
2953            (r"\%", r"\\__", false),
2954            (r"\\%", r"\\__", true),
2955            (r"%%", r"\\__", false),
2956            (r"\%%", r"\\__", true),
2957            (r"\\%%", r"\\__", false),
2958            (r"_", r"\\__", false),
2959            (r"\_", r"\\__", false),
2960            (r"\\_", r"\\__", true),
2961            (r"__", r"\\__", false),
2962            (r"\__", r"\\__", true),
2963            (r"\\__", r"\\__", false),
2964            (r"abc", r"\\__", false),
2965            (r"a_c", r"\\__", false),
2966            (r"a\bc", r"\\__", false),
2967            (r"a\_c", r"\\__", false),
2968            (r"%abc", r"\\__", false),
2969            (r"\%abc", r"\\__", false),
2970            (r"a\\_c%", r"\\__", false),
2971            (r"", r"abc", false),
2972            (r"\", r"abc", false),
2973            (r"\\", r"abc", false),
2974            (r"\\\", r"abc", false),
2975            (r"\\\\", r"abc", false),
2976            (r"a", r"abc", false),
2977            (r"\a", r"abc", false),
2978            (r"\\a", r"abc", false),
2979            (r"%", r"abc", false),
2980            (r"\%", r"abc", false),
2981            (r"\\%", r"abc", false),
2982            (r"%%", r"abc", false),
2983            (r"\%%", r"abc", false),
2984            (r"\\%%", r"abc", false),
2985            (r"_", r"abc", false),
2986            (r"\_", r"abc", false),
2987            (r"\\_", r"abc", false),
2988            (r"__", r"abc", false),
2989            (r"\__", r"abc", false),
2990            (r"\\__", r"abc", false),
2991            (r"abc", r"abc", true),
2992            (r"a_c", r"abc", false),
2993            (r"a\bc", r"abc", false),
2994            (r"a\_c", r"abc", false),
2995            (r"%abc", r"abc", false),
2996            (r"\%abc", r"abc", false),
2997            (r"a\\_c%", r"abc", false),
2998            (r"", r"a_c", false),
2999            (r"\", r"a_c", false),
3000            (r"\\", r"a_c", false),
3001            (r"\\\", r"a_c", false),
3002            (r"\\\\", r"a_c", false),
3003            (r"a", r"a_c", false),
3004            (r"\a", r"a_c", false),
3005            (r"\\a", r"a_c", false),
3006            (r"%", r"a_c", false),
3007            (r"\%", r"a_c", false),
3008            (r"\\%", r"a_c", false),
3009            (r"%%", r"a_c", false),
3010            (r"\%%", r"a_c", false),
3011            (r"\\%%", r"a_c", false),
3012            (r"_", r"a_c", false),
3013            (r"\_", r"a_c", false),
3014            (r"\\_", r"a_c", false),
3015            (r"__", r"a_c", false),
3016            (r"\__", r"a_c", false),
3017            (r"\\__", r"a_c", false),
3018            (r"abc", r"a_c", true),
3019            (r"a_c", r"a_c", true),
3020            (r"a\bc", r"a_c", false),
3021            (r"a\_c", r"a_c", false),
3022            (r"%abc", r"a_c", false),
3023            (r"\%abc", r"a_c", false),
3024            (r"a\\_c%", r"a_c", false),
3025            (r"", r"a\bc", false),
3026            (r"\", r"a\bc", false),
3027            (r"\\", r"a\bc", false),
3028            (r"\\\", r"a\bc", false),
3029            (r"\\\\", r"a\bc", false),
3030            (r"a", r"a\bc", false),
3031            (r"\a", r"a\bc", false),
3032            (r"\\a", r"a\bc", false),
3033            (r"%", r"a\bc", false),
3034            (r"\%", r"a\bc", false),
3035            (r"\\%", r"a\bc", false),
3036            (r"%%", r"a\bc", false),
3037            (r"\%%", r"a\bc", false),
3038            (r"\\%%", r"a\bc", false),
3039            (r"_", r"a\bc", false),
3040            (r"\_", r"a\bc", false),
3041            (r"\\_", r"a\bc", false),
3042            (r"__", r"a\bc", false),
3043            (r"\__", r"a\bc", false),
3044            (r"\\__", r"a\bc", false),
3045            (r"abc", r"a\bc", true),
3046            (r"a_c", r"a\bc", false),
3047            (r"a\bc", r"a\bc", false),
3048            (r"a\_c", r"a\bc", false),
3049            (r"%abc", r"a\bc", false),
3050            (r"\%abc", r"a\bc", false),
3051            (r"a\\_c%", r"a\bc", false),
3052            (r"", r"a\_c", false),
3053            (r"\", r"a\_c", false),
3054            (r"\\", r"a\_c", false),
3055            (r"\\\", r"a\_c", false),
3056            (r"\\\\", r"a\_c", false),
3057            (r"a", r"a\_c", false),
3058            (r"\a", r"a\_c", false),
3059            (r"\\a", r"a\_c", false),
3060            (r"%", r"a\_c", false),
3061            (r"\%", r"a\_c", false),
3062            (r"\\%", r"a\_c", false),
3063            (r"%%", r"a\_c", false),
3064            (r"\%%", r"a\_c", false),
3065            (r"\\%%", r"a\_c", false),
3066            (r"_", r"a\_c", false),
3067            (r"\_", r"a\_c", false),
3068            (r"\\_", r"a\_c", false),
3069            (r"__", r"a\_c", false),
3070            (r"\__", r"a\_c", false),
3071            (r"\\__", r"a\_c", false),
3072            (r"abc", r"a\_c", false),
3073            (r"a_c", r"a\_c", true),
3074            (r"a\bc", r"a\_c", false),
3075            (r"a\_c", r"a\_c", false),
3076            (r"%abc", r"a\_c", false),
3077            (r"\%abc", r"a\_c", false),
3078            (r"a\\_c%", r"a\_c", false),
3079            (r"", r"%abc", false),
3080            (r"\", r"%abc", false),
3081            (r"\\", r"%abc", false),
3082            (r"\\\", r"%abc", false),
3083            (r"\\\\", r"%abc", false),
3084            (r"a", r"%abc", false),
3085            (r"\a", r"%abc", false),
3086            (r"\\a", r"%abc", false),
3087            (r"%", r"%abc", false),
3088            (r"\%", r"%abc", false),
3089            (r"\\%", r"%abc", false),
3090            (r"%%", r"%abc", false),
3091            (r"\%%", r"%abc", false),
3092            (r"\\%%", r"%abc", false),
3093            (r"_", r"%abc", false),
3094            (r"\_", r"%abc", false),
3095            (r"\\_", r"%abc", false),
3096            (r"__", r"%abc", false),
3097            (r"\__", r"%abc", false),
3098            (r"\\__", r"%abc", false),
3099            (r"abc", r"%abc", true),
3100            (r"a_c", r"%abc", false),
3101            (r"a\bc", r"%abc", false),
3102            (r"a\_c", r"%abc", false),
3103            (r"%abc", r"%abc", true),
3104            (r"\%abc", r"%abc", true),
3105            (r"a\\_c%", r"%abc", false),
3106            (r"", r"\%abc", false),
3107            (r"\", r"\%abc", false),
3108            (r"\\", r"\%abc", false),
3109            (r"\\\", r"\%abc", false),
3110            (r"\\\\", r"\%abc", false),
3111            (r"a", r"\%abc", false),
3112            (r"\a", r"\%abc", false),
3113            (r"\\a", r"\%abc", false),
3114            (r"%", r"\%abc", false),
3115            (r"\%", r"\%abc", false),
3116            (r"\\%", r"\%abc", false),
3117            (r"%%", r"\%abc", false),
3118            (r"\%%", r"\%abc", false),
3119            (r"\\%%", r"\%abc", false),
3120            (r"_", r"\%abc", false),
3121            (r"\_", r"\%abc", false),
3122            (r"\\_", r"\%abc", false),
3123            (r"__", r"\%abc", false),
3124            (r"\__", r"\%abc", false),
3125            (r"\\__", r"\%abc", false),
3126            (r"abc", r"\%abc", false),
3127            (r"a_c", r"\%abc", false),
3128            (r"a\bc", r"\%abc", false),
3129            (r"a\_c", r"\%abc", false),
3130            (r"%abc", r"\%abc", true),
3131            (r"\%abc", r"\%abc", false),
3132            (r"a\\_c%", r"\%abc", false),
3133            (r"", r"a\\_c%", false),
3134            (r"\", r"a\\_c%", false),
3135            (r"\\", r"a\\_c%", false),
3136            (r"\\\", r"a\\_c%", false),
3137            (r"\\\\", r"a\\_c%", false),
3138            (r"a", r"a\\_c%", false),
3139            (r"\a", r"a\\_c%", false),
3140            (r"\\a", r"a\\_c%", false),
3141            (r"%", r"a\\_c%", false),
3142            (r"\%", r"a\\_c%", false),
3143            (r"\\%", r"a\\_c%", false),
3144            (r"%%", r"a\\_c%", false),
3145            (r"\%%", r"a\\_c%", false),
3146            (r"\\%%", r"a\\_c%", false),
3147            (r"_", r"a\\_c%", false),
3148            (r"\_", r"a\\_c%", false),
3149            (r"\\_", r"a\\_c%", false),
3150            (r"__", r"a\\_c%", false),
3151            (r"\__", r"a\\_c%", false),
3152            (r"\\__", r"a\\_c%", false),
3153            (r"abc", r"a\\_c%", false),
3154            (r"a_c", r"a\\_c%", false),
3155            (r"a\bc", r"a\\_c%", true),
3156            (r"a\_c", r"a\\_c%", true),
3157            (r"%abc", r"a\\_c%", false),
3158            (r"\%abc", r"a\\_c%", false),
3159            (r"a\\_c%", r"a\\_c%", false),
3160        ];
3161
3162        let values = test_cases
3163            .iter()
3164            .map(|(value, _, _)| *value)
3165            .collect::<Vec<_>>();
3166        let patterns = test_cases
3167            .iter()
3168            .map(|(_, pattern, _)| *pattern)
3169            .collect::<Vec<_>>();
3170        let expected = BooleanArray::from(
3171            test_cases
3172                .iter()
3173                .map(|(_, _, expected)| *expected)
3174                .collect::<Vec<_>>(),
3175        );
3176        let unexpected = BooleanArray::from(
3177            test_cases
3178                .iter()
3179                .map(|(_, _, expected)| !*expected)
3180                .collect::<Vec<_>>(),
3181        );
3182
3183        for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
3184            let values = make_array(values.iter(), &string_type);
3185            let patterns = make_array(patterns.iter(), &string_type);
3186            let (values, patterns) = (values.as_ref(), patterns.as_ref());
3187
3188            assert_eq!(like(&values, &patterns).unwrap(), expected,);
3189            assert_eq!(ilike(&values, &patterns).unwrap(), expected,);
3190            assert_eq!(nlike(&values, &patterns).unwrap(), unexpected,);
3191            assert_eq!(nilike(&values, &patterns).unwrap(), unexpected,);
3192        }
3193    }
3194
3195    fn make_datums(
3196        value: impl AsRef<str>,
3197        data_type: &DataType,
3198    ) -> Vec<(Box<dyn Datum>, DatumType)> {
3199        match data_type {
3200            DataType::Utf8 => {
3201                let array = StringArray::from_iter_values([value]);
3202                vec![
3203                    (Box::new(array.clone()), DatumType::Array),
3204                    (Box::new(Scalar::new(array)), DatumType::Scalar),
3205                ]
3206            }
3207            DataType::LargeUtf8 => {
3208                let array = LargeStringArray::from_iter_values([value]);
3209                vec![
3210                    (Box::new(array.clone()), DatumType::Array),
3211                    (Box::new(Scalar::new(array)), DatumType::Scalar),
3212                ]
3213            }
3214            DataType::Utf8View => {
3215                let array = StringViewArray::from_iter_values([value]);
3216                vec![
3217                    (Box::new(array.clone()), DatumType::Array),
3218                    (Box::new(Scalar::new(array)), DatumType::Scalar),
3219                ]
3220            }
3221            _ => unimplemented!(),
3222        }
3223    }
3224
3225    fn make_array(
3226        values: impl IntoIterator<Item: AsRef<str>>,
3227        data_type: &DataType,
3228    ) -> Box<dyn Array> {
3229        match data_type {
3230            DataType::Utf8 => Box::new(StringArray::from_iter_values(values)),
3231            DataType::LargeUtf8 => Box::new(LargeStringArray::from_iter_values(values)),
3232            DataType::Utf8View => Box::new(StringViewArray::from_iter_values(values)),
3233            _ => unimplemented!(),
3234        }
3235    }
3236
3237    #[derive(Debug)]
3238    enum DatumType {
3239        Array,
3240        Scalar,
3241    }
3242}