vortex_array/compute/
like.rs

1use vortex_dtype::DType;
2use vortex_error::{VortexResult, vortex_bail, vortex_err};
3
4use crate::arrow::{Datum, from_arrow_array_with_len};
5use crate::encoding::Encoding;
6use crate::{Array, ArrayRef};
7
8pub trait LikeFn<A> {
9    fn like(
10        &self,
11        array: A,
12        pattern: &dyn Array,
13        options: LikeOptions,
14    ) -> VortexResult<Option<ArrayRef>>;
15}
16
17impl<E: Encoding> LikeFn<&dyn Array> for E
18where
19    E: for<'a> LikeFn<&'a E::Array>,
20{
21    fn like(
22        &self,
23        array: &dyn Array,
24        pattern: &dyn Array,
25        options: LikeOptions,
26    ) -> VortexResult<Option<ArrayRef>> {
27        LikeFn::like(
28            self,
29            array
30                .as_any()
31                .downcast_ref()
32                .ok_or_else(|| vortex_err!("Mismatched array"))?,
33            pattern,
34            options,
35        )
36    }
37}
38
/// Options controlling how the SQL `LIKE` function is evaluated.
///
/// The default has both flags unset: a plain, case-sensitive `LIKE`.
#[derive(Debug, Clone, Copy, Default)]
pub struct LikeOptions {
    /// When set, the negated form of the match is computed (`NOT LIKE`).
    pub negated: bool,
    /// When set, the case-insensitive form of the match is computed (`ILIKE`).
    pub case_insensitive: bool,
}
45
46/// Perform SQL left LIKE right
47///
48/// There are two wildcards supported with the LIKE operator:
49/// - %: matches zero or more characters
50/// - _: matches exactly one character
51pub fn like(
52    array: &dyn Array,
53    pattern: &dyn Array,
54    options: LikeOptions,
55) -> VortexResult<ArrayRef> {
56    if !matches!(array.dtype(), DType::Utf8(..)) {
57        vortex_bail!("Expected utf8 array, got {}", array.dtype());
58    }
59    if !matches!(pattern.dtype(), DType::Utf8(..)) {
60        vortex_bail!("Expected utf8 pattern, got {}", array.dtype());
61    }
62    if array.len() != pattern.len() {
63        vortex_bail!(
64            "Length mismatch lhs len {} ({}) != rhs len {} ({})",
65            array.len(),
66            array.encoding(),
67            pattern.len(),
68            pattern.encoding()
69        );
70    }
71
72    let expected_dtype =
73        DType::Bool((array.dtype().is_nullable() || pattern.dtype().is_nullable()).into());
74    let array_encoding = array.encoding();
75
76    let result = array
77        .vtable()
78        .like_fn()
79        .and_then(|f| f.like(array, pattern, options).transpose())
80        .unwrap_or_else(|| {
81            // Otherwise, we canonicalize into a UTF8 array.
82            log::debug!(
83                "No like implementation found for encoding {}",
84                array.encoding(),
85            );
86            arrow_like(array, pattern, options)
87        })?;
88
89    debug_assert_eq!(
90        result.len(),
91        pattern.len(),
92        "Like length mismatch {}",
93        array_encoding
94    );
95    debug_assert_eq!(
96        result.dtype(),
97        &expected_dtype,
98        "Like dtype mismatch {}",
99        array_encoding
100    );
101
102    Ok(result)
103}
104
105/// Implementation of `LikeFn` using the Arrow crate.
106pub(crate) fn arrow_like(
107    array: &dyn Array,
108    pattern: &dyn Array,
109    options: LikeOptions,
110) -> VortexResult<ArrayRef> {
111    let nullable = array.dtype().is_nullable();
112    let len = array.len();
113    debug_assert_eq!(
114        array.len(),
115        pattern.len(),
116        "Arrow Like: length mismatch for {}",
117        array.encoding()
118    );
119    let lhs = Datum::try_new(array.to_array())?;
120    let rhs = Datum::try_new(pattern.to_array())?;
121
122    let result = match (options.negated, options.case_insensitive) {
123        (false, false) => arrow_string::like::like(&lhs, &rhs)?,
124        (true, false) => arrow_string::like::nlike(&lhs, &rhs)?,
125        (false, true) => arrow_string::like::ilike(&lhs, &rhs)?,
126        (true, true) => arrow_string::like::nilike(&lhs, &rhs)?,
127    };
128
129    from_arrow_array_with_len(&result, len, nullable)
130}