vortex_array/compute/
like.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::LazyLock;
6
7use arcref::ArcRef;
8use vortex_dtype::DType;
9use vortex_error::VortexError;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_err;
14
15use crate::Array;
16use crate::ArrayRef;
17use crate::arrow::Datum;
18use crate::arrow::from_arrow_array_with_len;
19use crate::compute::ComputeFn;
20use crate::compute::ComputeFnVTable;
21use crate::compute::InvocationArgs;
22use crate::compute::Kernel;
23use crate::compute::Options;
24use crate::compute::Output;
25use crate::vtable::VTable;
26
27static LIKE_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
28    let compute = ComputeFn::new("like".into(), ArcRef::new_ref(&Like));
29    for kernel in inventory::iter::<LikeKernelRef> {
30        compute.register_kernel(kernel.0.clone());
31    }
32    compute
33});
34
35pub(crate) fn warm_up_vtable() -> usize {
36    LIKE_FN.kernels().len()
37}
38
39/// Perform SQL left LIKE right
40///
41/// There are two wildcards supported with the LIKE operator:
42/// - %: matches zero or more characters
43/// - _: matches exactly one character
44pub fn like(
45    array: &dyn Array,
46    pattern: &dyn Array,
47    options: LikeOptions,
48) -> VortexResult<ArrayRef> {
49    LIKE_FN
50        .invoke(&InvocationArgs {
51            inputs: &[array.into(), pattern.into()],
52            options: &options,
53        })?
54        .unwrap_array()
55}
56
57pub struct LikeKernelRef(ArcRef<dyn Kernel>);
58inventory::collect!(LikeKernelRef);
59
60pub trait LikeKernel: VTable {
61    fn like(
62        &self,
63        array: &Self::Array,
64        pattern: &dyn Array,
65        options: LikeOptions,
66    ) -> VortexResult<Option<ArrayRef>>;
67}
68
69#[derive(Debug)]
70pub struct LikeKernelAdapter<V: VTable>(pub V);
71
72impl<V: VTable + LikeKernel> LikeKernelAdapter<V> {
73    pub const fn lift(&'static self) -> LikeKernelRef {
74        LikeKernelRef(ArcRef::new_ref(self))
75    }
76}
77
78impl<V: VTable + LikeKernel> Kernel for LikeKernelAdapter<V> {
79    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
80        let inputs = LikeArgs::try_from(args)?;
81        let Some(array) = inputs.array.as_opt::<V>() else {
82            return Ok(None);
83        };
84        Ok(V::like(&self.0, array, inputs.pattern, inputs.options)?.map(|array| array.into()))
85    }
86}
87
/// Vtable type implementing the `like` compute function.
struct Like;
89
90impl ComputeFnVTable for Like {
91    fn invoke(
92        &self,
93        args: &InvocationArgs,
94        kernels: &[ArcRef<dyn Kernel>],
95    ) -> VortexResult<Output> {
96        let LikeArgs {
97            array,
98            pattern,
99            options,
100        } = LikeArgs::try_from(args)?;
101
102        for kernel in kernels {
103            if let Some(output) = kernel.invoke(args)? {
104                return Ok(output);
105            }
106        }
107        if let Some(output) = array.invoke(&LIKE_FN, args)? {
108            return Ok(output);
109        }
110
111        // Otherwise, we fall back to the Arrow implementation
112        Ok(arrow_like(array, pattern, options)?.into())
113    }
114
115    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
116        let LikeArgs { array, pattern, .. } = LikeArgs::try_from(args)?;
117        if !matches!(array.dtype(), DType::Utf8(..)) {
118            vortex_bail!("Expected utf8 array, got {}", array.dtype());
119        }
120        if !matches!(pattern.dtype(), DType::Utf8(..)) {
121            vortex_bail!("Expected utf8 pattern, got {}", array.dtype());
122        }
123        let nullability = array.dtype().is_nullable() || pattern.dtype().is_nullable();
124        Ok(DType::Bool(nullability.into()))
125    }
126
127    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
128        let LikeArgs { array, pattern, .. } = LikeArgs::try_from(args)?;
129        if array.len() != pattern.len() {
130            vortex_bail!(
131                "Length mismatch lhs len {} ({}) != rhs len {} ({})",
132                array.len(),
133                array.encoding_id(),
134                pattern.len(),
135                pattern.encoding_id()
136            );
137        }
138        Ok(array.len())
139    }
140
141    fn is_elementwise(&self) -> bool {
142        true
143    }
144}
145
/// Flags that modify how the SQL `LIKE` function is evaluated.
///
/// The default value has both flags cleared.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LikeOptions {
    /// When set, the negated (`NOT LIKE`) variant is evaluated.
    pub negated: bool,
    /// When set, the case-insensitive (`ILIKE`) variant is evaluated.
    pub case_insensitive: bool,
}
152
153impl Options for LikeOptions {
154    fn as_any(&self) -> &dyn Any {
155        self
156    }
157}
158
159struct LikeArgs<'a> {
160    array: &'a dyn Array,
161    pattern: &'a dyn Array,
162    options: LikeOptions,
163}
164
165impl<'a> TryFrom<&InvocationArgs<'a>> for LikeArgs<'a> {
166    type Error = VortexError;
167
168    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
169        if value.inputs.len() != 2 {
170            vortex_bail!("Expected 2 inputs, found {}", value.inputs.len());
171        }
172        let array = value.inputs[0]
173            .array()
174            .ok_or_else(|| vortex_err!("Expected first input to be an array"))?;
175        let pattern = value.inputs[1]
176            .array()
177            .ok_or_else(|| vortex_err!("Expected second input to be an array"))?;
178        let options = *value
179            .options
180            .as_any()
181            .downcast_ref::<LikeOptions>()
182            .vortex_expect("Expected options to be LikeOptions");
183
184        Ok(LikeArgs {
185            array,
186            pattern,
187            options,
188        })
189    }
190}
191
192/// Implementation of `LikeFn` using the Arrow crate.
193pub(crate) fn arrow_like(
194    array: &dyn Array,
195    pattern: &dyn Array,
196    options: LikeOptions,
197) -> VortexResult<ArrayRef> {
198    let nullable = array.dtype().is_nullable() | pattern.dtype().is_nullable();
199    let len = array.len();
200    assert_eq!(
201        array.len(),
202        pattern.len(),
203        "Arrow Like: length mismatch for {}",
204        array.encoding_id()
205    );
206
207    // convert the pattern to the preferred array datatype
208    let lhs = Datum::try_new(array)?;
209    let rhs = Datum::try_new_with_target_datatype(pattern, lhs.data_type())?;
210
211    let result = match (options.negated, options.case_insensitive) {
212        (false, false) => arrow_string::like::like(&lhs, &rhs)?,
213        (true, false) => arrow_string::like::nlike(&lhs, &rhs)?,
214        (false, true) => arrow_string::like::ilike(&lhs, &rhs)?,
215        (true, true) => arrow_string::like::nilike(&lhs, &rhs)?,
216    };
217
218    Ok(from_arrow_array_with_len(&result, len, nullable))
219}