vortex_array/compute/
like.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt::Display;
6use std::fmt::Formatter;
7use std::sync::LazyLock;
8
9use arcref::ArcRef;
10use vortex_dtype::DType;
11use vortex_error::VortexError;
12use vortex_error::VortexExpect;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_err;
16
17use crate::Array;
18use crate::ArrayRef;
19use crate::arrow::Datum;
20use crate::arrow::from_arrow_array_with_len;
21use crate::compute::ComputeFn;
22use crate::compute::ComputeFnVTable;
23use crate::compute::InvocationArgs;
24use crate::compute::Kernel;
25use crate::compute::Options;
26use crate::compute::Output;
27use crate::vtable::VTable;
28
29static LIKE_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
30    let compute = ComputeFn::new("like".into(), ArcRef::new_ref(&Like));
31    for kernel in inventory::iter::<LikeKernelRef> {
32        compute.register_kernel(kernel.0.clone());
33    }
34    compute
35});
36
37pub(crate) fn warm_up_vtable() -> usize {
38    LIKE_FN.kernels().len()
39}
40
41/// Perform SQL left LIKE right
42///
43/// There are two wildcards supported with the LIKE operator:
44/// - %: matches zero or more characters
45/// - _: matches exactly one character
46pub fn like(
47    array: &dyn Array,
48    pattern: &dyn Array,
49    options: LikeOptions,
50) -> VortexResult<ArrayRef> {
51    LIKE_FN
52        .invoke(&InvocationArgs {
53            inputs: &[array.into(), pattern.into()],
54            options: &options,
55        })?
56        .unwrap_array()
57}
58
59pub struct LikeKernelRef(ArcRef<dyn Kernel>);
60inventory::collect!(LikeKernelRef);
61
62pub trait LikeKernel: VTable {
63    fn like(
64        &self,
65        array: &Self::Array,
66        pattern: &dyn Array,
67        options: LikeOptions,
68    ) -> VortexResult<Option<ArrayRef>>;
69}
70
71#[derive(Debug)]
72pub struct LikeKernelAdapter<V: VTable>(pub V);
73
74impl<V: VTable + LikeKernel> LikeKernelAdapter<V> {
75    pub const fn lift(&'static self) -> LikeKernelRef {
76        LikeKernelRef(ArcRef::new_ref(self))
77    }
78}
79
80impl<V: VTable + LikeKernel> Kernel for LikeKernelAdapter<V> {
81    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
82        let inputs = LikeArgs::try_from(args)?;
83        let Some(array) = inputs.array.as_opt::<V>() else {
84            return Ok(None);
85        };
86        Ok(V::like(&self.0, array, inputs.pattern, inputs.options)?.map(|array| array.into()))
87    }
88}
89
90struct Like;
91
92impl ComputeFnVTable for Like {
93    fn invoke(
94        &self,
95        args: &InvocationArgs,
96        kernels: &[ArcRef<dyn Kernel>],
97    ) -> VortexResult<Output> {
98        let LikeArgs {
99            array,
100            pattern,
101            options,
102        } = LikeArgs::try_from(args)?;
103
104        for kernel in kernels {
105            if let Some(output) = kernel.invoke(args)? {
106                return Ok(output);
107            }
108        }
109        if let Some(output) = array.invoke(&LIKE_FN, args)? {
110            return Ok(output);
111        }
112
113        // Otherwise, we fall back to the Arrow implementation
114        Ok(arrow_like(array, pattern, options)?.into())
115    }
116
117    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
118        let LikeArgs { array, pattern, .. } = LikeArgs::try_from(args)?;
119        if !matches!(array.dtype(), DType::Utf8(..)) {
120            vortex_bail!("Expected utf8 array, got {}", array.dtype());
121        }
122        if !matches!(pattern.dtype(), DType::Utf8(..)) {
123            vortex_bail!("Expected utf8 pattern, got {}", array.dtype());
124        }
125        let nullability = array.dtype().is_nullable() || pattern.dtype().is_nullable();
126        Ok(DType::Bool(nullability.into()))
127    }
128
129    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
130        let LikeArgs { array, pattern, .. } = LikeArgs::try_from(args)?;
131        if array.len() != pattern.len() {
132            vortex_bail!(
133                "Length mismatch lhs len {} ({}) != rhs len {} ({})",
134                array.len(),
135                array.encoding_id(),
136                pattern.len(),
137                pattern.encoding_id()
138            );
139        }
140        Ok(array.len())
141    }
142
143    fn is_elementwise(&self) -> bool {
144        true
145    }
146}
147
/// Options controlling the SQL LIKE function.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LikeOptions {
    /// Invert the match (`NOT LIKE` / `NOT ILIKE`).
    pub negated: bool,
    /// Match without regard to case (`ILIKE`).
    pub case_insensitive: bool,
}

impl Display for LikeOptions {
    /// Renders the SQL operator these options select: `LIKE`, `ILIKE`, `NOT LIKE` or
    /// `NOT ILIKE`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let operator = match (self.negated, self.case_insensitive) {
            (false, false) => "LIKE",
            (false, true) => "ILIKE",
            (true, false) => "NOT LIKE",
            (true, true) => "NOT ILIKE",
        };
        f.write_str(operator)
    }
}
167
168impl Options for LikeOptions {
169    fn as_any(&self) -> &dyn Any {
170        self
171    }
172}
173
174struct LikeArgs<'a> {
175    array: &'a dyn Array,
176    pattern: &'a dyn Array,
177    options: LikeOptions,
178}
179
180impl<'a> TryFrom<&InvocationArgs<'a>> for LikeArgs<'a> {
181    type Error = VortexError;
182
183    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
184        if value.inputs.len() != 2 {
185            vortex_bail!("Expected 2 inputs, found {}", value.inputs.len());
186        }
187        let array = value.inputs[0]
188            .array()
189            .ok_or_else(|| vortex_err!("Expected first input to be an array"))?;
190        let pattern = value.inputs[1]
191            .array()
192            .ok_or_else(|| vortex_err!("Expected second input to be an array"))?;
193        let options = *value
194            .options
195            .as_any()
196            .downcast_ref::<LikeOptions>()
197            .vortex_expect("Expected options to be LikeOptions");
198
199        Ok(LikeArgs {
200            array,
201            pattern,
202            options,
203        })
204    }
205}
206
207/// Implementation of `LikeFn` using the Arrow crate.
208pub(crate) fn arrow_like(
209    array: &dyn Array,
210    pattern: &dyn Array,
211    options: LikeOptions,
212) -> VortexResult<ArrayRef> {
213    let nullable = array.dtype().is_nullable() | pattern.dtype().is_nullable();
214    let len = array.len();
215    assert_eq!(
216        array.len(),
217        pattern.len(),
218        "Arrow Like: length mismatch for {}",
219        array.encoding_id()
220    );
221
222    // convert the pattern to the preferred array datatype
223    let lhs = Datum::try_new(array)?;
224    let rhs = Datum::try_new_with_target_datatype(pattern, lhs.data_type())?;
225
226    let result = match (options.negated, options.case_insensitive) {
227        (false, false) => arrow_string::like::like(&lhs, &rhs)?,
228        (true, false) => arrow_string::like::nlike(&lhs, &rhs)?,
229        (false, true) => arrow_string::like::ilike(&lhs, &rhs)?,
230        (true, true) => arrow_string::like::nilike(&lhs, &rhs)?,
231    };
232
233    Ok(from_arrow_array_with_len(&result, len, nullable))
234}