vortex_array/compute/
like.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::LazyLock;
6
7use arcref::ArcRef;
8use vortex_dtype::DType;
9use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail, vortex_err};
10
11use crate::arrow::{Datum, from_arrow_array_with_len};
12use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Options, Output};
13use crate::vtable::VTable;
14use crate::{Array, ArrayRef};
15
16static LIKE_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
17    let compute = ComputeFn::new("like".into(), ArcRef::new_ref(&Like));
18    for kernel in inventory::iter::<LikeKernelRef> {
19        compute.register_kernel(kernel.0.clone());
20    }
21    compute
22});
23
24pub(crate) fn warm_up_vtable() -> usize {
25    LIKE_FN.kernels().len()
26}
27
28/// Perform SQL left LIKE right
29///
30/// There are two wildcards supported with the LIKE operator:
31/// - %: matches zero or more characters
32/// - _: matches exactly one character
33pub fn like(
34    array: &dyn Array,
35    pattern: &dyn Array,
36    options: LikeOptions,
37) -> VortexResult<ArrayRef> {
38    LIKE_FN
39        .invoke(&InvocationArgs {
40            inputs: &[array.into(), pattern.into()],
41            options: &options,
42        })?
43        .unwrap_array()
44}
45
/// Handle to a type-erased [`Kernel`] implementing `like`.
///
/// Values of this type are collected through `inventory` and registered on the
/// `like` compute function when it is first initialized.
pub struct LikeKernelRef(ArcRef<dyn Kernel>);
// Declares `LikeKernelRef` as an `inventory` collection point.
inventory::collect!(LikeKernelRef);
48
/// Encoding-specific implementation of SQL `LIKE`.
///
/// Implemented on a [`VTable`] so an encoding can evaluate `LIKE` directly on
/// its own array type.
pub trait LikeKernel: VTable {
    /// Evaluate `array LIKE pattern` with the given `options`.
    ///
    /// An `Ok(None)` result is forwarded by [`LikeKernelAdapter`] as "no output",
    /// which lets the dispatcher try the next kernel or fall back to another
    /// implementation.
    fn like(
        &self,
        array: &Self::Array,
        pattern: &dyn Array,
        options: LikeOptions,
    ) -> VortexResult<Option<ArrayRef>>;
}
57
/// Adapter that exposes a [`LikeKernel`] implementation as a generic [`Kernel`].
#[derive(Debug)]
pub struct LikeKernelAdapter<V: VTable>(pub V);
60
61impl<V: VTable + LikeKernel> LikeKernelAdapter<V> {
62    pub const fn lift(&'static self) -> LikeKernelRef {
63        LikeKernelRef(ArcRef::new_ref(self))
64    }
65}
66
67impl<V: VTable + LikeKernel> Kernel for LikeKernelAdapter<V> {
68    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
69        let inputs = LikeArgs::try_from(args)?;
70        let Some(array) = inputs.array.as_opt::<V>() else {
71            return Ok(None);
72        };
73        Ok(V::like(&self.0, array, inputs.pattern, inputs.options)?.map(|array| array.into()))
74    }
75}
76
/// Unit type carrying the [`ComputeFnVTable`] implementation for `like`.
struct Like;
78
79impl ComputeFnVTable for Like {
80    fn invoke(
81        &self,
82        args: &InvocationArgs,
83        kernels: &[ArcRef<dyn Kernel>],
84    ) -> VortexResult<Output> {
85        let LikeArgs {
86            array,
87            pattern,
88            options,
89        } = LikeArgs::try_from(args)?;
90
91        for kernel in kernels {
92            if let Some(output) = kernel.invoke(args)? {
93                return Ok(output);
94            }
95        }
96        if let Some(output) = array.invoke(&LIKE_FN, args)? {
97            return Ok(output);
98        }
99
100        // Otherwise, we fall back to the Arrow implementation
101        Ok(arrow_like(array, pattern, options)?.into())
102    }
103
104    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
105        let LikeArgs { array, pattern, .. } = LikeArgs::try_from(args)?;
106        if !matches!(array.dtype(), DType::Utf8(..)) {
107            vortex_bail!("Expected utf8 array, got {}", array.dtype());
108        }
109        if !matches!(pattern.dtype(), DType::Utf8(..)) {
110            vortex_bail!("Expected utf8 pattern, got {}", array.dtype());
111        }
112        let nullability = array.dtype().is_nullable() || pattern.dtype().is_nullable();
113        Ok(DType::Bool(nullability.into()))
114    }
115
116    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
117        let LikeArgs { array, pattern, .. } = LikeArgs::try_from(args)?;
118        if array.len() != pattern.len() {
119            vortex_bail!(
120                "Length mismatch lhs len {} ({}) != rhs len {} ({})",
121                array.len(),
122                array.encoding_id(),
123                pattern.len(),
124                pattern.encoding_id()
125            );
126        }
127        Ok(array.len())
128    }
129
130    fn is_elementwise(&self) -> bool {
131        true
132    }
133}
134
/// Options for SQL LIKE function
///
/// The default value has both flags cleared, i.e. a plain case-sensitive `LIKE`.
#[derive(Default, Debug, Clone, Copy)]
pub struct LikeOptions {
    /// When set, the negated variant (`NOT LIKE`) is evaluated.
    pub negated: bool,
    /// When set, the case-insensitive variant (`ILIKE`) is evaluated.
    pub case_insensitive: bool,
}
141
impl Options for LikeOptions {
    /// Exposes `self` as `&dyn Any` so the options can be downcast back to
    /// [`LikeOptions`] when the invocation arguments are unpacked.
    fn as_any(&self) -> &dyn Any {
        self
    }
}
147
/// Typed view over the [`InvocationArgs`] of a `like` call.
struct LikeArgs<'a> {
    /// First input: the array whose values are matched.
    array: &'a dyn Array,
    /// Second input: the array of LIKE patterns.
    pattern: &'a dyn Array,
    /// Options copied out of the invocation.
    options: LikeOptions,
}
153
154impl<'a> TryFrom<&InvocationArgs<'a>> for LikeArgs<'a> {
155    type Error = VortexError;
156
157    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
158        if value.inputs.len() != 2 {
159            vortex_bail!("Expected 2 inputs, found {}", value.inputs.len());
160        }
161        let array = value.inputs[0]
162            .array()
163            .ok_or_else(|| vortex_err!("Expected first input to be an array"))?;
164        let pattern = value.inputs[1]
165            .array()
166            .ok_or_else(|| vortex_err!("Expected second input to be an array"))?;
167        let options = *value
168            .options
169            .as_any()
170            .downcast_ref::<LikeOptions>()
171            .vortex_expect("Expected options to be LikeOptions");
172
173        Ok(LikeArgs {
174            array,
175            pattern,
176            options,
177        })
178    }
179}
180
181/// Implementation of `LikeFn` using the Arrow crate.
182pub(crate) fn arrow_like(
183    array: &dyn Array,
184    pattern: &dyn Array,
185    options: LikeOptions,
186) -> VortexResult<ArrayRef> {
187    let nullable = array.dtype().is_nullable() | pattern.dtype().is_nullable();
188    let len = array.len();
189    assert_eq!(
190        array.len(),
191        pattern.len(),
192        "Arrow Like: length mismatch for {}",
193        array.encoding_id()
194    );
195    let lhs = Datum::try_new(array)?;
196    let rhs = Datum::try_new(pattern)?;
197
198    let result = match (options.negated, options.case_insensitive) {
199        (false, false) => arrow_string::like::like(&lhs, &rhs)?,
200        (true, false) => arrow_string::like::nlike(&lhs, &rhs)?,
201        (false, true) => arrow_string::like::ilike(&lhs, &rhs)?,
202        (true, true) => arrow_string::like::nilike(&lhs, &rhs)?,
203    };
204
205    Ok(from_arrow_array_with_len(&result, len, nullable))
206}