vortex_array/compute/
like.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::LazyLock;
6
7use arcref::ArcRef;
8use vortex_dtype::DType;
9use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail, vortex_err};
10
11use crate::arrow::{Datum, from_arrow_array_with_len};
12use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Options, Output};
13use crate::vtable::VTable;
14use crate::{Array, ArrayRef};
15
16static LIKE_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
17    let compute = ComputeFn::new("like".into(), ArcRef::new_ref(&Like));
18    for kernel in inventory::iter::<LikeKernelRef> {
19        compute.register_kernel(kernel.0.clone());
20    }
21    compute
22});
23
24/// Perform SQL left LIKE right
25///
26/// There are two wildcards supported with the LIKE operator:
27/// - %: matches zero or more characters
28/// - _: matches exactly one character
29pub fn like(
30    array: &dyn Array,
31    pattern: &dyn Array,
32    options: LikeOptions,
33) -> VortexResult<ArrayRef> {
34    LIKE_FN
35        .invoke(&InvocationArgs {
36            inputs: &[array.into(), pattern.into()],
37            options: &options,
38        })?
39        .unwrap_array()
40}
41
42pub struct LikeKernelRef(ArcRef<dyn Kernel>);
43inventory::collect!(LikeKernelRef);
44
45pub trait LikeKernel: VTable {
46    fn like(
47        &self,
48        array: &Self::Array,
49        pattern: &dyn Array,
50        options: LikeOptions,
51    ) -> VortexResult<Option<ArrayRef>>;
52}
53
54#[derive(Debug)]
55pub struct LikeKernelAdapter<V: VTable>(pub V);
56
57impl<V: VTable + LikeKernel> LikeKernelAdapter<V> {
58    pub const fn lift(&'static self) -> LikeKernelRef {
59        LikeKernelRef(ArcRef::new_ref(self))
60    }
61}
62
63impl<V: VTable + LikeKernel> Kernel for LikeKernelAdapter<V> {
64    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
65        let inputs = LikeArgs::try_from(args)?;
66        let Some(array) = inputs.array.as_opt::<V>() else {
67            return Ok(None);
68        };
69        Ok(V::like(&self.0, array, inputs.pattern, inputs.options)?.map(|array| array.into()))
70    }
71}
72
73struct Like;
74
75impl ComputeFnVTable for Like {
76    fn invoke(
77        &self,
78        args: &InvocationArgs,
79        kernels: &[ArcRef<dyn Kernel>],
80    ) -> VortexResult<Output> {
81        let LikeArgs {
82            array,
83            pattern,
84            options,
85        } = LikeArgs::try_from(args)?;
86
87        for kernel in kernels {
88            if let Some(output) = kernel.invoke(args)? {
89                return Ok(output);
90            }
91        }
92        if let Some(output) = array.invoke(&LIKE_FN, args)? {
93            return Ok(output);
94        }
95
96        // Otherwise, we fall back to the Arrow implementation
97        Ok(arrow_like(array, pattern, options)?.into())
98    }
99
100    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
101        let LikeArgs { array, pattern, .. } = LikeArgs::try_from(args)?;
102        if !matches!(array.dtype(), DType::Utf8(..)) {
103            vortex_bail!("Expected utf8 array, got {}", array.dtype());
104        }
105        if !matches!(pattern.dtype(), DType::Utf8(..)) {
106            vortex_bail!("Expected utf8 pattern, got {}", array.dtype());
107        }
108        let nullability = array.dtype().is_nullable() || pattern.dtype().is_nullable();
109        Ok(DType::Bool(nullability.into()))
110    }
111
112    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
113        let LikeArgs { array, pattern, .. } = LikeArgs::try_from(args)?;
114        if array.len() != pattern.len() {
115            vortex_bail!(
116                "Length mismatch lhs len {} ({}) != rhs len {} ({})",
117                array.len(),
118                array.encoding_id(),
119                pattern.len(),
120                pattern.encoding_id()
121            );
122        }
123        Ok(array.len())
124    }
125
126    fn is_elementwise(&self) -> bool {
127        true
128    }
129}
130
131/// Options for SQL LIKE function
132#[derive(Default, Debug, Clone, Copy)]
133pub struct LikeOptions {
134    pub negated: bool,
135    pub case_insensitive: bool,
136}
137
138impl Options for LikeOptions {
139    fn as_any(&self) -> &dyn Any {
140        self
141    }
142}
143
144struct LikeArgs<'a> {
145    array: &'a dyn Array,
146    pattern: &'a dyn Array,
147    options: LikeOptions,
148}
149
150impl<'a> TryFrom<&InvocationArgs<'a>> for LikeArgs<'a> {
151    type Error = VortexError;
152
153    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
154        if value.inputs.len() != 2 {
155            vortex_bail!("Expected 2 inputs, found {}", value.inputs.len());
156        }
157        let array = value.inputs[0]
158            .array()
159            .ok_or_else(|| vortex_err!("Expected first input to be an array"))?;
160        let pattern = value.inputs[1]
161            .array()
162            .ok_or_else(|| vortex_err!("Expected second input to be an array"))?;
163        let options = *value
164            .options
165            .as_any()
166            .downcast_ref::<LikeOptions>()
167            .vortex_expect("Expected options to be LikeOptions");
168
169        Ok(LikeArgs {
170            array,
171            pattern,
172            options,
173        })
174    }
175}
176
177/// Implementation of `LikeFn` using the Arrow crate.
178pub(crate) fn arrow_like(
179    array: &dyn Array,
180    pattern: &dyn Array,
181    options: LikeOptions,
182) -> VortexResult<ArrayRef> {
183    let nullable = array.dtype().is_nullable() | pattern.dtype().is_nullable();
184    let len = array.len();
185    assert_eq!(
186        array.len(),
187        pattern.len(),
188        "Arrow Like: length mismatch for {}",
189        array.encoding_id()
190    );
191    let lhs = Datum::try_new(array)?;
192    let rhs = Datum::try_new(pattern)?;
193
194    let result = match (options.negated, options.case_insensitive) {
195        (false, false) => arrow_string::like::like(&lhs, &rhs)?,
196        (true, false) => arrow_string::like::nlike(&lhs, &rhs)?,
197        (false, true) => arrow_string::like::ilike(&lhs, &rhs)?,
198        (true, true) => arrow_string::like::nilike(&lhs, &rhs)?,
199    };
200
201    Ok(from_arrow_array_with_len(&result, len, nullable))
202}