Skip to main content

vortex_array/arrays/dict/compute/
like.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_error::VortexResult;
5
6use super::Dict;
7use super::DictArray;
8use crate::ArrayRef;
9use crate::IntoArray;
10use crate::array::ArrayView;
11use crate::arrays::ConstantArray;
12use crate::arrays::dict::DictArrayExt;
13use crate::arrays::dict::DictArraySlotsExt;
14use crate::arrays::scalar_fn::ScalarFnFactoryExt;
15use crate::optimizer::ArrayOptimizer;
16use crate::scalar_fn::fns::like::Like;
17use crate::scalar_fn::fns::like::LikeOptions;
18use crate::scalar_fn::fns::like::LikeReduce;
19
20impl LikeReduce for Dict {
21    fn like(
22        array: ArrayView<'_, Dict>,
23        pattern: &ArrayRef,
24        options: LikeOptions,
25    ) -> VortexResult<Option<ArrayRef>> {
26        // If we have more values than codes, it is faster to canonicalize first.
27        if array.values().len() > array.codes().len() {
28            return Ok(None);
29        }
30        if let Some(pattern) = pattern.as_constant() {
31            let pattern = ConstantArray::new(pattern, array.values().len()).into_array();
32
33            let values = Like
34                .try_new_array(pattern.len(), options, [array.values().clone(), pattern])?
35                .optimize()?;
36
37            // SAFETY: LIKE preserves the len of the values, so codes are still pointing at
38            //  valid positions.
39            // Preserve all_values_referenced since codes are unchanged.
40            unsafe {
41                Ok(Some(
42                    DictArray::new_unchecked(array.codes().clone(), values)
43                        .set_all_values_referenced(array.has_all_values_referenced())
44                        .into_array(),
45                ))
46            }
47        } else {
48            Ok(None)
49        }
50    }
51}
52
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_error::VortexResult;

    use crate::IntoArray;
    use crate::arrays::BoolArray;
    use crate::arrays::DictArray;
    use crate::arrays::VarBinArray;
    use crate::arrays::dict::compute::like::ConstantArray;
    use crate::arrays::scalar_fn::ScalarFnFactoryExt;
    use crate::assert_arrays_eq;
    use crate::optimizer::ArrayOptimizer;
    use crate::scalar_fn::fns::like::Like;
    use crate::scalar_fn::fns::like::LikeOptions;

    /// `LIKE 'hello%'` over a dictionary-encoded string array yields one boolean per code.
    #[test]
    fn like_reduce_dict() -> VortexResult<()> {
        // Four rows referencing three distinct strings: hello, world, hello, help.
        let codes = buffer![0u8, 1, 0, 2].into_array();
        let values = VarBinArray::from(vec!["hello", "world", "help"]).into_array();
        let dict = DictArray::try_new(codes, values)?.into_array();

        let rows = 4;
        let pattern = ConstantArray::new("hello%", rows).into_array();

        let like_array = Like.try_new_array(rows, LikeOptions::default(), [dict, pattern])?;
        let result = like_array.optimize()?;

        // Only the two "hello" rows match the prefix pattern.
        assert_arrays_eq!(result, BoolArray::from_iter([true, false, true, false]));
        Ok(())
    }
}