// vortex_array/compute/like.rs
use vortex_dtype::DType;
use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult};

use crate::arrow::{from_arrow_array_with_len, Datum};
use crate::encoding::Encoding;
use crate::{ArrayDType, ArrayData};

/// Encoding-level hook for evaluating SQL `LIKE` over an array.
///
/// `Array` is the type of the left-hand operand the implementation accepts. This file
/// provides a blanket implementation for `ArrayData` on top of any encoding that
/// implements the trait for its own `E::Array` type.
pub trait LikeFn<Array> {
    /// Evaluate `array LIKE pattern` using the given `options`.
    ///
    /// Returns the result as an `ArrayData`, or an error if the evaluation fails.
    fn like(
        &self,
        array: Array,
        pattern: &ArrayData,
        options: LikeOptions,
    ) -> VortexResult<ArrayData>;
}

/// Blanket adapter from the typed `LikeFn<E::Array>` implementation of an encoding to an
/// untyped `LikeFn<ArrayData>` entry point.
///
/// The incoming `ArrayData` is checked to really belong to encoding `E`, converted into
/// `E::Array`, and then handed to the typed implementation.
impl<E: Encoding> LikeFn<ArrayData> for E
where
    E: LikeFn<E::Array>,
    E::Array: TryFrom<ArrayData, Error = VortexError>,
{
    fn like(
        &self,
        array: ArrayData,
        pattern: &ArrayData,
        options: LikeOptions,
    ) -> VortexResult<ArrayData> {
        // The encoding is taken from the array itself (not from `self`) and must
        // downcast to `E`; anything else means the caller paired the wrong encoding
        // with this array.
        let Some(typed_encoding) = array.encoding().as_any().downcast_ref::<E>() else {
            return Err(vortex_err!("Mismatched encoding"));
        };

        // Convert the untyped array into the encoding's own array type, propagating
        // any conversion failure.
        let typed_array = <E::Array as TryFrom<ArrayData>>::try_from(array)?;

        // Dispatch explicitly to the typed implementation to avoid recursing into
        // this blanket impl.
        <E as LikeFn<E::Array>>::like(typed_encoding, typed_array, pattern, options)
    }
}

/// Options for SQL LIKE function
///
/// Both flags default to `false` (via `Default`), i.e. a plain case-sensitive `LIKE`.
#[derive(Default, Debug, Clone, Copy)]
pub struct LikeOptions {
    /// When `true`, the negated form of the match is evaluated (the Arrow fallback
    /// selects `nlike` / `nilike` instead of `like` / `ilike`).
    pub negated: bool,
    /// When `true`, matching ignores case (the Arrow fallback selects `ilike` /
    /// `nilike` instead of `like` / `nlike`).
    pub case_insensitive: bool,
}

/// Perform SQL left LIKE right
///
/// There are two wildcards supported with the LIKE operator:
/// - %: matches zero or more characters
/// - _: matches exactly one character
///
/// If the encoding of `array` exposes a `like_fn()` implementation it is used;
/// otherwise the Arrow-based [`arrow_like`] fallback is invoked.
///
/// The result is expected to be a `Bool` array that is nullable when either `array`
/// or `pattern` is nullable, and to have the same length as `pattern`. Both
/// expectations are checked with debug assertions only.
///
/// # Errors
///
/// Returns an error if `array` or `pattern` does not have a `Utf8` dtype, or if the
/// selected implementation fails.
pub fn like(
    array: ArrayData,
    pattern: &ArrayData,
    options: LikeOptions,
) -> VortexResult<ArrayData> {
    if !matches!(array.dtype(), DType::Utf8(..)) {
        vortex_bail!("Expected utf8 array, got {}", array.dtype());
    }
    if !matches!(pattern.dtype(), DType::Utf8(..)) {
        // Report the dtype of the operand that actually failed the check.
        vortex_bail!("Expected utf8 pattern, got {}", pattern.dtype());
    }

    // Captured up front because `array` is moved into the implementation below.
    let expected_dtype =
        DType::Bool((array.dtype().is_nullable() || pattern.dtype().is_nullable()).into());
    let array_encoding = array.encoding().id();

    let result = if let Some(f) = array.encoding().like_fn() {
        f.like(array, pattern, options)
    } else {
        // No encoding-specific implementation: fall back to the Arrow-based one.
        log::debug!(
            "No like implementation found for encoding {}",
            array_encoding,
        );
        arrow_like(array, pattern, options)
    }?;

    debug_assert_eq!(
        result.len(),
        pattern.len(),
        "Like length mismatch {}",
        array_encoding
    );
    debug_assert_eq!(
        result.dtype(),
        &expected_dtype,
        "Like dtype mismatch {}",
        array_encoding
    );

    Ok(result)
}

/// Implementation of `LikeFn` using the Arrow crate.
///
/// Both operands are wrapped as Arrow `Datum`s, and one of `like` / `nlike` / `ilike` /
/// `nilike` from `arrow_string::like` is chosen according to `options`. The Arrow result
/// is converted back with the length of `array`.
///
/// The result is marked nullable when either `array` or `pattern` is nullable, which
/// matches the dtype that [`like`] expects from its implementations.
///
/// # Errors
///
/// Returns an error if either operand cannot be converted to a `Datum`, if the Arrow
/// kernel fails, or if the result cannot be converted back.
pub(crate) fn arrow_like(
    array: ArrayData,
    pattern: &ArrayData,
    options: LikeOptions,
) -> VortexResult<ArrayData> {
    // A nullable pattern can make the result nullable even when `array` is not, so
    // both operands must be considered here.
    let nullable = array.dtype().is_nullable() || pattern.dtype().is_nullable();
    let len = array.len();
    // NOTE(review): the safety contract of `Datum::try_new` is not visible in this
    // file — confirm what these `unsafe` blocks must uphold.
    let lhs = unsafe { Datum::try_new(array)? };
    let rhs = unsafe { Datum::try_new(pattern.clone())? };

    let result = match (options.negated, options.case_insensitive) {
        (false, false) => arrow_string::like::like(&lhs, &rhs)?,
        (true, false) => arrow_string::like::nlike(&lhs, &rhs)?,
        (false, true) => arrow_string::like::ilike(&lhs, &rhs)?,
        (true, true) => arrow_string::like::nilike(&lhs, &rhs)?,
    };

    from_arrow_array_with_len(&result, len, nullable)
}