datafusion_functions_nested/
array_has.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDFImpl`] definitions for array_has, array_has_all and array_has_any functions.
19
20use arrow::array::{Array, ArrayRef, BooleanArray, Datum, Scalar};
21use arrow::buffer::BooleanBuffer;
22use arrow::datatypes::DataType;
23use arrow::row::{RowConverter, Rows, SortField};
24use datafusion_common::cast::{as_fixed_size_list_array, as_generic_list_array};
25use datafusion_common::utils::string_utils::string_array_to_vec;
26use datafusion_common::utils::take_function_args;
27use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue};
28use datafusion_expr::expr::ScalarFunction;
29use datafusion_expr::simplify::ExprSimplifyResult;
30use datafusion_expr::{
31    in_list, ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
32};
33use datafusion_macros::user_doc;
34use datafusion_physical_expr_common::datum::compare_with_eq;
35use itertools::Itertools;
36
37use crate::make_array::make_array_udf;
38use crate::utils::make_scalar_function;
39
40use std::any::Any;
41use std::sync::Arc;
42
43// Create static instances of ScalarUDFs for each function
44make_udf_expr_and_func!(ArrayHas,
45    array_has,
46    haystack_array element, // arg names
47    "returns true, if the element appears in the first array, otherwise false.", // doc
48    array_has_udf // internal function name
49);
50make_udf_expr_and_func!(ArrayHasAll,
51    array_has_all,
52    haystack_array needle_array, // arg names
53    "returns true if each element of the second array appears in the first array; otherwise, it returns false.", // doc
54    array_has_all_udf // internal function name
55);
56make_udf_expr_and_func!(ArrayHasAny,
57    array_has_any,
58    haystack_array needle_array, // arg names
59    "returns true if at least one element of the second array appears in the first array; otherwise, it returns false.", // doc
60    array_has_any_udf // internal function name
61);
62
63#[user_doc(
64    doc_section(label = "Array Functions"),
65    description = "Returns true if the array contains the element.",
66    syntax_example = "array_has(array, element)",
67    sql_example = r#"```sql
68> select array_has([1, 2, 3], 2);
69+-----------------------------+
70| array_has(List([1,2,3]), 2) |
71+-----------------------------+
72| true                        |
73+-----------------------------+
74```"#,
75    argument(
76        name = "array",
77        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
78    ),
79    argument(
80        name = "element",
81        description = "Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators."
82    )
83)]
84#[derive(Debug, PartialEq, Eq, Hash)]
85pub struct ArrayHas {
86    signature: Signature,
87    aliases: Vec<String>,
88}
89
90impl Default for ArrayHas {
91    fn default() -> Self {
92        Self::new()
93    }
94}
95
96impl ArrayHas {
97    pub fn new() -> Self {
98        Self {
99            signature: Signature::array_and_element(Volatility::Immutable),
100            aliases: vec![
101                String::from("list_has"),
102                String::from("array_contains"),
103                String::from("list_contains"),
104            ],
105        }
106    }
107}
108
109impl ScalarUDFImpl for ArrayHas {
110    fn as_any(&self) -> &dyn Any {
111        self
112    }
113    fn name(&self) -> &str {
114        "array_has"
115    }
116
117    fn signature(&self) -> &Signature {
118        &self.signature
119    }
120
121    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
122        Ok(DataType::Boolean)
123    }
124
125    fn simplify(
126        &self,
127        mut args: Vec<Expr>,
128        _info: &dyn datafusion_expr::simplify::SimplifyInfo,
129    ) -> Result<ExprSimplifyResult> {
130        let [haystack, needle] = take_function_args(self.name(), &mut args)?;
131
132        // if the haystack is a constant list, we can use an inlist expression which is more
133        // efficient because the haystack is not varying per-row
134        match haystack {
135            Expr::Literal(scalar, _) if scalar.is_null() => {
136                return Ok(ExprSimplifyResult::Simplified(Expr::Literal(
137                    ScalarValue::Boolean(None),
138                    None,
139                )))
140            }
141            Expr::Literal(
142                // FixedSizeList gets coerced to List
143                scalar @ ScalarValue::List(_) | scalar @ ScalarValue::LargeList(_),
144                _,
145            ) => {
146                if let Ok(scalar_values) =
147                    ScalarValue::convert_array_to_scalar_vec(&scalar.to_array()?)
148                {
149                    assert_eq!(scalar_values.len(), 1);
150                    let list = scalar_values
151                        .into_iter()
152                        .flatten()
153                        .flatten()
154                        .map(|v| Expr::Literal(v, None))
155                        .collect();
156
157                    return Ok(ExprSimplifyResult::Simplified(in_list(
158                        std::mem::take(needle),
159                        list,
160                        false,
161                    )));
162                }
163            }
164            Expr::ScalarFunction(ScalarFunction { func, args })
165                if func == &make_array_udf() =>
166            {
167                // make_array has a static set of arguments, so we can pull the arguments out from it
168                return Ok(ExprSimplifyResult::Simplified(in_list(
169                    std::mem::take(needle),
170                    std::mem::take(args),
171                    false,
172                )));
173            }
174            _ => {}
175        };
176        Ok(ExprSimplifyResult::Original(args))
177    }
178
179    fn invoke_with_args(
180        &self,
181        args: datafusion_expr::ScalarFunctionArgs,
182    ) -> Result<ColumnarValue> {
183        let [first_arg, second_arg] = take_function_args(self.name(), &args.args)?;
184        if first_arg.data_type().is_null() {
185            // Always return null if the first argument is null
186            // i.e. array_has(null, element) -> null
187            return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
188        }
189
190        match &second_arg {
191            ColumnarValue::Array(array_needle) => {
192                // the needle is already an array, convert the haystack to an array of the same length
193                let haystack = first_arg.to_array(array_needle.len())?;
194                let array = array_has_inner_for_array(&haystack, array_needle)?;
195                Ok(ColumnarValue::Array(array))
196            }
197            ColumnarValue::Scalar(scalar_needle) => {
198                // Always return null if the second argument is null
199                // i.e. array_has(array, null) -> null
200                if scalar_needle.is_null() {
201                    return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
202                }
203
204                // since the needle is a scalar, convert it to an array of size 1
205                let haystack = first_arg.to_array(1)?;
206                let needle = scalar_needle.to_array_of_size(1)?;
207                let needle = Scalar::new(needle);
208                let array = array_has_inner_for_scalar(&haystack, &needle)?;
209                if let ColumnarValue::Scalar(_) = &first_arg {
210                    // If both inputs are scalar, keeps output as scalar
211                    let scalar_value = ScalarValue::try_from_array(&array, 0)?;
212                    Ok(ColumnarValue::Scalar(scalar_value))
213                } else {
214                    Ok(ColumnarValue::Array(array))
215                }
216            }
217        }
218    }
219
220    fn aliases(&self) -> &[String] {
221        &self.aliases
222    }
223
224    fn documentation(&self) -> Option<&Documentation> {
225        self.doc()
226    }
227}
228
229fn array_has_inner_for_scalar(
230    haystack: &ArrayRef,
231    needle: &dyn Datum,
232) -> Result<ArrayRef> {
233    let haystack = haystack.as_ref().try_into()?;
234    array_has_dispatch_for_scalar(haystack, needle)
235}
236
237fn array_has_inner_for_array(haystack: &ArrayRef, needle: &ArrayRef) -> Result<ArrayRef> {
238    let haystack = haystack.as_ref().try_into()?;
239    array_has_dispatch_for_array(haystack, needle)
240}
241
242enum ArrayWrapper<'a> {
243    FixedSizeList(&'a arrow::array::FixedSizeListArray),
244    List(&'a arrow::array::GenericListArray<i32>),
245    LargeList(&'a arrow::array::GenericListArray<i64>),
246}
247
248impl<'a> TryFrom<&'a dyn Array> for ArrayWrapper<'a> {
249    type Error = DataFusionError;
250
251    fn try_from(
252        value: &'a dyn Array,
253    ) -> std::result::Result<ArrayWrapper<'a>, Self::Error> {
254        match value.data_type() {
255            DataType::List(_) => {
256                Ok(ArrayWrapper::List(as_generic_list_array::<i32>(value)?))
257            }
258            DataType::LargeList(_) => Ok(ArrayWrapper::LargeList(
259                as_generic_list_array::<i64>(value)?,
260            )),
261            DataType::FixedSizeList(_, _) => Ok(ArrayWrapper::FixedSizeList(
262                as_fixed_size_list_array(value)?,
263            )),
264            _ => exec_err!("array_has does not support type '{:?}'.", value.data_type()),
265        }
266    }
267}
268
269impl<'a> ArrayWrapper<'a> {
270    fn len(&self) -> usize {
271        match self {
272            ArrayWrapper::FixedSizeList(arr) => arr.len(),
273            ArrayWrapper::List(arr) => arr.len(),
274            ArrayWrapper::LargeList(arr) => arr.len(),
275        }
276    }
277
278    fn iter(&self) -> Box<dyn Iterator<Item = Option<ArrayRef>> + 'a> {
279        match self {
280            ArrayWrapper::FixedSizeList(arr) => Box::new(arr.iter()),
281            ArrayWrapper::List(arr) => Box::new(arr.iter()),
282            ArrayWrapper::LargeList(arr) => Box::new(arr.iter()),
283        }
284    }
285
286    fn values(&self) -> &ArrayRef {
287        match self {
288            ArrayWrapper::FixedSizeList(arr) => arr.values(),
289            ArrayWrapper::List(arr) => arr.values(),
290            ArrayWrapper::LargeList(arr) => arr.values(),
291        }
292    }
293
294    fn value_type(&self) -> DataType {
295        match self {
296            ArrayWrapper::FixedSizeList(arr) => arr.value_type(),
297            ArrayWrapper::List(arr) => arr.value_type(),
298            ArrayWrapper::LargeList(arr) => arr.value_type(),
299        }
300    }
301
302    fn offsets(&self) -> Box<dyn Iterator<Item = usize> + 'a> {
303        match self {
304            ArrayWrapper::FixedSizeList(arr) => {
305                let offsets = (0..=arr.len())
306                    .step_by(arr.value_length() as usize)
307                    .collect::<Vec<_>>();
308                Box::new(offsets.into_iter())
309            }
310            ArrayWrapper::List(arr) => {
311                Box::new(arr.offsets().iter().map(|o| (*o) as usize))
312            }
313            ArrayWrapper::LargeList(arr) => {
314                Box::new(arr.offsets().iter().map(|o| (*o) as usize))
315            }
316        }
317    }
318}
319
320fn array_has_dispatch_for_array(
321    haystack: ArrayWrapper<'_>,
322    needle: &ArrayRef,
323) -> Result<ArrayRef> {
324    let mut boolean_builder = BooleanArray::builder(haystack.len());
325    for (i, arr) in haystack.iter().enumerate() {
326        if arr.is_none() || needle.is_null(i) {
327            boolean_builder.append_null();
328            continue;
329        }
330        let arr = arr.unwrap();
331        let is_nested = arr.data_type().is_nested();
332        let needle_row = Scalar::new(needle.slice(i, 1));
333        let eq_array = compare_with_eq(&arr, &needle_row, is_nested)?;
334        boolean_builder.append_value(eq_array.true_count() > 0);
335    }
336
337    Ok(Arc::new(boolean_builder.finish()))
338}
339
340fn array_has_dispatch_for_scalar(
341    haystack: ArrayWrapper<'_>,
342    needle: &dyn Datum,
343) -> Result<ArrayRef> {
344    let values = haystack.values();
345    let is_nested = values.data_type().is_nested();
346    // If first argument is empty list (second argument is non-null), return false
347    // i.e. array_has([], non-null element) -> false
348    if haystack.len() == 0 {
349        return Ok(Arc::new(BooleanArray::new(
350            BooleanBuffer::new_unset(haystack.len()),
351            None,
352        )));
353    }
354    let eq_array = compare_with_eq(values, needle, is_nested)?;
355    let mut final_contained = vec![None; haystack.len()];
356
357    // Check validity buffer to distinguish between null and empty arrays
358    let validity = match &haystack {
359        ArrayWrapper::FixedSizeList(arr) => arr.nulls(),
360        ArrayWrapper::List(arr) => arr.nulls(),
361        ArrayWrapper::LargeList(arr) => arr.nulls(),
362    };
363
364    for (i, (start, end)) in haystack.offsets().tuple_windows().enumerate() {
365        let length = end - start;
366
367        // Check if the array at this position is null
368        if let Some(validity_buffer) = validity {
369            if !validity_buffer.is_valid(i) {
370                final_contained[i] = None; // null array -> null result
371                continue;
372            }
373        }
374
375        // For non-null arrays: length is 0 for empty arrays
376        if length == 0 {
377            final_contained[i] = Some(false); // empty array -> false
378        } else {
379            let sliced_array = eq_array.slice(start, length);
380            final_contained[i] = Some(sliced_array.true_count() > 0);
381        }
382    }
383
384    Ok(Arc::new(BooleanArray::from(final_contained)))
385}
386
387fn array_has_all_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
388    array_has_all_and_any_inner(args, ComparisonType::All)
389}
390
391// General row comparison for array_has_all and array_has_any
392fn general_array_has_for_all_and_any<'a>(
393    haystack: &ArrayWrapper<'a>,
394    needle: &ArrayWrapper<'a>,
395    comparison_type: ComparisonType,
396) -> Result<ArrayRef> {
397    let mut boolean_builder = BooleanArray::builder(haystack.len());
398    let converter = RowConverter::new(vec![SortField::new(haystack.value_type())])?;
399
400    for (arr, sub_arr) in haystack.iter().zip(needle.iter()) {
401        if let (Some(arr), Some(sub_arr)) = (arr, sub_arr) {
402            let arr_values = converter.convert_columns(&[arr])?;
403            let sub_arr_values = converter.convert_columns(&[sub_arr])?;
404            boolean_builder.append_value(general_array_has_all_and_any_kernel(
405                arr_values,
406                sub_arr_values,
407                comparison_type,
408            ));
409        } else {
410            boolean_builder.append_null();
411        }
412    }
413
414    Ok(Arc::new(boolean_builder.finish()))
415}
416
417// String comparison for array_has_all and array_has_any
418fn array_has_all_and_any_string_internal<'a>(
419    haystack: &ArrayWrapper<'a>,
420    needle: &ArrayWrapper<'a>,
421    comparison_type: ComparisonType,
422) -> Result<ArrayRef> {
423    let mut boolean_builder = BooleanArray::builder(haystack.len());
424    for (arr, sub_arr) in haystack.iter().zip(needle.iter()) {
425        match (arr, sub_arr) {
426            (Some(arr), Some(sub_arr)) => {
427                let haystack_array = string_array_to_vec(&arr);
428                let needle_array = string_array_to_vec(&sub_arr);
429                boolean_builder.append_value(array_has_string_kernel(
430                    haystack_array,
431                    needle_array,
432                    comparison_type,
433                ));
434            }
435            (_, _) => {
436                boolean_builder.append_null();
437            }
438        }
439    }
440
441    Ok(Arc::new(boolean_builder.finish()))
442}
443
444fn array_has_all_and_any_dispatch<'a>(
445    haystack: &ArrayWrapper<'a>,
446    needle: &ArrayWrapper<'a>,
447    comparison_type: ComparisonType,
448) -> Result<ArrayRef> {
449    if needle.values().is_empty() {
450        let buffer = match comparison_type {
451            ComparisonType::All => BooleanBuffer::new_set(haystack.len()),
452            ComparisonType::Any => BooleanBuffer::new_unset(haystack.len()),
453        };
454        Ok(Arc::new(BooleanArray::from(buffer)))
455    } else {
456        match needle.value_type() {
457            DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
458                array_has_all_and_any_string_internal(haystack, needle, comparison_type)
459            }
460            _ => general_array_has_for_all_and_any(haystack, needle, comparison_type),
461        }
462    }
463}
464
465fn array_has_all_and_any_inner(
466    args: &[ArrayRef],
467    comparison_type: ComparisonType,
468) -> Result<ArrayRef> {
469    let haystack: ArrayWrapper = args[0].as_ref().try_into()?;
470    let needle: ArrayWrapper = args[1].as_ref().try_into()?;
471    array_has_all_and_any_dispatch(&haystack, &needle, comparison_type)
472}
473
474fn array_has_any_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
475    array_has_all_and_any_inner(args, ComparisonType::Any)
476}
477
478#[user_doc(
479    doc_section(label = "Array Functions"),
480    description = "Returns true if all elements of sub-array exist in array.",
481    syntax_example = "array_has_all(array, sub-array)",
482    sql_example = r#"```sql
483> select array_has_all([1, 2, 3, 4], [2, 3]);
484+--------------------------------------------+
485| array_has_all(List([1,2,3,4]), List([2,3])) |
486+--------------------------------------------+
487| true                                       |
488+--------------------------------------------+
489```"#,
490    argument(
491        name = "array",
492        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
493    ),
494    argument(
495        name = "sub-array",
496        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
497    )
498)]
499#[derive(Debug, PartialEq, Eq, Hash)]
500pub struct ArrayHasAll {
501    signature: Signature,
502    aliases: Vec<String>,
503}
504
505impl Default for ArrayHasAll {
506    fn default() -> Self {
507        Self::new()
508    }
509}
510
511impl ArrayHasAll {
512    pub fn new() -> Self {
513        Self {
514            signature: Signature::arrays(2, None, Volatility::Immutable),
515            aliases: vec![String::from("list_has_all")],
516        }
517    }
518}
519
520impl ScalarUDFImpl for ArrayHasAll {
521    fn as_any(&self) -> &dyn Any {
522        self
523    }
524    fn name(&self) -> &str {
525        "array_has_all"
526    }
527
528    fn signature(&self) -> &Signature {
529        &self.signature
530    }
531
532    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
533        Ok(DataType::Boolean)
534    }
535
536    fn invoke_with_args(
537        &self,
538        args: datafusion_expr::ScalarFunctionArgs,
539    ) -> Result<ColumnarValue> {
540        make_scalar_function(array_has_all_inner)(&args.args)
541    }
542
543    fn aliases(&self) -> &[String] {
544        &self.aliases
545    }
546
547    fn documentation(&self) -> Option<&Documentation> {
548        self.doc()
549    }
550}
551
552#[user_doc(
553    doc_section(label = "Array Functions"),
554    description = "Returns true if any elements exist in both arrays.",
555    syntax_example = "array_has_any(array, sub-array)",
556    sql_example = r#"```sql
557> select array_has_any([1, 2, 3], [3, 4]);
558+------------------------------------------+
559| array_has_any(List([1,2,3]), List([3,4])) |
560+------------------------------------------+
561| true                                     |
562+------------------------------------------+
563```"#,
564    argument(
565        name = "array",
566        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
567    ),
568    argument(
569        name = "sub-array",
570        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
571    )
572)]
573#[derive(Debug, PartialEq, Eq, Hash)]
574pub struct ArrayHasAny {
575    signature: Signature,
576    aliases: Vec<String>,
577}
578
579impl Default for ArrayHasAny {
580    fn default() -> Self {
581        Self::new()
582    }
583}
584
585impl ArrayHasAny {
586    pub fn new() -> Self {
587        Self {
588            signature: Signature::arrays(2, None, Volatility::Immutable),
589            aliases: vec![String::from("list_has_any"), String::from("arrays_overlap")],
590        }
591    }
592}
593
594impl ScalarUDFImpl for ArrayHasAny {
595    fn as_any(&self) -> &dyn Any {
596        self
597    }
598    fn name(&self) -> &str {
599        "array_has_any"
600    }
601
602    fn signature(&self) -> &Signature {
603        &self.signature
604    }
605
606    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
607        Ok(DataType::Boolean)
608    }
609
610    fn invoke_with_args(
611        &self,
612        args: datafusion_expr::ScalarFunctionArgs,
613    ) -> Result<ColumnarValue> {
614        make_scalar_function(array_has_any_inner)(&args.args)
615    }
616
617    fn aliases(&self) -> &[String] {
618        &self.aliases
619    }
620
621    fn documentation(&self) -> Option<&Documentation> {
622        self.doc()
623    }
624}
625
/// Represents the type of comparison for array_has.
#[derive(Debug, PartialEq, Clone, Copy)]
enum ComparisonType {
    // array_has_all
    All,
    // array_has_any
    Any,
}

/// Compares two rows of optional strings.
///
/// * `All`: true when every needle entry occurs in `haystack` (vacuously
///   true for an empty needle).
/// * `Any`: true when at least one needle entry occurs in `haystack`.
///
/// A `None` entry only matches another `None` entry.
fn array_has_string_kernel(
    haystack: Vec<Option<&str>>,
    needle: Vec<Option<&str>>,
    comparison_type: ComparisonType,
) -> bool {
    // A plain membership test is all that is needed: collapsing consecutive
    // duplicates first cannot change an `any`/`all` answer and would be
    // redone for every needle entry.
    match comparison_type {
        ComparisonType::All => needle.iter().all(|item| haystack.contains(item)),
        ComparisonType::Any => needle.iter().any(|item| haystack.contains(item)),
    }
}
651
652fn general_array_has_all_and_any_kernel(
653    haystack_rows: Rows,
654    needle_rows: Rows,
655    comparison_type: ComparisonType,
656) -> bool {
657    match comparison_type {
658        ComparisonType::All => needle_rows.iter().all(|needle_row| {
659            haystack_rows
660                .iter()
661                .any(|haystack_row| haystack_row == needle_row)
662        }),
663        ComparisonType::Any => needle_rows.iter().any(|needle_row| {
664            haystack_rows
665                .iter()
666                .any(|haystack_row| haystack_row == needle_row)
667        }),
668    }
669}
670
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::datatypes::Int32Type;
    use arrow::{
        array::{create_array, Array, ArrayRef, AsArray, Int32Array, ListArray},
        buffer::OffsetBuffer,
        datatypes::{DataType, Field},
    };
    use datafusion_common::{
        config::ConfigOptions, utils::SingleRowListArrayBuilder, DataFusionError,
        ScalarValue,
    };
    use datafusion_expr::{
        col, execution_props::ExecutionProps, lit, simplify::ExprSimplifyResult,
        ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDFImpl,
    };

    use crate::expr_fn::make_array;

    use super::ArrayHas;

    /// A literal list haystack is rewritten into an `IN` list over its elements.
    #[test]
    fn test_simplify_array_has_to_in_list() {
        let haystack = lit(SingleRowListArrayBuilder::new(create_array!(
            Int32,
            [1, 2, 3]
        ))
        .build_list_scalar());
        let needle = col("c");

        let props = ExecutionProps::new();
        let context = datafusion_expr::simplify::SimplifyContext::new(&props);

        let Ok(ExprSimplifyResult::Simplified(Expr::InList(in_list))) =
            ArrayHas::new().simplify(vec![haystack, needle.clone()], &context)
        else {
            panic!("Expected simplified expression");
        };

        assert_eq!(
            in_list,
            datafusion_expr::expr::InList {
                expr: Box::new(needle),
                list: vec![lit(1), lit(2), lit(3)],
                negated: false,
            }
        );
    }

    /// A `make_array(...)` haystack is rewritten into an `IN` list over its arguments.
    #[test]
    fn test_simplify_array_has_with_make_array_to_in_list() {
        let haystack = make_array(vec![lit(1), lit(2), lit(3)]);
        let needle = col("c");

        let props = ExecutionProps::new();
        let context = datafusion_expr::simplify::SimplifyContext::new(&props);

        let Ok(ExprSimplifyResult::Simplified(Expr::InList(in_list))) =
            ArrayHas::new().simplify(vec![haystack, needle.clone()], &context)
        else {
            panic!("Expected simplified expression");
        };

        assert_eq!(
            in_list,
            datafusion_expr::expr::InList {
                expr: Box::new(needle),
                list: vec![lit(1), lit(2), lit(3)],
                negated: false,
            }
        );
    }

    /// An untyped null haystack simplifies to a null boolean literal.
    #[test]
    fn test_simplify_array_has_with_null_to_null() {
        let haystack = Expr::Literal(ScalarValue::Null, None);
        let needle = col("c");

        let props = ExecutionProps::new();
        let context = datafusion_expr::simplify::SimplifyContext::new(&props);
        let Ok(ExprSimplifyResult::Simplified(simplified)) =
            ArrayHas::new().simplify(vec![haystack, needle], &context)
        else {
            panic!("Expected simplified expression");
        };

        assert_eq!(simplified, Expr::Literal(ScalarValue::Boolean(None), None));
    }

    /// A typed list literal whose single row is null also simplifies to null.
    #[test]
    fn test_simplify_array_has_with_null_list_to_null() {
        let haystack =
            ListArray::from_iter_primitive::<Int32Type, [Option<i32>; 0], _>([None]);
        let haystack = Expr::Literal(ScalarValue::List(Arc::new(haystack)), None);
        let needle = col("c");

        let props = ExecutionProps::new();
        let context = datafusion_expr::simplify::SimplifyContext::new(&props);
        let Ok(ExprSimplifyResult::Simplified(simplified)) =
            ArrayHas::new().simplify(vec![haystack, needle], &context)
        else {
            panic!("Expected simplified expression");
        };

        assert_eq!(simplified, Expr::Literal(ScalarValue::Boolean(None), None));
    }

    /// A column haystack cannot be rewritten, so the arguments come back unchanged.
    #[test]
    fn test_array_has_complex_list_not_simplified() {
        let haystack = col("c1");
        let needle = col("c2");

        let props = ExecutionProps::new();
        let context = datafusion_expr::simplify::SimplifyContext::new(&props);

        let Ok(ExprSimplifyResult::Original(args)) =
            ArrayHas::new().simplify(vec![haystack, needle.clone()], &context)
        else {
            panic!("Expected the original, unsimplified arguments");
        };

        assert_eq!(args, vec![col("c1"), col("c2")],);
    }

    /// A null list row backed by an empty child array must produce null, not false.
    #[test]
    fn test_array_has_list_empty_child() -> Result<(), DataFusionError> {
        // Field metadata mirrors the List<Int32> array built below.
        let haystack_field = Arc::new(Field::new_list(
            "haystack",
            Field::new_list_field(DataType::Int32, true),
            true,
        ));

        let needle_field = Arc::new(Field::new("needle", DataType::Int32, true));
        let return_field = Arc::new(Field::new("return", DataType::Boolean, true));
        let haystack = ListArray::new(
            Field::new_list_field(DataType::Int32, true).into(),
            OffsetBuffer::new(vec![0, 0].into()),
            Arc::new(Int32Array::from(Vec::<i32>::new())) as ArrayRef,
            Some(vec![false].into()),
        );

        let haystack = ColumnarValue::Array(Arc::new(haystack));
        let needle = ColumnarValue::Scalar(ScalarValue::Int32(Some(1)));
        let result = ArrayHas::new().invoke_with_args(ScalarFunctionArgs {
            args: vec![haystack, needle],
            arg_fields: vec![haystack_field, needle_field],
            number_rows: 1,
            return_field,
            config_options: Arc::new(ConfigOptions::default()),
        })?;

        let output = result.into_array(1)?;
        let output = output.as_boolean();
        assert_eq!(output.len(), 1);
        assert!(output.is_null(0));

        Ok(())
    }

    /// Every null list row must produce a null result row.
    #[test]
    fn test_array_has_list_null_haystack() -> Result<(), DataFusionError> {
        // Field metadata and row count mirror the three-row List<Int32> array below.
        let haystack_field = Arc::new(Field::new_list(
            "haystack",
            Field::new_list_field(DataType::Int32, true),
            true,
        ));
        let needle_field = Arc::new(Field::new("needle", DataType::Int32, true));
        let return_field = Arc::new(Field::new("return", DataType::Boolean, true));
        let haystack =
            ListArray::from_iter_primitive::<Int32Type, [Option<i32>; 0], _>([
                None, None, None,
            ]);

        let haystack = ColumnarValue::Array(Arc::new(haystack));
        let needle = ColumnarValue::Scalar(ScalarValue::Int32(Some(1)));
        let result = ArrayHas::new().invoke_with_args(ScalarFunctionArgs {
            args: vec![haystack, needle],
            arg_fields: vec![haystack_field, needle_field],
            number_rows: 3,
            return_field,
            config_options: Arc::new(ConfigOptions::default()),
        })?;

        let output = result.into_array(3)?;
        let output = output.as_boolean();
        assert_eq!(output.len(), 3);
        for i in 0..3 {
            assert!(output.is_null(i));
        }

        Ok(())
    }
}