datafusion_functions_nested/
array_has.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDFImpl`] definitions for array_has, array_has_all and array_has_any functions.
19
20use arrow::array::{Array, ArrayRef, BooleanArray, Datum, Scalar};
21use arrow::buffer::BooleanBuffer;
22use arrow::datatypes::DataType;
23use arrow::row::{RowConverter, Rows, SortField};
24use datafusion_common::cast::{as_fixed_size_list_array, as_generic_list_array};
25use datafusion_common::utils::string_utils::string_array_to_vec;
26use datafusion_common::utils::take_function_args;
27use datafusion_common::{DataFusionError, Result, ScalarValue, exec_err};
28use datafusion_expr::expr::ScalarFunction;
29use datafusion_expr::simplify::ExprSimplifyResult;
30use datafusion_expr::{
31    ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, in_list,
32};
33use datafusion_macros::user_doc;
34use datafusion_physical_expr_common::datum::compare_with_eq;
35use itertools::Itertools;
36
37use crate::make_array::make_array_udf;
38use crate::utils::make_scalar_function;
39
40use std::any::Any;
41use std::sync::Arc;
42
// Create static instances of ScalarUDFs for each function.
//
// Each invocation below names, in order: the UDF struct, the generated
// expression-builder function, that function's argument names, its doc
// string, and the internal function name.
make_udf_expr_and_func!(ArrayHas,
    array_has,
    haystack_array element, // arg names
    "returns true, if the element appears in the first array, otherwise false.", // doc
    array_has_udf // internal function name
);
make_udf_expr_and_func!(ArrayHasAll,
    array_has_all,
    haystack_array needle_array, // arg names
    "returns true if each element of the second array appears in the first array; otherwise, it returns false.", // doc
    array_has_all_udf // internal function name
);
make_udf_expr_and_func!(ArrayHasAny,
    array_has_any,
    haystack_array needle_array, // arg names
    "returns true if at least one element of the second array appears in the first array; otherwise, it returns false.", // doc
    array_has_any_udf // internal function name
);
62
// Scalar UDF `array_has(array, element)`: reports whether a list contains a
// given element. The user-facing documentation is supplied through the
// `user_doc` attribute below.
#[user_doc(
    doc_section(label = "Array Functions"),
    description = "Returns true if the array contains the element.",
    syntax_example = "array_has(array, element)",
    sql_example = r#"```sql
> select array_has([1, 2, 3], 2);
+-----------------------------+
| array_has(List([1,2,3]), 2) |
+-----------------------------+
| true                        |
+-----------------------------+
```"#,
    argument(
        name = "array",
        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
    ),
    argument(
        name = "element",
        description = "Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators."
    )
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ArrayHas {
    // Accepted argument types and volatility of the function.
    signature: Signature,
    // Alternative names under which the function is exposed.
    aliases: Vec<String>,
}
89
90impl Default for ArrayHas {
91    fn default() -> Self {
92        Self::new()
93    }
94}
95
96impl ArrayHas {
97    pub fn new() -> Self {
98        Self {
99            signature: Signature::array_and_element(Volatility::Immutable),
100            aliases: vec![
101                String::from("list_has"),
102                String::from("array_contains"),
103                String::from("list_contains"),
104            ],
105        }
106    }
107}
108
109impl ScalarUDFImpl for ArrayHas {
110    fn as_any(&self) -> &dyn Any {
111        self
112    }
113    fn name(&self) -> &str {
114        "array_has"
115    }
116
117    fn signature(&self) -> &Signature {
118        &self.signature
119    }
120
121    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
122        Ok(DataType::Boolean)
123    }
124
125    fn simplify(
126        &self,
127        mut args: Vec<Expr>,
128        _info: &dyn datafusion_expr::simplify::SimplifyInfo,
129    ) -> Result<ExprSimplifyResult> {
130        let [haystack, needle] = take_function_args(self.name(), &mut args)?;
131
132        // if the haystack is a constant list, we can use an inlist expression which is more
133        // efficient because the haystack is not varying per-row
134        match haystack {
135            Expr::Literal(scalar, _) if scalar.is_null() => {
136                return Ok(ExprSimplifyResult::Simplified(Expr::Literal(
137                    ScalarValue::Boolean(None),
138                    None,
139                )));
140            }
141            Expr::Literal(
142                // FixedSizeList gets coerced to List
143                scalar @ ScalarValue::List(_) | scalar @ ScalarValue::LargeList(_),
144                _,
145            ) => {
146                if let Ok(scalar_values) =
147                    ScalarValue::convert_array_to_scalar_vec(&scalar.to_array()?)
148                {
149                    assert_eq!(scalar_values.len(), 1);
150                    let list = scalar_values
151                        .into_iter()
152                        .flatten()
153                        .flatten()
154                        .map(|v| Expr::Literal(v, None))
155                        .collect();
156
157                    return Ok(ExprSimplifyResult::Simplified(in_list(
158                        std::mem::take(needle),
159                        list,
160                        false,
161                    )));
162                }
163            }
164            Expr::ScalarFunction(ScalarFunction { func, args })
165                if func == &make_array_udf() =>
166            {
167                // make_array has a static set of arguments, so we can pull the arguments out from it
168                return Ok(ExprSimplifyResult::Simplified(in_list(
169                    std::mem::take(needle),
170                    std::mem::take(args),
171                    false,
172                )));
173            }
174            _ => {}
175        };
176        Ok(ExprSimplifyResult::Original(args))
177    }
178
179    fn invoke_with_args(
180        &self,
181        args: datafusion_expr::ScalarFunctionArgs,
182    ) -> Result<ColumnarValue> {
183        let [first_arg, second_arg] = take_function_args(self.name(), &args.args)?;
184        if first_arg.data_type().is_null() {
185            // Always return null if the first argument is null
186            // i.e. array_has(null, element) -> null
187            return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
188        }
189
190        match &second_arg {
191            ColumnarValue::Array(array_needle) => {
192                // the needle is already an array, convert the haystack to an array of the same length
193                let haystack = first_arg.to_array(array_needle.len())?;
194                let array = array_has_inner_for_array(&haystack, array_needle)?;
195                Ok(ColumnarValue::Array(array))
196            }
197            ColumnarValue::Scalar(scalar_needle) => {
198                // Always return null if the second argument is null
199                // i.e. array_has(array, null) -> null
200                if scalar_needle.is_null() {
201                    return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
202                }
203
204                // since the needle is a scalar, convert it to an array of size 1
205                let haystack = first_arg.to_array(1)?;
206                let needle = scalar_needle.to_array_of_size(1)?;
207                let needle = Scalar::new(needle);
208                let array = array_has_inner_for_scalar(&haystack, &needle)?;
209                if let ColumnarValue::Scalar(_) = &first_arg {
210                    // If both inputs are scalar, keeps output as scalar
211                    let scalar_value = ScalarValue::try_from_array(&array, 0)?;
212                    Ok(ColumnarValue::Scalar(scalar_value))
213                } else {
214                    Ok(ColumnarValue::Array(array))
215                }
216            }
217        }
218    }
219
220    fn aliases(&self) -> &[String] {
221        &self.aliases
222    }
223
224    fn documentation(&self) -> Option<&Documentation> {
225        self.doc()
226    }
227}
228
229fn array_has_inner_for_scalar(
230    haystack: &ArrayRef,
231    needle: &dyn Datum,
232) -> Result<ArrayRef> {
233    let haystack = haystack.as_ref().try_into()?;
234    array_has_dispatch_for_scalar(haystack, needle)
235}
236
237fn array_has_inner_for_array(haystack: &ArrayRef, needle: &ArrayRef) -> Result<ArrayRef> {
238    let haystack = haystack.as_ref().try_into()?;
239    array_has_dispatch_for_array(haystack, needle)
240}
241
/// Borrowed view over one of the three supported list array layouts, so the
/// `array_has*` kernels can be written once for all of them.
#[derive(Copy, Clone)]
enum ArrayWrapper<'a> {
    /// A `FixedSizeList` array.
    FixedSizeList(&'a arrow::array::FixedSizeListArray),
    /// A `List` array (`i32` offsets).
    List(&'a arrow::array::GenericListArray<i32>),
    /// A `LargeList` array (`i64` offsets).
    LargeList(&'a arrow::array::GenericListArray<i64>),
}
248
249impl<'a> TryFrom<&'a dyn Array> for ArrayWrapper<'a> {
250    type Error = DataFusionError;
251
252    fn try_from(
253        value: &'a dyn Array,
254    ) -> std::result::Result<ArrayWrapper<'a>, Self::Error> {
255        match value.data_type() {
256            DataType::List(_) => {
257                Ok(ArrayWrapper::List(as_generic_list_array::<i32>(value)?))
258            }
259            DataType::LargeList(_) => Ok(ArrayWrapper::LargeList(
260                as_generic_list_array::<i64>(value)?,
261            )),
262            DataType::FixedSizeList(_, _) => Ok(ArrayWrapper::FixedSizeList(
263                as_fixed_size_list_array(value)?,
264            )),
265            _ => exec_err!("array_has does not support type '{:?}'.", value.data_type()),
266        }
267    }
268}
269
270impl<'a> ArrayWrapper<'a> {
271    fn len(&self) -> usize {
272        match self {
273            ArrayWrapper::FixedSizeList(arr) => arr.len(),
274            ArrayWrapper::List(arr) => arr.len(),
275            ArrayWrapper::LargeList(arr) => arr.len(),
276        }
277    }
278
279    fn iter(&self) -> Box<dyn Iterator<Item = Option<ArrayRef>> + 'a> {
280        match self {
281            ArrayWrapper::FixedSizeList(arr) => Box::new(arr.iter()),
282            ArrayWrapper::List(arr) => Box::new(arr.iter()),
283            ArrayWrapper::LargeList(arr) => Box::new(arr.iter()),
284        }
285    }
286
287    fn values(&self) -> &ArrayRef {
288        match self {
289            ArrayWrapper::FixedSizeList(arr) => arr.values(),
290            ArrayWrapper::List(arr) => arr.values(),
291            ArrayWrapper::LargeList(arr) => arr.values(),
292        }
293    }
294
295    fn value_type(&self) -> DataType {
296        match self {
297            ArrayWrapper::FixedSizeList(arr) => arr.value_type(),
298            ArrayWrapper::List(arr) => arr.value_type(),
299            ArrayWrapper::LargeList(arr) => arr.value_type(),
300        }
301    }
302
303    fn offsets(&self) -> Box<dyn Iterator<Item = usize> + 'a> {
304        match self {
305            ArrayWrapper::FixedSizeList(arr) => {
306                let offsets = (0..=arr.len())
307                    .step_by(arr.value_length() as usize)
308                    .collect::<Vec<_>>();
309                Box::new(offsets.into_iter())
310            }
311            ArrayWrapper::List(arr) => {
312                Box::new(arr.offsets().iter().map(|o| (*o) as usize))
313            }
314            ArrayWrapper::LargeList(arr) => {
315                Box::new(arr.offsets().iter().map(|o| (*o) as usize))
316            }
317        }
318    }
319}
320
321fn array_has_dispatch_for_array<'a>(
322    haystack: ArrayWrapper<'a>,
323    needle: &ArrayRef,
324) -> Result<ArrayRef> {
325    let mut boolean_builder = BooleanArray::builder(haystack.len());
326    for (i, arr) in haystack.iter().enumerate() {
327        if arr.is_none() || needle.is_null(i) {
328            boolean_builder.append_null();
329            continue;
330        }
331        let arr = arr.unwrap();
332        let is_nested = arr.data_type().is_nested();
333        let needle_row = Scalar::new(needle.slice(i, 1));
334        let eq_array = compare_with_eq(&arr, &needle_row, is_nested)?;
335        boolean_builder.append_value(eq_array.true_count() > 0);
336    }
337
338    Ok(Arc::new(boolean_builder.finish()))
339}
340
341fn array_has_dispatch_for_scalar(
342    haystack: ArrayWrapper<'_>,
343    needle: &dyn Datum,
344) -> Result<ArrayRef> {
345    let values = haystack.values();
346    let is_nested = values.data_type().is_nested();
347    // If first argument is empty list (second argument is non-null), return false
348    // i.e. array_has([], non-null element) -> false
349    if haystack.len() == 0 {
350        return Ok(Arc::new(BooleanArray::new(
351            BooleanBuffer::new_unset(haystack.len()),
352            None,
353        )));
354    }
355    let eq_array = compare_with_eq(values, needle, is_nested)?;
356    let mut final_contained = vec![None; haystack.len()];
357
358    // Check validity buffer to distinguish between null and empty arrays
359    let validity = match &haystack {
360        ArrayWrapper::FixedSizeList(arr) => arr.nulls(),
361        ArrayWrapper::List(arr) => arr.nulls(),
362        ArrayWrapper::LargeList(arr) => arr.nulls(),
363    };
364
365    for (i, (start, end)) in haystack.offsets().tuple_windows().enumerate() {
366        let length = end - start;
367
368        // Check if the array at this position is null
369        if let Some(validity_buffer) = validity
370            && !validity_buffer.is_valid(i)
371        {
372            final_contained[i] = None; // null array -> null result
373            continue;
374        }
375
376        // For non-null arrays: length is 0 for empty arrays
377        if length == 0 {
378            final_contained[i] = Some(false); // empty array -> false
379        } else {
380            let sliced_array = eq_array.slice(start, length);
381            final_contained[i] = Some(sliced_array.true_count() > 0);
382        }
383    }
384
385    Ok(Arc::new(BooleanArray::from(final_contained)))
386}
387
/// Array-level entry point for `array_has_all`: delegates to the shared
/// implementation with [`ComparisonType::All`].
fn array_has_all_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
    array_has_all_and_any_inner(args, ComparisonType::All)
}
391
392// General row comparison for array_has_all and array_has_any
393fn general_array_has_for_all_and_any<'a>(
394    haystack: ArrayWrapper<'a>,
395    needle: ArrayWrapper<'a>,
396    comparison_type: ComparisonType,
397) -> Result<ArrayRef> {
398    let mut boolean_builder = BooleanArray::builder(haystack.len());
399    let converter = RowConverter::new(vec![SortField::new(haystack.value_type())])?;
400
401    for (arr, sub_arr) in haystack.iter().zip(needle.iter()) {
402        if let (Some(arr), Some(sub_arr)) = (arr, sub_arr) {
403            let arr_values = converter.convert_columns(&[arr])?;
404            let sub_arr_values = converter.convert_columns(&[sub_arr])?;
405            boolean_builder.append_value(general_array_has_all_and_any_kernel(
406                &arr_values,
407                &sub_arr_values,
408                comparison_type,
409            ));
410        } else {
411            boolean_builder.append_null();
412        }
413    }
414
415    Ok(Arc::new(boolean_builder.finish()))
416}
417
418// String comparison for array_has_all and array_has_any
419fn array_has_all_and_any_string_internal<'a>(
420    haystack: ArrayWrapper<'a>,
421    needle: ArrayWrapper<'a>,
422    comparison_type: ComparisonType,
423) -> Result<ArrayRef> {
424    let mut boolean_builder = BooleanArray::builder(haystack.len());
425    for (arr, sub_arr) in haystack.iter().zip(needle.iter()) {
426        match (arr, sub_arr) {
427            (Some(arr), Some(sub_arr)) => {
428                let haystack_array = string_array_to_vec(&arr);
429                let needle_array = string_array_to_vec(&sub_arr);
430                boolean_builder.append_value(array_has_string_kernel(
431                    &haystack_array,
432                    &needle_array,
433                    comparison_type,
434                ));
435            }
436            (_, _) => {
437                boolean_builder.append_null();
438            }
439        }
440    }
441
442    Ok(Arc::new(boolean_builder.finish()))
443}
444
445fn array_has_all_and_any_dispatch<'a>(
446    haystack: ArrayWrapper<'a>,
447    needle: ArrayWrapper<'a>,
448    comparison_type: ComparisonType,
449) -> Result<ArrayRef> {
450    if needle.values().is_empty() {
451        let buffer = match comparison_type {
452            ComparisonType::All => BooleanBuffer::new_set(haystack.len()),
453            ComparisonType::Any => BooleanBuffer::new_unset(haystack.len()),
454        };
455        Ok(Arc::new(BooleanArray::from(buffer)))
456    } else {
457        match needle.value_type() {
458            DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
459                array_has_all_and_any_string_internal(haystack, needle, comparison_type)
460            }
461            _ => general_array_has_for_all_and_any(haystack, needle, comparison_type),
462        }
463    }
464}
465
466fn array_has_all_and_any_inner(
467    args: &[ArrayRef],
468    comparison_type: ComparisonType,
469) -> Result<ArrayRef> {
470    let haystack: ArrayWrapper = args[0].as_ref().try_into()?;
471    let needle: ArrayWrapper = args[1].as_ref().try_into()?;
472    array_has_all_and_any_dispatch(haystack, needle, comparison_type)
473}
474
/// Array-level entry point for `array_has_any`: delegates to the shared
/// implementation with [`ComparisonType::Any`].
fn array_has_any_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
    array_has_all_and_any_inner(args, ComparisonType::Any)
}
478
// Scalar UDF `array_has_all(array, sub-array)`: reports whether every
// element of the second list occurs in the first. The user-facing
// documentation is supplied through the `user_doc` attribute below.
#[user_doc(
    doc_section(label = "Array Functions"),
    description = "Returns true if all elements of sub-array exist in array.",
    syntax_example = "array_has_all(array, sub-array)",
    sql_example = r#"```sql
> select array_has_all([1, 2, 3, 4], [2, 3]);
+--------------------------------------------+
| array_has_all(List([1,2,3,4]), List([2,3])) |
+--------------------------------------------+
| true                                       |
+--------------------------------------------+
```"#,
    argument(
        name = "array",
        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
    ),
    argument(
        name = "sub-array",
        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
    )
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ArrayHasAll {
    // Accepted argument types and volatility of the function.
    signature: Signature,
    // Alternative names under which the function is exposed.
    aliases: Vec<String>,
}
505
506impl Default for ArrayHasAll {
507    fn default() -> Self {
508        Self::new()
509    }
510}
511
512impl ArrayHasAll {
513    pub fn new() -> Self {
514        Self {
515            signature: Signature::arrays(2, None, Volatility::Immutable),
516            aliases: vec![String::from("list_has_all")],
517        }
518    }
519}
520
521impl ScalarUDFImpl for ArrayHasAll {
522    fn as_any(&self) -> &dyn Any {
523        self
524    }
525    fn name(&self) -> &str {
526        "array_has_all"
527    }
528
529    fn signature(&self) -> &Signature {
530        &self.signature
531    }
532
533    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
534        Ok(DataType::Boolean)
535    }
536
537    fn invoke_with_args(
538        &self,
539        args: datafusion_expr::ScalarFunctionArgs,
540    ) -> Result<ColumnarValue> {
541        make_scalar_function(array_has_all_inner)(&args.args)
542    }
543
544    fn aliases(&self) -> &[String] {
545        &self.aliases
546    }
547
548    fn documentation(&self) -> Option<&Documentation> {
549        self.doc()
550    }
551}
552
// Scalar UDF `array_has_any(array, sub-array)`: reports whether the two
// lists share at least one element. The user-facing documentation is
// supplied through the `user_doc` attribute below.
#[user_doc(
    doc_section(label = "Array Functions"),
    description = "Returns true if any elements exist in both arrays.",
    syntax_example = "array_has_any(array, sub-array)",
    sql_example = r#"```sql
> select array_has_any([1, 2, 3], [3, 4]);
+------------------------------------------+
| array_has_any(List([1,2,3]), List([3,4])) |
+------------------------------------------+
| true                                     |
+------------------------------------------+
```"#,
    argument(
        name = "array",
        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
    ),
    argument(
        name = "sub-array",
        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
    )
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ArrayHasAny {
    // Accepted argument types and volatility of the function.
    signature: Signature,
    // Alternative names under which the function is exposed.
    aliases: Vec<String>,
}
579
580impl Default for ArrayHasAny {
581    fn default() -> Self {
582        Self::new()
583    }
584}
585
586impl ArrayHasAny {
587    pub fn new() -> Self {
588        Self {
589            signature: Signature::arrays(2, None, Volatility::Immutable),
590            aliases: vec![String::from("list_has_any"), String::from("arrays_overlap")],
591        }
592    }
593}
594
595impl ScalarUDFImpl for ArrayHasAny {
596    fn as_any(&self) -> &dyn Any {
597        self
598    }
599    fn name(&self) -> &str {
600        "array_has_any"
601    }
602
603    fn signature(&self) -> &Signature {
604        &self.signature
605    }
606
607    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
608        Ok(DataType::Boolean)
609    }
610
611    fn invoke_with_args(
612        &self,
613        args: datafusion_expr::ScalarFunctionArgs,
614    ) -> Result<ColumnarValue> {
615        make_scalar_function(array_has_any_inner)(&args.args)
616    }
617
618    fn aliases(&self) -> &[String] {
619        &self.aliases
620    }
621
622    fn documentation(&self) -> Option<&Documentation> {
623        self.doc()
624    }
625}
626
/// Represents the type of comparison for array_has.
#[derive(Debug, PartialEq, Clone, Copy)]
enum ComparisonType {
    /// Used by `array_has_all`: every needle element must be found in the
    /// haystack.
    All,
    /// Used by `array_has_any`: at least one needle element must be found
    /// in the haystack.
    Any,
}
635
636fn array_has_string_kernel(
637    haystack: &[Option<&str>],
638    needle: &[Option<&str>],
639    comparison_type: ComparisonType,
640) -> bool {
641    match comparison_type {
642        ComparisonType::All => needle
643            .iter()
644            .dedup()
645            .all(|x| haystack.iter().dedup().any(|y| y == x)),
646        ComparisonType::Any => needle
647            .iter()
648            .dedup()
649            .any(|x| haystack.iter().dedup().any(|y| y == x)),
650    }
651}
652
653fn general_array_has_all_and_any_kernel(
654    haystack_rows: &Rows,
655    needle_rows: &Rows,
656    comparison_type: ComparisonType,
657) -> bool {
658    match comparison_type {
659        ComparisonType::All => needle_rows.iter().all(|needle_row| {
660            haystack_rows
661                .iter()
662                .any(|haystack_row| haystack_row == needle_row)
663        }),
664        ComparisonType::Any => needle_rows.iter().any(|needle_row| {
665            haystack_rows
666                .iter()
667                .any(|haystack_row| haystack_row == needle_row)
668        }),
669    }
670}
671
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::datatypes::Int32Type;
    use arrow::{
        array::{Array, ArrayRef, AsArray, Int32Array, ListArray, create_array},
        buffer::OffsetBuffer,
        datatypes::{DataType, Field},
    };
    use datafusion_common::{
        DataFusionError, ScalarValue, config::ConfigOptions,
        utils::SingleRowListArrayBuilder,
    };
    use datafusion_expr::{
        ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDFImpl, col,
        execution_props::ExecutionProps, lit, simplify::ExprSimplifyResult,
    };

    use crate::expr_fn::make_array;

    use super::ArrayHas;

    /// A constant list haystack is rewritten to `needle IN (elements...)`.
    #[test]
    fn test_simplify_array_has_to_in_list() {
        let haystack = lit(SingleRowListArrayBuilder::new(create_array!(
            Int32,
            [1, 2, 3]
        ))
        .build_list_scalar());
        let needle = col("c");

        let props = ExecutionProps::new();
        let context = datafusion_expr::simplify::SimplifyContext::new(&props);

        let Ok(ExprSimplifyResult::Simplified(Expr::InList(in_list))) =
            ArrayHas::new().simplify(vec![haystack, needle.clone()], &context)
        else {
            panic!("Expected simplified expression");
        };

        assert_eq!(
            in_list,
            datafusion_expr::expr::InList {
                expr: Box::new(needle),
                list: vec![lit(1), lit(2), lit(3)],
                negated: false,
            }
        );
    }

    /// A `make_array(..)` haystack is rewritten to `needle IN (args...)`.
    #[test]
    fn test_simplify_array_has_with_make_array_to_in_list() {
        let haystack = make_array(vec![lit(1), lit(2), lit(3)]);
        let needle = col("c");

        let props = ExecutionProps::new();
        let context = datafusion_expr::simplify::SimplifyContext::new(&props);

        let Ok(ExprSimplifyResult::Simplified(Expr::InList(in_list))) =
            ArrayHas::new().simplify(vec![haystack, needle.clone()], &context)
        else {
            panic!("Expected simplified expression");
        };

        assert_eq!(
            in_list,
            datafusion_expr::expr::InList {
                expr: Box::new(needle),
                list: vec![lit(1), lit(2), lit(3)],
                negated: false,
            }
        );
    }

    /// An untyped null haystack simplifies to a null boolean literal.
    #[test]
    fn test_simplify_array_has_with_null_to_null() {
        let haystack = Expr::Literal(ScalarValue::Null, None);
        let needle = col("c");

        let props = ExecutionProps::new();
        let context = datafusion_expr::simplify::SimplifyContext::new(&props);
        let Ok(ExprSimplifyResult::Simplified(simplified)) =
            ArrayHas::new().simplify(vec![haystack, needle], &context)
        else {
            panic!("Expected simplified expression");
        };

        assert_eq!(simplified, Expr::Literal(ScalarValue::Boolean(None), None));
    }

    /// A typed list literal whose single row is null also simplifies to a
    /// null boolean literal.
    #[test]
    fn test_simplify_array_has_with_null_list_to_null() {
        let haystack =
            ListArray::from_iter_primitive::<Int32Type, [Option<i32>; 0], _>([None]);
        let haystack = Expr::Literal(ScalarValue::List(Arc::new(haystack)), None);
        let needle = col("c");

        let props = ExecutionProps::new();
        let context = datafusion_expr::simplify::SimplifyContext::new(&props);
        let Ok(ExprSimplifyResult::Simplified(simplified)) =
            ArrayHas::new().simplify(vec![haystack, needle], &context)
        else {
            panic!("Expected simplified expression");
        };

        assert_eq!(simplified, Expr::Literal(ScalarValue::Boolean(None), None));
    }

    /// A column haystack cannot be simplified; the arguments come back
    /// unchanged.
    #[test]
    fn test_array_has_complex_list_not_simplified() {
        let haystack = col("c1");
        let needle = col("c2");

        let props = ExecutionProps::new();
        let context = datafusion_expr::simplify::SimplifyContext::new(&props);

        let Ok(ExprSimplifyResult::Original(args)) =
            ArrayHas::new().simplify(vec![haystack, needle.clone()], &context)
        else {
            panic!("Expected the original (unsimplified) arguments");
        };

        assert_eq!(args, vec![col("c1"), col("c2")],);
    }

    /// A single null list row with an empty child array evaluates to null.
    #[test]
    fn test_array_has_list_empty_child() -> Result<(), DataFusionError> {
        // Field metadata mirrors the actual `List<Int32>` haystack below.
        let haystack_field = Arc::new(Field::new_list(
            "haystack",
            Field::new_list_field(DataType::Int32, true),
            true,
        ));

        let needle_field = Arc::new(Field::new("needle", DataType::Int32, true));
        let return_field = Arc::new(Field::new("return", DataType::Boolean, true));
        let haystack = ListArray::new(
            Field::new_list_field(DataType::Int32, true).into(),
            OffsetBuffer::new(vec![0, 0].into()),
            Arc::new(Int32Array::from(Vec::<i32>::new())) as ArrayRef,
            Some(vec![false].into()),
        );

        let haystack = ColumnarValue::Array(Arc::new(haystack));
        let needle = ColumnarValue::Scalar(ScalarValue::Int32(Some(1)));
        let result = ArrayHas::new().invoke_with_args(ScalarFunctionArgs {
            args: vec![haystack, needle],
            arg_fields: vec![haystack_field, needle_field],
            number_rows: 1,
            return_field,
            config_options: Arc::new(ConfigOptions::default()),
        })?;

        let output = result.into_array(1)?;
        let output = output.as_boolean();
        assert_eq!(output.len(), 1);
        assert!(output.is_null(0));

        Ok(())
    }

    /// A haystack column whose three rows are all null evaluates to null
    /// for every row.
    #[test]
    fn test_array_has_list_null_haystack() -> Result<(), DataFusionError> {
        // Field metadata and row count mirror the 3-row `List<Int32>`
        // haystack below.
        let haystack_field = Arc::new(Field::new_list(
            "haystack",
            Field::new_list_field(DataType::Int32, true),
            true,
        ));
        let needle_field = Arc::new(Field::new("needle", DataType::Int32, true));
        let return_field = Arc::new(Field::new("return", DataType::Boolean, true));
        let haystack =
            ListArray::from_iter_primitive::<Int32Type, [Option<i32>; 0], _>([
                None, None, None,
            ]);

        let haystack = ColumnarValue::Array(Arc::new(haystack));
        let needle = ColumnarValue::Scalar(ScalarValue::Int32(Some(1)));
        let result = ArrayHas::new().invoke_with_args(ScalarFunctionArgs {
            args: vec![haystack, needle],
            arg_fields: vec![haystack_field, needle_field],
            number_rows: 3,
            return_field,
            config_options: Arc::new(ConfigOptions::default()),
        })?;

        let output = result.into_array(3)?;
        let output = output.as_boolean();
        assert_eq!(output.len(), 3);
        for i in 0..3 {
            assert!(output.is_null(i));
        }

        Ok(())
    }
}