datafusion_physical_expr/expressions/
in_list.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Implementation of `InList` expressions: [`InListExpr`]
19
20use std::any::Any;
21use std::fmt::Debug;
22use std::hash::{Hash, Hasher};
23use std::sync::Arc;
24
25use crate::PhysicalExpr;
26use crate::physical_expr::physical_exprs_bag_equal;
27
28use arrow::array::*;
29use arrow::buffer::{BooleanBuffer, NullBuffer};
30use arrow::compute::kernels::boolean::{not, or_kleene};
31use arrow::compute::{SortOptions, take};
32use arrow::datatypes::*;
33use arrow::util::bit_iterator::BitIndexIterator;
34use datafusion_common::hash_utils::with_hashes;
35use datafusion_common::{
36    DFSchema, HashSet, Result, ScalarValue, assert_or_internal_err, exec_datafusion_err,
37    exec_err,
38};
39use datafusion_expr::{ColumnarValue, expr_vec_fmt};
40
41use ahash::RandomState;
42use datafusion_common::HashMap;
43use hashbrown::hash_map::RawEntryMut;
44
45/// Trait for InList static filters
46trait StaticFilter {
47    fn null_count(&self) -> usize;
48
49    /// Checks if values in `v` are contained in the filter
50    fn contains(&self, v: &dyn Array, negated: bool) -> Result<BooleanArray>;
51}
52
53/// InList
54pub struct InListExpr {
55    expr: Arc<dyn PhysicalExpr>,
56    list: Vec<Arc<dyn PhysicalExpr>>,
57    negated: bool,
58    static_filter: Option<Arc<dyn StaticFilter + Send + Sync>>,
59}
60
61impl Debug for InListExpr {
62    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
63        f.debug_struct("InListExpr")
64            .field("expr", &self.expr)
65            .field("list", &self.list)
66            .field("negated", &self.negated)
67            .finish()
68    }
69}
70
71/// Static filter for InList that stores the array and hash set for O(1) lookups
72#[derive(Debug, Clone)]
73struct ArrayStaticFilter {
74    in_array: ArrayRef,
75    state: RandomState,
76    /// Used to provide a lookup from value to in list index
77    ///
78    /// Note: usize::hash is not used, instead the raw entry
79    /// API is used to store entries w.r.t their value
80    map: HashMap<usize, (), ()>,
81}
82
83impl StaticFilter for ArrayStaticFilter {
84    fn null_count(&self) -> usize {
85        self.in_array.null_count()
86    }
87
88    /// Checks if values in `v` are contained in the `in_array` using this hash set for lookup.
89    fn contains(&self, v: &dyn Array, negated: bool) -> Result<BooleanArray> {
90        // Null type comparisons always return null (SQL three-valued logic)
91        if v.data_type() == &DataType::Null
92            || self.in_array.data_type() == &DataType::Null
93        {
94            let nulls = NullBuffer::new_null(v.len());
95            return Ok(BooleanArray::new(
96                BooleanBuffer::new_unset(v.len()),
97                Some(nulls),
98            ));
99        }
100
101        downcast_dictionary_array! {
102            v => {
103                let values_contains = self.contains(v.values().as_ref(), negated)?;
104                let result = take(&values_contains, v.keys(), None)?;
105                return Ok(downcast_array(result.as_ref()))
106            }
107            _ => {}
108        }
109
110        let needle_nulls = v.logical_nulls();
111        let needle_nulls = needle_nulls.as_ref();
112        let haystack_has_nulls = self.in_array.null_count() != 0;
113
114        with_hashes([v], &self.state, |hashes| {
115            let cmp = make_comparator(v, &self.in_array, SortOptions::default())?;
116            Ok((0..v.len())
117                .map(|i| {
118                    // SQL three-valued logic: null IN (...) is always null
119                    if needle_nulls.is_some_and(|nulls| nulls.is_null(i)) {
120                        return None;
121                    }
122
123                    let hash = hashes[i];
124                    let contains = self
125                        .map
126                        .raw_entry()
127                        .from_hash(hash, |idx| cmp(i, *idx).is_eq())
128                        .is_some();
129
130                    match contains {
131                        true => Some(!negated),
132                        false if haystack_has_nulls => None,
133                        false => Some(negated),
134                    }
135                })
136                .collect())
137        })
138    }
139}
140
141fn instantiate_static_filter(
142    in_array: ArrayRef,
143) -> Result<Arc<dyn StaticFilter + Send + Sync>> {
144    match in_array.data_type() {
145        // Integer primitive types
146        DataType::Int8 => Ok(Arc::new(Int8StaticFilter::try_new(&in_array)?)),
147        DataType::Int16 => Ok(Arc::new(Int16StaticFilter::try_new(&in_array)?)),
148        DataType::Int32 => Ok(Arc::new(Int32StaticFilter::try_new(&in_array)?)),
149        DataType::Int64 => Ok(Arc::new(Int64StaticFilter::try_new(&in_array)?)),
150        DataType::UInt8 => Ok(Arc::new(UInt8StaticFilter::try_new(&in_array)?)),
151        DataType::UInt16 => Ok(Arc::new(UInt16StaticFilter::try_new(&in_array)?)),
152        DataType::UInt32 => Ok(Arc::new(UInt32StaticFilter::try_new(&in_array)?)),
153        DataType::UInt64 => Ok(Arc::new(UInt64StaticFilter::try_new(&in_array)?)),
154        // Float primitive types (use ordered wrappers for Hash/Eq)
155        DataType::Float32 => Ok(Arc::new(Float32StaticFilter::try_new(&in_array)?)),
156        DataType::Float64 => Ok(Arc::new(Float64StaticFilter::try_new(&in_array)?)),
157        _ => {
158            /* fall through to generic implementation for unsupported types (Struct, etc.) */
159            Ok(Arc::new(ArrayStaticFilter::try_new(in_array)?))
160        }
161    }
162}
163
164impl ArrayStaticFilter {
165    /// Computes a [`StaticFilter`] for the provided [`Array`] if there
166    /// are nulls present or there are more than the configured number of
167    /// elements.
168    ///
169    /// Note: This is split into a separate function as higher-rank trait bounds currently
170    /// cause type inference to misbehave
171    fn try_new(in_array: ArrayRef) -> Result<ArrayStaticFilter> {
172        // Null type has no natural order - return empty hash set
173        if in_array.data_type() == &DataType::Null {
174            return Ok(ArrayStaticFilter {
175                in_array,
176                state: RandomState::new(),
177                map: HashMap::with_hasher(()),
178            });
179        }
180
181        let state = RandomState::new();
182        let mut map: HashMap<usize, (), ()> = HashMap::with_hasher(());
183
184        with_hashes([&in_array], &state, |hashes| -> Result<()> {
185            let cmp = make_comparator(&in_array, &in_array, SortOptions::default())?;
186
187            let insert_value = |idx| {
188                let hash = hashes[idx];
189                if let RawEntryMut::Vacant(v) = map
190                    .raw_entry_mut()
191                    .from_hash(hash, |x| cmp(*x, idx).is_eq())
192                {
193                    v.insert_with_hasher(hash, idx, (), |x| hashes[*x]);
194                }
195            };
196
197            match in_array.nulls() {
198                Some(nulls) => {
199                    BitIndexIterator::new(nulls.validity(), nulls.offset(), nulls.len())
200                        .for_each(insert_value)
201                }
202                None => (0..in_array.len()).for_each(insert_value),
203            }
204
205            Ok(())
206        })?;
207
208        Ok(Self {
209            in_array,
210            state,
211            map,
212        })
213    }
214}
215
/// `f32` newtype whose `Hash` and `Eq` are defined on the raw bit pattern.
///
/// Two values are equal exactly when their bits are equal, so a NaN equals another NaN
/// with the same bit pattern, while `0.0` and `-0.0` are distinct.
#[derive(Clone, Copy)]
struct OrderedFloat32(f32);

impl From<f32> for OrderedFloat32 {
    fn from(v: f32) -> Self {
        OrderedFloat32(v)
    }
}

impl PartialEq for OrderedFloat32 {
    fn eq(&self, other: &Self) -> bool {
        let (Self(lhs), Self(rhs)) = (self, other);
        lhs.to_bits() == rhs.to_bits()
    }
}

impl Eq for OrderedFloat32 {}

impl Hash for OrderedFloat32 {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Hash the native-endian bytes so that bit-equal values hash equally
        let raw: [u8; 4] = self.0.to_ne_bytes();
        raw.hash(state);
    }
}
240
/// `f64` newtype whose `Hash` and `Eq` are defined on the raw bit pattern.
///
/// Two values are equal exactly when their bits are equal, so a NaN equals another NaN
/// with the same bit pattern, while `0.0` and `-0.0` are distinct.
#[derive(Clone, Copy)]
struct OrderedFloat64(f64);

impl From<f64> for OrderedFloat64 {
    fn from(v: f64) -> Self {
        OrderedFloat64(v)
    }
}

impl PartialEq for OrderedFloat64 {
    fn eq(&self, other: &Self) -> bool {
        let (Self(lhs), Self(rhs)) = (self, other);
        lhs.to_bits() == rhs.to_bits()
    }
}

impl Eq for OrderedFloat64 {}

impl Hash for OrderedFloat64 {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Hash the native-endian bytes so that bit-equal values hash equally
        let raw: [u8; 8] = self.0.to_ne_bytes();
        raw.hash(state);
    }
}
265
266// Macro to generate specialized StaticFilter implementations for primitive types
267macro_rules! primitive_static_filter {
268    ($Name:ident, $ArrowType:ty) => {
269        struct $Name {
270            null_count: usize,
271            values: HashSet<<$ArrowType as ArrowPrimitiveType>::Native>,
272        }
273
274        impl $Name {
275            fn try_new(in_array: &ArrayRef) -> Result<Self> {
276                let in_array = in_array
277                    .as_primitive_opt::<$ArrowType>()
278                    .ok_or_else(|| exec_datafusion_err!("Failed to downcast an array to a '{}' array", stringify!($ArrowType)))?;
279
280                let mut values = HashSet::with_capacity(in_array.len());
281                let null_count = in_array.null_count();
282
283                for v in in_array.iter().flatten() {
284                    values.insert(v);
285                }
286
287                Ok(Self { null_count, values })
288            }
289        }
290
291        impl StaticFilter for $Name {
292            fn null_count(&self) -> usize {
293                self.null_count
294            }
295
296            fn contains(&self, v: &dyn Array, negated: bool) -> Result<BooleanArray> {
297                // Handle dictionary arrays by recursing on the values
298                downcast_dictionary_array! {
299                    v => {
300                        let values_contains = self.contains(v.values().as_ref(), negated)?;
301                        let result = take(&values_contains, v.keys(), None)?;
302                        return Ok(downcast_array(result.as_ref()))
303                    }
304                    _ => {}
305                }
306
307                let v = v
308                    .as_primitive_opt::<$ArrowType>()
309                    .ok_or_else(|| exec_datafusion_err!("Failed to downcast an array to a '{}' array", stringify!($ArrowType)))?;
310
311                let haystack_has_nulls = self.null_count > 0;
312
313                let needle_values = v.values();
314                let needle_nulls = v.nulls();
315                let needle_has_nulls = v.null_count() > 0;
316
317                // Truth table for `value [NOT] IN (set)` with SQL three-valued logic:
318                // ("-" means the value doesn't affect the result)
319                //
320                // | needle_null | haystack_null | negated | in set? | result |
321                // |-------------|---------------|---------|---------|--------|
322                // | true        | -             | false   | -       | null   |
323                // | true        | -             | true    | -       | null   |
324                // | false       | true          | false   | yes     | true   |
325                // | false       | true          | false   | no      | null   |
326                // | false       | true          | true    | yes     | false  |
327                // | false       | true          | true    | no      | null   |
328                // | false       | false         | false   | yes     | true   |
329                // | false       | false         | false   | no      | false  |
330                // | false       | false         | true    | yes     | false  |
331                // | false       | false         | true    | no      | true   |
332
333                // Compute the "contains" result using collect_bool (fast batched approach)
334                // This ignores nulls - we handle them separately
335                let contains_buffer = if negated {
336                    BooleanBuffer::collect_bool(needle_values.len(), |i| {
337                        !self.values.contains(&needle_values[i])
338                    })
339                } else {
340                    BooleanBuffer::collect_bool(needle_values.len(), |i| {
341                        self.values.contains(&needle_values[i])
342                    })
343                };
344
345                // Compute the null mask
346                // Output is null when:
347                // 1. needle value is null, OR
348                // 2. needle value is not in set AND haystack has nulls
349                let result_nulls = match (needle_has_nulls, haystack_has_nulls) {
350                    (false, false) => {
351                        // No nulls anywhere
352                        None
353                    }
354                    (true, false) => {
355                        // Only needle has nulls - just use needle's null mask
356                        needle_nulls.cloned()
357                    }
358                    (false, true) => {
359                        // Only haystack has nulls - result is null when value not in set
360                        // Valid (not null) when original "in set" is true
361                        // For NOT IN: contains_buffer = !original, so validity = !contains_buffer
362                        let validity = if negated {
363                            !&contains_buffer
364                        } else {
365                            contains_buffer.clone()
366                        };
367                        Some(NullBuffer::new(validity))
368                    }
369                    (true, true) => {
370                        // Both have nulls - combine needle nulls with haystack-induced nulls
371                        let needle_validity = needle_nulls.map(|n| n.inner().clone())
372                            .unwrap_or_else(|| BooleanBuffer::new_set(needle_values.len()));
373
374                        // Valid when original "in set" is true (see above)
375                        let haystack_validity = if negated {
376                            !&contains_buffer
377                        } else {
378                            contains_buffer.clone()
379                        };
380
381                        // Combined validity: valid only where both are valid
382                        let combined_validity = &needle_validity & &haystack_validity;
383                        Some(NullBuffer::new(combined_validity))
384                    }
385                };
386
387                Ok(BooleanArray::new(contains_buffer, result_nulls))
388            }
389        }
390    };
391}
392
393// Generate specialized filters for all integer primitive types
394primitive_static_filter!(Int8StaticFilter, Int8Type);
395primitive_static_filter!(Int16StaticFilter, Int16Type);
396primitive_static_filter!(Int32StaticFilter, Int32Type);
397primitive_static_filter!(Int64StaticFilter, Int64Type);
398primitive_static_filter!(UInt8StaticFilter, UInt8Type);
399primitive_static_filter!(UInt16StaticFilter, UInt16Type);
400primitive_static_filter!(UInt32StaticFilter, UInt32Type);
401primitive_static_filter!(UInt64StaticFilter, UInt64Type);
402
403// Macro to generate specialized StaticFilter implementations for float types
404// Floats require a wrapper type (OrderedFloat*) to implement Hash/Eq due to NaN semantics
405macro_rules! float_static_filter {
406    ($Name:ident, $ArrowType:ty, $OrderedType:ty) => {
407        struct $Name {
408            null_count: usize,
409            values: HashSet<$OrderedType>,
410        }
411
412        impl $Name {
413            fn try_new(in_array: &ArrayRef) -> Result<Self> {
414                let in_array = in_array
415                    .as_primitive_opt::<$ArrowType>()
416                    .ok_or_else(|| exec_datafusion_err!("Failed to downcast an array to a '{}' array", stringify!($ArrowType)))?;
417
418                let mut values = HashSet::with_capacity(in_array.len());
419                let null_count = in_array.null_count();
420
421                for v in in_array.iter().flatten() {
422                    values.insert(<$OrderedType>::from(v));
423                }
424
425                Ok(Self { null_count, values })
426            }
427        }
428
429        impl StaticFilter for $Name {
430            fn null_count(&self) -> usize {
431                self.null_count
432            }
433
434            fn contains(&self, v: &dyn Array, negated: bool) -> Result<BooleanArray> {
435                // Handle dictionary arrays by recursing on the values
436                downcast_dictionary_array! {
437                    v => {
438                        let values_contains = self.contains(v.values().as_ref(), negated)?;
439                        let result = take(&values_contains, v.keys(), None)?;
440                        return Ok(downcast_array(result.as_ref()))
441                    }
442                    _ => {}
443                }
444
445                let v = v
446                    .as_primitive_opt::<$ArrowType>()
447                    .ok_or_else(|| exec_datafusion_err!("Failed to downcast an array to a '{}' array", stringify!($ArrowType)))?;
448
449                let haystack_has_nulls = self.null_count > 0;
450
451                let needle_values = v.values();
452                let needle_nulls = v.nulls();
453                let needle_has_nulls = v.null_count() > 0;
454
455                // Truth table for `value [NOT] IN (set)` with SQL three-valued logic:
456                // ("-" means the value doesn't affect the result)
457                //
458                // | needle_null | haystack_null | negated | in set? | result |
459                // |-------------|---------------|---------|---------|--------|
460                // | true        | -             | false   | -       | null   |
461                // | true        | -             | true    | -       | null   |
462                // | false       | true          | false   | yes     | true   |
463                // | false       | true          | false   | no      | null   |
464                // | false       | true          | true    | yes     | false  |
465                // | false       | true          | true    | no      | null   |
466                // | false       | false         | false   | yes     | true   |
467                // | false       | false         | false   | no      | false  |
468                // | false       | false         | true    | yes     | false  |
469                // | false       | false         | true    | no      | true   |
470
471                // Compute the "contains" result using collect_bool (fast batched approach)
472                // This ignores nulls - we handle them separately
473                let contains_buffer = if negated {
474                    BooleanBuffer::collect_bool(needle_values.len(), |i| {
475                        !self.values.contains(&<$OrderedType>::from(needle_values[i]))
476                    })
477                } else {
478                    BooleanBuffer::collect_bool(needle_values.len(), |i| {
479                        self.values.contains(&<$OrderedType>::from(needle_values[i]))
480                    })
481                };
482
483                // Compute the null mask
484                // Output is null when:
485                // 1. needle value is null, OR
486                // 2. needle value is not in set AND haystack has nulls
487                let result_nulls = match (needle_has_nulls, haystack_has_nulls) {
488                    (false, false) => {
489                        // No nulls anywhere
490                        None
491                    }
492                    (true, false) => {
493                        // Only needle has nulls - just use needle's null mask
494                        needle_nulls.cloned()
495                    }
496                    (false, true) => {
497                        // Only haystack has nulls - result is null when value not in set
498                        // Valid (not null) when original "in set" is true
499                        // For NOT IN: contains_buffer = !original, so validity = !contains_buffer
500                        let validity = if negated {
501                            !&contains_buffer
502                        } else {
503                            contains_buffer.clone()
504                        };
505                        Some(NullBuffer::new(validity))
506                    }
507                    (true, true) => {
508                        // Both have nulls - combine needle nulls with haystack-induced nulls
509                        let needle_validity = needle_nulls.map(|n| n.inner().clone())
510                            .unwrap_or_else(|| BooleanBuffer::new_set(needle_values.len()));
511
512                        // Valid when original "in set" is true (see above)
513                        let haystack_validity = if negated {
514                            !&contains_buffer
515                        } else {
516                            contains_buffer.clone()
517                        };
518
519                        // Combined validity: valid only where both are valid
520                        let combined_validity = &needle_validity & &haystack_validity;
521                        Some(NullBuffer::new(combined_validity))
522                    }
523                };
524
525                Ok(BooleanArray::new(contains_buffer, result_nulls))
526            }
527        }
528    };
529}
530
531// Generate specialized filters for float types using ordered wrappers
532float_static_filter!(Float32StaticFilter, Float32Type, OrderedFloat32);
533float_static_filter!(Float64StaticFilter, Float64Type, OrderedFloat64);
534
535/// Evaluates the list of expressions into an array, flattening any dictionaries
536fn evaluate_list(
537    list: &[Arc<dyn PhysicalExpr>],
538    batch: &RecordBatch,
539) -> Result<ArrayRef> {
540    let scalars = list
541        .iter()
542        .map(|expr| {
543            expr.evaluate(batch).and_then(|r| match r {
544                ColumnarValue::Array(_) => {
545                    exec_err!("InList expression must evaluate to a scalar")
546                }
547                // Flatten dictionary values
548                ColumnarValue::Scalar(ScalarValue::Dictionary(_, v)) => Ok(*v),
549                ColumnarValue::Scalar(s) => Ok(s),
550            })
551        })
552        .collect::<Result<Vec<_>>>()?;
553
554    ScalarValue::iter_to_array(scalars)
555}
556
557/// Try to evaluate a list of expressions as constants.
558///
559/// Returns:
560/// - `Ok(Some(ArrayRef))` if all expressions are constants (can be evaluated on an empty RecordBatch)
561/// - `Ok(None)` if the list contains non-constant expressions
562/// - `Err(...)` only for actual errors (not for non-constant expressions)
563///
564/// This is used to detect when a list contains only literals, casts of literals,
565/// or other constant expressions.
566fn try_evaluate_constant_list(
567    list: &[Arc<dyn PhysicalExpr>],
568    schema: &Schema,
569) -> Result<Option<ArrayRef>> {
570    let batch = RecordBatch::new_empty(Arc::new(schema.clone()));
571    match evaluate_list(list, &batch) {
572        Ok(array) => Ok(Some(array)),
573        Err(_) => {
574            // Non-constant expressions can't be evaluated on an empty batch
575            // This is not an error, just means we can't use a static filter
576            Ok(None)
577        }
578    }
579}
580
581impl InListExpr {
582    /// Create a new InList expression
583    fn new(
584        expr: Arc<dyn PhysicalExpr>,
585        list: Vec<Arc<dyn PhysicalExpr>>,
586        negated: bool,
587        static_filter: Option<Arc<dyn StaticFilter + Send + Sync>>,
588    ) -> Self {
589        Self {
590            expr,
591            list,
592            negated,
593            static_filter,
594        }
595    }
596
597    /// Input expression
598    pub fn expr(&self) -> &Arc<dyn PhysicalExpr> {
599        &self.expr
600    }
601
602    /// List to search in
603    pub fn list(&self) -> &[Arc<dyn PhysicalExpr>] {
604        &self.list
605    }
606
607    pub fn is_empty(&self) -> bool {
608        self.list.is_empty()
609    }
610
611    pub fn len(&self) -> usize {
612        self.list.len()
613    }
614
615    /// Is this negated e.g. NOT IN LIST
616    pub fn negated(&self) -> bool {
617        self.negated
618    }
619
620    /// Create a new InList expression directly from an array, bypassing expression evaluation.
621    ///
622    /// This is more efficient than `in_list()` when you already have the list as an array,
623    /// as it avoids the conversion: `ArrayRef -> Vec<PhysicalExpr> -> ArrayRef -> StaticFilter`.
624    /// Instead it goes directly: `ArrayRef -> StaticFilter`.
625    ///
626    /// The `list` field will be empty when using this constructor, as the array is stored
627    /// directly in the static filter.
628    ///
629    /// This does not make the expression any more performant at runtime, but it does make it slightly
630    /// cheaper to build.
631    pub fn try_new_from_array(
632        expr: Arc<dyn PhysicalExpr>,
633        array: ArrayRef,
634        negated: bool,
635    ) -> Result<Self> {
636        let list = (0..array.len())
637            .map(|i| {
638                let scalar = ScalarValue::try_from_array(array.as_ref(), i)?;
639                Ok(crate::expressions::lit(scalar) as Arc<dyn PhysicalExpr>)
640            })
641            .collect::<Result<Vec<_>>>()?;
642        Ok(Self::new(
643            expr,
644            list,
645            negated,
646            Some(instantiate_static_filter(array)?),
647        ))
648    }
649
650    /// Create a new InList expression, using a static filter when possible.
651    ///
652    /// This validates data types and attempts to create a static filter for constant
653    /// list expressions. Uses specialized StaticFilter implementations for better
654    /// performance (e.g., Int32StaticFilter for Int32).
655    ///
656    /// Returns an error if data types don't match. If the list contains non-constant
657    /// expressions, falls back to dynamic evaluation at runtime.
658    pub fn try_new(
659        expr: Arc<dyn PhysicalExpr>,
660        list: Vec<Arc<dyn PhysicalExpr>>,
661        negated: bool,
662        schema: &Schema,
663    ) -> Result<Self> {
664        // Check the data types match
665        let expr_data_type = expr.data_type(schema)?;
666        for list_expr in list.iter() {
667            let list_expr_data_type = list_expr.data_type(schema)?;
668            assert_or_internal_err!(
669                DFSchema::datatype_is_logically_equal(
670                    &expr_data_type,
671                    &list_expr_data_type
672                ),
673                "The data type inlist should be same, the value type is {expr_data_type}, one of list expr type is {list_expr_data_type}"
674            );
675        }
676
677        // Try to create a static filter if all list expressions are constants
678        let static_filter = match try_evaluate_constant_list(&list, schema)? {
679            Some(in_array) => Some(instantiate_static_filter(in_array)?),
680            None => None, // Non-constant expressions, fall back to dynamic evaluation
681        };
682
683        Ok(Self::new(expr, list, negated, static_filter))
684    }
685}
686impl std::fmt::Display for InListExpr {
687    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
688        let list = expr_vec_fmt!(self.list);
689
690        if self.negated {
691            if self.static_filter.is_some() {
692                write!(f, "{} NOT IN (SET) ([{list}])", self.expr)
693            } else {
694                write!(f, "{} NOT IN ([{list}])", self.expr)
695            }
696        } else if self.static_filter.is_some() {
697            write!(f, "{} IN (SET) ([{list}])", self.expr)
698        } else {
699            write!(f, "{} IN ([{list}])", self.expr)
700        }
701    }
702}
703
704impl PhysicalExpr for InListExpr {
705    /// Return a reference to Any that can be used for downcasting
706    fn as_any(&self) -> &dyn Any {
707        self
708    }
709
710    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
711        Ok(DataType::Boolean)
712    }
713
714    fn nullable(&self, input_schema: &Schema) -> Result<bool> {
715        if self.expr.nullable(input_schema)? {
716            return Ok(true);
717        }
718
719        if let Some(static_filter) = &self.static_filter {
720            Ok(static_filter.null_count() > 0)
721        } else {
722            for expr in &self.list {
723                if expr.nullable(input_schema)? {
724                    return Ok(true);
725                }
726            }
727            Ok(false)
728        }
729    }
730
731    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
732        let num_rows = batch.num_rows();
733        let value = self.expr.evaluate(batch)?;
734        let r = match &self.static_filter {
735            Some(filter) => {
736                match value {
737                    ColumnarValue::Array(array) => {
738                        filter.contains(&array, self.negated)?
739                    }
740                    ColumnarValue::Scalar(scalar) => {
741                        if scalar.is_null() {
742                            // SQL three-valued logic: null IN (...) is always null
743                            // The code below would handle this correctly but this is a faster path
744                            let nulls = NullBuffer::new_null(num_rows);
745                            return Ok(ColumnarValue::Array(Arc::new(
746                                BooleanArray::new(
747                                    BooleanBuffer::new_unset(num_rows),
748                                    Some(nulls),
749                                ),
750                            )));
751                        }
752                        // Use a 1 row array to avoid code duplication/branching
753                        // Since all we do is compute hash and lookup this should be efficient enough
754                        let array = scalar.to_array()?;
755                        let result_array =
756                            filter.contains(array.as_ref(), self.negated)?;
757                        // Broadcast the single result to all rows
758                        // Must check is_null() to preserve NULL values (SQL three-valued logic)
759                        if result_array.is_null(0) {
760                            let nulls = NullBuffer::new_null(num_rows);
761                            BooleanArray::new(
762                                BooleanBuffer::new_unset(num_rows),
763                                Some(nulls),
764                            )
765                        } else if result_array.value(0) {
766                            BooleanArray::new(BooleanBuffer::new_set(num_rows), None)
767                        } else {
768                            BooleanArray::new(BooleanBuffer::new_unset(num_rows), None)
769                        }
770                    }
771                }
772            }
773            None => {
774                // No static filter: iterate through each expression, compare, and OR results
775                let value = value.into_array(num_rows)?;
776                let found = self.list.iter().map(|expr| expr.evaluate(batch)).try_fold(
777                    BooleanArray::new(BooleanBuffer::new_unset(num_rows), None),
778                    |result, expr| -> Result<BooleanArray> {
779                        let rhs = match expr? {
780                            ColumnarValue::Array(array) => {
781                                let cmp = make_comparator(
782                                    value.as_ref(),
783                                    array.as_ref(),
784                                    SortOptions::default(),
785                                )?;
786                                (0..num_rows)
787                                    .map(|i| {
788                                        if value.is_null(i) || array.is_null(i) {
789                                            return None;
790                                        }
791                                        Some(cmp(i, i).is_eq())
792                                    })
793                                    .collect::<BooleanArray>()
794                            }
795                            ColumnarValue::Scalar(scalar) => {
796                                // Check if scalar is null once, before the loop
797                                if scalar.is_null() {
798                                    // If scalar is null, all comparisons return null
799                                    BooleanArray::from(vec![None; num_rows])
800                                } else {
801                                    // Convert scalar to 1-element array
802                                    let array = scalar.to_array()?;
803                                    let cmp = make_comparator(
804                                        value.as_ref(),
805                                        array.as_ref(),
806                                        SortOptions::default(),
807                                    )?;
808                                    // Compare each row of value with the single scalar element
809                                    (0..num_rows)
810                                        .map(|i| {
811                                            if value.is_null(i) {
812                                                None
813                                            } else {
814                                                Some(cmp(i, 0).is_eq())
815                                            }
816                                        })
817                                        .collect::<BooleanArray>()
818                                }
819                            }
820                        };
821                        Ok(or_kleene(&result, &rhs)?)
822                    },
823                )?;
824
825                if self.negated { not(&found)? } else { found }
826            }
827        };
828        Ok(ColumnarValue::Array(Arc::new(r)))
829    }
830
831    fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
832        let mut children = vec![&self.expr];
833        children.extend(&self.list);
834        children
835    }
836
837    fn with_new_children(
838        self: Arc<Self>,
839        children: Vec<Arc<dyn PhysicalExpr>>,
840    ) -> Result<Arc<dyn PhysicalExpr>> {
841        // assume the static_filter will not change during the rewrite process
842        Ok(Arc::new(InListExpr::new(
843            Arc::clone(&children[0]),
844            children[1..].to_vec(),
845            self.negated,
846            self.static_filter.as_ref().map(Arc::clone),
847        )))
848    }
849
850    fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
851        self.expr.fmt_sql(f)?;
852        if self.negated {
853            write!(f, " NOT")?;
854        }
855
856        write!(f, " IN (")?;
857        for (i, expr) in self.list.iter().enumerate() {
858            if i > 0 {
859                write!(f, ", ")?;
860            }
861            expr.fmt_sql(f)?;
862        }
863        write!(f, ")")
864    }
865}
866
867impl PartialEq for InListExpr {
868    fn eq(&self, other: &Self) -> bool {
869        self.expr.eq(&other.expr)
870            && physical_exprs_bag_equal(&self.list, &other.list)
871            && self.negated == other.negated
872    }
873}
874
// Marker impl: `Eq` adds no methods; it declares that the `PartialEq`
// comparison for `InListExpr` is a full equivalence relation.
impl Eq for InListExpr {}
876
877impl Hash for InListExpr {
878    fn hash<H: Hasher>(&self, state: &mut H) {
879        self.expr.hash(state);
880        self.negated.hash(state);
881        // Add `self.static_filter` when hash is available
882        self.list.hash(state);
883    }
884}
885
886/// Creates a unary expression InList
887pub fn in_list(
888    expr: Arc<dyn PhysicalExpr>,
889    list: Vec<Arc<dyn PhysicalExpr>>,
890    negated: &bool,
891    schema: &Schema,
892) -> Result<Arc<dyn PhysicalExpr>> {
893    Ok(Arc::new(InListExpr::try_new(expr, list, *negated, schema)?))
894}
895
896#[cfg(test)]
897mod tests {
898    use super::*;
899    use crate::expressions::{col, lit, try_cast};
900    use arrow::buffer::NullBuffer;
901    use arrow::datatypes::{IntervalDayTime, IntervalMonthDayNano, i256};
902    use datafusion_common::plan_err;
903    use datafusion_expr::type_coercion::binary::comparison_coercion;
904    use datafusion_physical_expr_common::physical_expr::fmt_sql;
905    use insta::assert_snapshot;
906    use itertools::Itertools;
907
908    type InListCastResult = (Arc<dyn PhysicalExpr>, Vec<Arc<dyn PhysicalExpr>>);
909
910    // Try to do the type coercion for list physical expr.
911    // It's just used in the test
912    fn in_list_cast(
913        expr: Arc<dyn PhysicalExpr>,
914        list: Vec<Arc<dyn PhysicalExpr>>,
915        input_schema: &Schema,
916    ) -> Result<InListCastResult> {
917        let expr_type = &expr.data_type(input_schema)?;
918        let list_types: Vec<DataType> = list
919            .iter()
920            .map(|list_expr| list_expr.data_type(input_schema).unwrap())
921            .collect();
922        let result_type = get_coerce_type(expr_type, &list_types);
923        match result_type {
924            None => plan_err!(
925                "Can not find compatible types to compare {expr_type} with [{}]",
926                list_types.iter().join(", ")
927            ),
928            Some(data_type) => {
929                // find the coerced type
930                let cast_expr = try_cast(expr, input_schema, data_type.clone())?;
931                let cast_list_expr = list
932                    .into_iter()
933                    .map(|list_expr| {
934                        try_cast(list_expr, input_schema, data_type.clone()).unwrap()
935                    })
936                    .collect();
937                Ok((cast_expr, cast_list_expr))
938            }
939        }
940    }
941
942    // Attempts to coerce the types of `list_type` to be comparable with the
943    // `expr_type`
944    fn get_coerce_type(expr_type: &DataType, list_type: &[DataType]) -> Option<DataType> {
945        list_type
946            .iter()
947            .try_fold(expr_type.clone(), |left_type, right_type| {
948                comparison_coercion(&left_type, right_type)
949            })
950    }
951
    /// Test helper macro that evaluates an IN LIST expression with automatic type casting.
    ///
    /// # Parameters
    /// - `$BATCH`: The `RecordBatch` containing the input data to evaluate against
    /// - `$LIST`: A `Vec<Arc<dyn PhysicalExpr>>` of literal expressions representing the IN list values
    /// - `$NEGATED`: A `&bool` indicating whether this is a NOT IN operation (true) or IN operation (false)
    /// - `$EXPECTED`: A `Vec<Option<bool>>` representing the expected boolean results for each row
    /// - `$COL`: An `Arc<dyn PhysicalExpr>` representing the column expression to evaluate
    /// - `$SCHEMA`: A `&Schema` reference for the input batch
    ///
    /// This macro first applies type casting to the column and list expressions to ensure
    /// type compatibility, then delegates to `in_list_raw!` to perform the evaluation and assertion.
    ///
    /// The expansion uses `?`, so it must be invoked inside a function (or closure)
    /// that returns `Result`.
    macro_rules! in_list {
        ($BATCH:expr, $LIST:expr, $NEGATED:expr, $EXPECTED:expr, $COL:expr, $SCHEMA:expr) => {{
            // Coerce the column and the list items to a common type; a coercion
            // failure is returned from the enclosing function via `?`.
            let (cast_expr, cast_list_exprs) = in_list_cast($COL, $LIST, $SCHEMA)?;
            // Evaluate and assert using the already-cast expressions.
            in_list_raw!(
                $BATCH,
                cast_list_exprs,
                $NEGATED,
                $EXPECTED,
                cast_expr,
                $SCHEMA
            );
        }};
    }
977
    /// Test helper macro that evaluates an IN LIST expression without automatic type casting.
    ///
    /// # Parameters
    /// - `$BATCH`: The `RecordBatch` containing the input data to evaluate against
    /// - `$LIST`: A `Vec<Arc<dyn PhysicalExpr>>` of literal expressions representing the IN list values
    /// - `$NEGATED`: A `&bool` indicating whether this is a NOT IN operation (true) or IN operation (false)
    /// - `$EXPECTED`: A `Vec<Option<bool>>` representing the expected boolean results for each row
    /// - `$COL`: An `Arc<dyn PhysicalExpr>` representing the column expression to evaluate
    /// - `$SCHEMA`: A `&Schema` reference for the input batch
    ///
    /// This macro creates an IN LIST expression, evaluates it against the batch, converts the result
    /// to a `BooleanArray`, and asserts that it matches the expected output. Use this when the column
    /// and list expressions are already the correct types and don't require casting.
    ///
    /// The expansion uses `?` on evaluation errors, so it must be invoked inside a
    /// function (or closure) that returns `Result`. `$BATCH` is expanded more than
    /// once, so pass a plain variable rather than an expression with side effects.
    macro_rules! in_list_raw {
        ($BATCH:expr, $LIST:expr, $NEGATED:expr, $EXPECTED:expr, $COL:expr, $SCHEMA:expr) => {{
            // Bind the column once: it is used to build the expression and again
            // in the failure message below.
            let col_expr = $COL;
            // Construction failures panic (`unwrap`) rather than propagate.
            let expr = in_list(Arc::clone(&col_expr), $LIST, $NEGATED, $SCHEMA).unwrap();
            let result = expr
                .evaluate(&$BATCH)?
                .into_array($BATCH.num_rows())
                .expect("Failed to convert to array");
            let result = as_boolean_array(&result);
            let expected = &BooleanArray::from($EXPECTED);
            // The message shows the SQL form of the expression plus the evaluated
            // column values, so a failing case can be identified from the output.
            assert_eq!(
                expected,
                result,
                "Failed for: {}\n{}: {:?}",
                fmt_sql(expr.as_ref()),
                fmt_sql(col_expr.as_ref()),
                col_expr
                    .evaluate(&$BATCH)?
                    .into_array($BATCH.num_rows())
                    .unwrap()
            );
        }};
    }
1014
    /// Test case for primitive types following the standard IN LIST pattern.
    ///
    /// Each test case represents a data type with:
    /// - `value_in`: A value that appears in both the test array and the IN list (matches → true)
    /// - `value_not_in`: A value that appears in the test array but NOT in the IN list (doesn't match → false)
    /// - `other_list_values`: Additional values in the IN list besides `value_in`
    /// - `null_value`: Optional null scalar value for NULL handling tests. When None, tests
    ///   without nulls are run, exercising the `(false, false)` and `(false, true)` branches.
    struct InListPrimitiveTestCase {
        /// Label for the case; `run_test_cases` prints it in its diagnostic output.
        name: &'static str,
        /// Value placed in both the input array and the IN list.
        value_in: ScalarValue,
        /// Value placed in the input array only.
        value_not_in: ScalarValue,
        /// Extra IN list entries that follow `value_in`.
        other_list_values: Vec<ScalarValue>,
        /// Null scalar of the same type; `Some` enables the NULL-handling scenarios.
        null_value: Option<ScalarValue>,
    }
1030
    /// Generic test data struct for primitive types.
    ///
    /// Holds test values needed for IN LIST tests, allowing the data
    /// to be declared explicitly and reused across multiple types.
    /// `T` is the raw declaration type; the `primitive_test_case*` helpers
    /// convert each value with `TryInto` before wrapping it in a `ScalarValue`.
    #[derive(Clone)]
    struct PrimitiveTestCaseData<T> {
        /// Raw value that will appear in both the input array and the IN list.
        value_in: T,
        /// Raw value that will appear in the input array only.
        value_not_in: T,
        /// Raw values for the remaining IN list entries.
        other_list_values: Vec<T>,
    }
1041
1042    /// Helper to create test cases for any primitive type using generic data.
1043    ///
1044    /// Uses TryInto for flexible type conversion, allowing test data to be
1045    /// declared in any convertible type (e.g., i32 for all integer types).
1046    /// Creates a test case WITH null support (for null handling tests).
1047    fn primitive_test_case<T, D, F>(
1048        name: &'static str,
1049        constructor: F,
1050        data: PrimitiveTestCaseData<D>,
1051    ) -> InListPrimitiveTestCase
1052    where
1053        D: TryInto<T> + Clone,
1054        <D as TryInto<T>>::Error: Debug,
1055        F: Fn(Option<T>) -> ScalarValue,
1056        T: Clone,
1057    {
1058        InListPrimitiveTestCase {
1059            name,
1060            value_in: constructor(Some(data.value_in.try_into().unwrap())),
1061            value_not_in: constructor(Some(data.value_not_in.try_into().unwrap())),
1062            other_list_values: data
1063                .other_list_values
1064                .into_iter()
1065                .map(|v| constructor(Some(v.try_into().unwrap())))
1066                .collect(),
1067            null_value: Some(constructor(None)),
1068        }
1069    }
1070
1071    /// Helper to create test cases WITHOUT null support.
1072    /// These test cases exercise the `(false, true)` branch (no nulls, negated).
1073    fn primitive_test_case_no_nulls<T, D, F>(
1074        name: &'static str,
1075        constructor: F,
1076        data: PrimitiveTestCaseData<D>,
1077    ) -> InListPrimitiveTestCase
1078    where
1079        D: TryInto<T> + Clone,
1080        <D as TryInto<T>>::Error: Debug,
1081        F: Fn(Option<T>) -> ScalarValue,
1082        T: Clone,
1083    {
1084        InListPrimitiveTestCase {
1085            name,
1086            value_in: constructor(Some(data.value_in.try_into().unwrap())),
1087            value_not_in: constructor(Some(data.value_not_in.try_into().unwrap())),
1088            other_list_values: data
1089                .other_list_values
1090                .into_iter()
1091                .map(|v| constructor(Some(v.try_into().unwrap())))
1092                .collect(),
1093            null_value: None,
1094        }
1095    }
1096
1097    /// Runs test cases for multiple types, providing detailed SQL error messages on failure.
1098    ///
1099    /// For each test case, runs IN LIST scenarios based on whether null_value is Some or None:
1100    /// - With null_value (Some): 4 tests including null handling
1101    /// - Without null_value (None): 2 tests exercising the no-nulls paths
1102    fn run_test_cases(test_cases: Vec<InListPrimitiveTestCase>) -> Result<()> {
1103        for test_case in test_cases {
1104            let test_name = test_case.name;
1105
1106            // Get the data type from the scalar value
1107            let data_type = test_case.value_in.data_type();
1108
1109            // Build the base list: [value_in, ...other_list_values]
1110            let build_base_list = || -> Vec<Arc<dyn PhysicalExpr>> {
1111                let mut list = vec![lit(test_case.value_in.clone())];
1112                list.extend(test_case.other_list_values.iter().map(|v| lit(v.clone())));
1113                list
1114            };
1115
1116            match &test_case.null_value {
1117                Some(null_val) => {
1118                    // Tests WITH nulls in the needle array
1119                    let schema =
1120                        Schema::new(vec![Field::new("a", data_type.clone(), true)]);
1121
1122                    // Create array from scalar values: [value_in, value_not_in, None]
1123                    let array = ScalarValue::iter_to_array(vec![
1124                        test_case.value_in.clone(),
1125                        test_case.value_not_in.clone(),
1126                        null_val.clone(),
1127                    ])?;
1128
1129                    let col_a = col("a", &schema)?;
1130                    let batch = RecordBatch::try_new(
1131                        Arc::new(schema.clone()),
1132                        vec![Arc::clone(&array)],
1133                    )?;
1134
1135                    // Test 1: a IN (list) → [true, false, null]
1136                    let list = build_base_list();
1137                    in_list!(
1138                        batch,
1139                        list,
1140                        &false,
1141                        vec![Some(true), Some(false), None],
1142                        Arc::clone(&col_a),
1143                        &schema
1144                    );
1145
1146                    // Test 2: a NOT IN (list) → [false, true, null]
1147                    let list = build_base_list();
1148                    in_list!(
1149                        batch,
1150                        list,
1151                        &true,
1152                        vec![Some(false), Some(true), None],
1153                        Arc::clone(&col_a),
1154                        &schema
1155                    );
1156
1157                    // Test 3: a IN (list, NULL) → [true, null, null]
1158                    let mut list = build_base_list();
1159                    list.push(lit(null_val.clone()));
1160                    in_list!(
1161                        batch,
1162                        list,
1163                        &false,
1164                        vec![Some(true), None, None],
1165                        Arc::clone(&col_a),
1166                        &schema
1167                    );
1168
1169                    // Test 4: a NOT IN (list, NULL) → [false, null, null]
1170                    let mut list = build_base_list();
1171                    list.push(lit(null_val.clone()));
1172                    in_list!(
1173                        batch,
1174                        list,
1175                        &true,
1176                        vec![Some(false), None, None],
1177                        Arc::clone(&col_a),
1178                        &schema
1179                    );
1180                }
1181                None => {
1182                    // Tests WITHOUT nulls - exercises the (false, false) and (false, true) branches
1183                    let schema =
1184                        Schema::new(vec![Field::new("a", data_type.clone(), false)]);
1185
1186                    // Create array from scalar values: [value_in, value_not_in] (no NULL)
1187                    let array = ScalarValue::iter_to_array(vec![
1188                        test_case.value_in.clone(),
1189                        test_case.value_not_in.clone(),
1190                    ])?;
1191
1192                    let col_a = col("a", &schema)?;
1193                    let batch = RecordBatch::try_new(
1194                        Arc::new(schema.clone()),
1195                        vec![Arc::clone(&array)],
1196                    )?;
1197
1198                    // Test 1: a IN (list) → [true, false] - exercises (false, false) branch
1199                    let list = build_base_list();
1200                    in_list!(
1201                        batch,
1202                        list,
1203                        &false,
1204                        vec![Some(true), Some(false)],
1205                        Arc::clone(&col_a),
1206                        &schema
1207                    );
1208
1209                    // Test 2: a NOT IN (list) → [false, true] - exercises (false, true) branch
1210                    let list = build_base_list();
1211                    in_list!(
1212                        batch,
1213                        list,
1214                        &true,
1215                        vec![Some(false), Some(true)],
1216                        Arc::clone(&col_a),
1217                        &schema
1218                    );
1219
1220                    eprintln!(
1221                        "Test '{test_name}': exercised (false, true) branch (no nulls, negated)",
1222                    );
1223                }
1224            }
1225        }
1226
1227        Ok(())
1228    }
1229
1230    /// Test IN LIST for all integer types (Int8/16/32/64, UInt8/16/32/64).
1231    ///
1232    /// Test data: 0 (in list), 2 (not in list), [1, 3, 5] (other list values)
1233    #[test]
1234    fn in_list_int_types() -> Result<()> {
1235        let int_data = PrimitiveTestCaseData {
1236            value_in: 0,
1237            value_not_in: 2,
1238            other_list_values: vec![1, 3, 5],
1239        };
1240
1241        run_test_cases(vec![
1242            // Tests WITH nulls
1243            primitive_test_case("int8", ScalarValue::Int8, int_data.clone()),
1244            primitive_test_case("int16", ScalarValue::Int16, int_data.clone()),
1245            primitive_test_case("int32", ScalarValue::Int32, int_data.clone()),
1246            primitive_test_case("int64", ScalarValue::Int64, int_data.clone()),
1247            primitive_test_case("uint8", ScalarValue::UInt8, int_data.clone()),
1248            primitive_test_case("uint16", ScalarValue::UInt16, int_data.clone()),
1249            primitive_test_case("uint32", ScalarValue::UInt32, int_data.clone()),
1250            primitive_test_case("uint64", ScalarValue::UInt64, int_data.clone()),
1251            // Tests WITHOUT nulls - exercises (false, true) branch
1252            primitive_test_case_no_nulls("int32_no_nulls", ScalarValue::Int32, int_data),
1253        ])
1254    }
1255
1256    /// Test IN LIST for all string types (Utf8, LargeUtf8, Utf8View).
1257    ///
1258    /// Test data: "a" (in list), "d" (not in list), ["b", "c"] (other list values)
1259    #[test]
1260    fn in_list_string_types() -> Result<()> {
1261        let string_data = PrimitiveTestCaseData {
1262            value_in: "a",
1263            value_not_in: "d",
1264            other_list_values: vec!["b", "c"],
1265        };
1266
1267        run_test_cases(vec![
1268            primitive_test_case("utf8", ScalarValue::Utf8, string_data.clone()),
1269            primitive_test_case(
1270                "large_utf8",
1271                ScalarValue::LargeUtf8,
1272                string_data.clone(),
1273            ),
1274            primitive_test_case("utf8_view", ScalarValue::Utf8View, string_data),
1275        ])
1276    }
1277
1278    /// Test IN LIST for all binary types (Binary, LargeBinary, BinaryView).
1279    ///
1280    /// Test data: [1,2,3] (in list), [1,2,2] (not in list), [[4,5,6], [7,8,9]] (other list values)
1281    #[test]
1282    fn in_list_binary_types() -> Result<()> {
1283        let binary_data = PrimitiveTestCaseData {
1284            value_in: vec![1_u8, 2, 3],
1285            value_not_in: vec![1_u8, 2, 2],
1286            other_list_values: vec![vec![4_u8, 5, 6], vec![7_u8, 8, 9]],
1287        };
1288
1289        run_test_cases(vec![
1290            primitive_test_case("binary", ScalarValue::Binary, binary_data.clone()),
1291            primitive_test_case(
1292                "large_binary",
1293                ScalarValue::LargeBinary,
1294                binary_data.clone(),
1295            ),
1296            primitive_test_case("binary_view", ScalarValue::BinaryView, binary_data),
1297        ])
1298    }
1299
1300    /// Test IN LIST for date types (Date32, Date64).
1301    ///
1302    /// Test data: 0 (in list), 2 (not in list), [1, 3] (other list values)
1303    #[test]
1304    fn in_list_date_types() -> Result<()> {
1305        let date_data = PrimitiveTestCaseData {
1306            value_in: 0,
1307            value_not_in: 2,
1308            other_list_values: vec![1, 3],
1309        };
1310
1311        run_test_cases(vec![
1312            primitive_test_case("date32", ScalarValue::Date32, date_data.clone()),
1313            primitive_test_case("date64", ScalarValue::Date64, date_data),
1314        ])
1315    }
1316
1317    /// Test IN LIST for Decimal128 type.
1318    ///
1319    /// Test data: 0 (in list), 200 (not in list), [100, 300] (other list values) with precision=10, scale=2
1320    #[test]
1321    fn in_list_decimal() -> Result<()> {
1322        run_test_cases(vec![InListPrimitiveTestCase {
1323            name: "decimal128",
1324            value_in: ScalarValue::Decimal128(Some(0), 10, 2),
1325            value_not_in: ScalarValue::Decimal128(Some(200), 10, 2),
1326            other_list_values: vec![
1327                ScalarValue::Decimal128(Some(100), 10, 2),
1328                ScalarValue::Decimal128(Some(300), 10, 2),
1329            ],
1330            null_value: Some(ScalarValue::Decimal128(None, 10, 2)),
1331        }])
1332    }
1333
1334    /// Test IN LIST for timestamp types.
1335    ///
1336    /// Test data: 0 (in list), 2000 (not in list), [1000, 3000] (other list values)
1337    #[test]
1338    fn in_list_timestamp_types() -> Result<()> {
1339        run_test_cases(vec![
1340            InListPrimitiveTestCase {
1341                name: "timestamp_nanosecond",
1342                value_in: ScalarValue::TimestampNanosecond(Some(0), None),
1343                value_not_in: ScalarValue::TimestampNanosecond(Some(2000), None),
1344                other_list_values: vec![
1345                    ScalarValue::TimestampNanosecond(Some(1000), None),
1346                    ScalarValue::TimestampNanosecond(Some(3000), None),
1347                ],
1348                null_value: Some(ScalarValue::TimestampNanosecond(None, None)),
1349            },
1350            InListPrimitiveTestCase {
1351                name: "timestamp_millisecond_with_tz",
1352                value_in: ScalarValue::TimestampMillisecond(
1353                    Some(1500000),
1354                    Some("+05:00".into()),
1355                ),
1356                value_not_in: ScalarValue::TimestampMillisecond(
1357                    Some(2500000),
1358                    Some("+05:00".into()),
1359                ),
1360                other_list_values: vec![ScalarValue::TimestampMillisecond(
1361                    Some(3500000),
1362                    Some("+05:00".into()),
1363                )],
1364                null_value: Some(ScalarValue::TimestampMillisecond(
1365                    None,
1366                    Some("+05:00".into()),
1367                )),
1368            },
1369            InListPrimitiveTestCase {
1370                name: "timestamp_millisecond_mixed_tz",
1371                value_in: ScalarValue::TimestampMillisecond(
1372                    Some(1500000),
1373                    Some("+05:00".into()),
1374                ),
1375                value_not_in: ScalarValue::TimestampMillisecond(
1376                    Some(2500000),
1377                    Some("+05:00".into()),
1378                ),
1379                other_list_values: vec![
1380                    ScalarValue::TimestampMillisecond(
1381                        Some(3500000),
1382                        Some("+01:00".into()),
1383                    ),
1384                    ScalarValue::TimestampMillisecond(Some(4500000), Some("UTC".into())),
1385                ],
1386                null_value: Some(ScalarValue::TimestampMillisecond(
1387                    None,
1388                    Some("+05:00".into()),
1389                )),
1390            },
1391        ])
1392    }
1393
1394    #[test]
1395    fn in_list_float64() -> Result<()> {
1396        let schema = Schema::new(vec![Field::new("a", DataType::Float64, true)]);
1397        let a = Float64Array::from(vec![
1398            Some(0.0),
1399            Some(0.2),
1400            None,
1401            Some(f64::NAN),
1402            Some(-f64::NAN),
1403        ]);
1404        let col_a = col("a", &schema)?;
1405        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?;
1406
1407        // expression: "a in (0.0, 0.1)"
1408        let list = vec![lit(0.0f64), lit(0.1f64)];
1409        in_list!(
1410            batch,
1411            list,
1412            &false,
1413            vec![Some(true), Some(false), None, Some(false), Some(false)],
1414            Arc::clone(&col_a),
1415            &schema
1416        );
1417
1418        // expression: "a not in (0.0, 0.1)"
1419        let list = vec![lit(0.0f64), lit(0.1f64)];
1420        in_list!(
1421            batch,
1422            list,
1423            &true,
1424            vec![Some(false), Some(true), None, Some(true), Some(true)],
1425            Arc::clone(&col_a),
1426            &schema
1427        );
1428
1429        // expression: "a in (0.0, 0.1, NULL)"
1430        let list = vec![lit(0.0f64), lit(0.1f64), lit(ScalarValue::Null)];
1431        in_list!(
1432            batch,
1433            list,
1434            &false,
1435            vec![Some(true), None, None, None, None],
1436            Arc::clone(&col_a),
1437            &schema
1438        );
1439
1440        // expression: "a not in (0.0, 0.1, NULL)"
1441        let list = vec![lit(0.0f64), lit(0.1f64), lit(ScalarValue::Null)];
1442        in_list!(
1443            batch,
1444            list,
1445            &true,
1446            vec![Some(false), None, None, None, None],
1447            Arc::clone(&col_a),
1448            &schema
1449        );
1450
1451        // expression: "a in (0.0, 0.1, NaN)"
1452        let list = vec![lit(0.0f64), lit(0.1f64), lit(f64::NAN)];
1453        in_list!(
1454            batch,
1455            list,
1456            &false,
1457            vec![Some(true), Some(false), None, Some(true), Some(false)],
1458            Arc::clone(&col_a),
1459            &schema
1460        );
1461
1462        // expression: "a not in (0.0, 0.1, NaN)"
1463        let list = vec![lit(0.0f64), lit(0.1f64), lit(f64::NAN)];
1464        in_list!(
1465            batch,
1466            list,
1467            &true,
1468            vec![Some(false), Some(true), None, Some(false), Some(true)],
1469            Arc::clone(&col_a),
1470            &schema
1471        );
1472
1473        // expression: "a in (0.0, 0.1, -NaN)"
1474        let list = vec![lit(0.0f64), lit(0.1f64), lit(-f64::NAN)];
1475        in_list!(
1476            batch,
1477            list,
1478            &false,
1479            vec![Some(true), Some(false), None, Some(false), Some(true)],
1480            Arc::clone(&col_a),
1481            &schema
1482        );
1483
1484        // expression: "a not in (0.0, 0.1, -NaN)"
1485        let list = vec![lit(0.0f64), lit(0.1f64), lit(-f64::NAN)];
1486        in_list!(
1487            batch,
1488            list,
1489            &true,
1490            vec![Some(false), Some(true), None, Some(true), Some(false)],
1491            Arc::clone(&col_a),
1492            &schema
1493        );
1494
1495        Ok(())
1496    }
1497
1498    #[test]
1499    fn in_list_bool() -> Result<()> {
1500        let schema = Schema::new(vec![Field::new("a", DataType::Boolean, true)]);
1501        let a = BooleanArray::from(vec![Some(true), None]);
1502        let col_a = col("a", &schema)?;
1503        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?;
1504
1505        // expression: "a in (true)"
1506        let list = vec![lit(true)];
1507        in_list!(
1508            batch,
1509            list,
1510            &false,
1511            vec![Some(true), None],
1512            Arc::clone(&col_a),
1513            &schema
1514        );
1515
1516        // expression: "a not in (true)"
1517        let list = vec![lit(true)];
1518        in_list!(
1519            batch,
1520            list,
1521            &true,
1522            vec![Some(false), None],
1523            Arc::clone(&col_a),
1524            &schema
1525        );
1526
1527        // expression: "a in (true, NULL)"
1528        let list = vec![lit(true), lit(ScalarValue::Null)];
1529        in_list!(
1530            batch,
1531            list,
1532            &false,
1533            vec![Some(true), None],
1534            Arc::clone(&col_a),
1535            &schema
1536        );
1537
1538        // expression: "a not in (true, NULL)"
1539        let list = vec![lit(true), lit(ScalarValue::Null)];
1540        in_list!(
1541            batch,
1542            list,
1543            &true,
1544            vec![Some(false), None],
1545            Arc::clone(&col_a),
1546            &schema
1547        );
1548
1549        Ok(())
1550    }
1551
1552    macro_rules! test_nullable {
1553        ($COL:expr, $LIST:expr, $SCHEMA:expr, $EXPECTED:expr) => {{
1554            let (cast_expr, cast_list_exprs) = in_list_cast($COL, $LIST, $SCHEMA)?;
1555            let expr = in_list(cast_expr, cast_list_exprs, &false, $SCHEMA).unwrap();
1556            let result = expr.nullable($SCHEMA)?;
1557            assert_eq!($EXPECTED, result);
1558        }};
1559    }
1560
1561    #[test]
1562    fn in_list_nullable() -> Result<()> {
1563        let schema = Schema::new(vec![
1564            Field::new("c1_nullable", DataType::Int64, true),
1565            Field::new("c2_non_nullable", DataType::Int64, false),
1566        ]);
1567
1568        let c1_nullable = col("c1_nullable", &schema)?;
1569        let c2_non_nullable = col("c2_non_nullable", &schema)?;
1570
1571        // static_filter has no nulls
1572        let list = vec![lit(1_i64), lit(2_i64)];
1573        test_nullable!(Arc::clone(&c1_nullable), list.clone(), &schema, true);
1574        test_nullable!(Arc::clone(&c2_non_nullable), list.clone(), &schema, false);
1575
1576        // static_filter has nulls
1577        let list = vec![lit(1_i64), lit(2_i64), lit(ScalarValue::Null)];
1578        test_nullable!(Arc::clone(&c1_nullable), list.clone(), &schema, true);
1579        test_nullable!(Arc::clone(&c2_non_nullable), list.clone(), &schema, true);
1580
1581        let list = vec![Arc::clone(&c1_nullable)];
1582        test_nullable!(Arc::clone(&c2_non_nullable), list.clone(), &schema, true);
1583
1584        let list = vec![Arc::clone(&c2_non_nullable)];
1585        test_nullable!(Arc::clone(&c1_nullable), list.clone(), &schema, true);
1586
1587        let list = vec![Arc::clone(&c2_non_nullable), Arc::clone(&c2_non_nullable)];
1588        test_nullable!(Arc::clone(&c2_non_nullable), list.clone(), &schema, false);
1589
1590        Ok(())
1591    }
1592
1593    #[test]
1594    fn in_list_no_cols() -> Result<()> {
1595        // test logic when the in_list expression doesn't have any columns
1596        let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
1597        let a = Int32Array::from(vec![Some(1), Some(2), None]);
1598        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?;
1599
1600        let list = vec![lit(ScalarValue::from(1i32)), lit(ScalarValue::from(6i32))];
1601
1602        // 1 IN (1, 6)
1603        let expr = lit(ScalarValue::Int32(Some(1)));
1604        in_list!(
1605            batch,
1606            list.clone(),
1607            &false,
1608            // should have three outputs, as the input batch has three rows
1609            vec![Some(true), Some(true), Some(true)],
1610            expr,
1611            &schema
1612        );
1613
1614        // 2 IN (1, 6)
1615        let expr = lit(ScalarValue::Int32(Some(2)));
1616        in_list!(
1617            batch,
1618            list.clone(),
1619            &false,
1620            // should have three outputs, as the input batch has three rows
1621            vec![Some(false), Some(false), Some(false)],
1622            expr,
1623            &schema
1624        );
1625
1626        // NULL IN (1, 6)
1627        let expr = lit(ScalarValue::Int32(None));
1628        in_list!(
1629            batch,
1630            list.clone(),
1631            &false,
1632            // should have three outputs, as the input batch has three rows
1633            vec![None, None, None],
1634            expr,
1635            &schema
1636        );
1637
1638        Ok(())
1639    }
1640
1641    #[test]
1642    fn in_list_utf8_with_dict_types() -> Result<()> {
1643        fn dict_lit(key_type: DataType, value: &str) -> Arc<dyn PhysicalExpr> {
1644            lit(ScalarValue::Dictionary(
1645                Box::new(key_type),
1646                Box::new(ScalarValue::new_utf8(value.to_string())),
1647            ))
1648        }
1649
1650        fn null_dict_lit(key_type: DataType) -> Arc<dyn PhysicalExpr> {
1651            lit(ScalarValue::Dictionary(
1652                Box::new(key_type),
1653                Box::new(ScalarValue::Utf8(None)),
1654            ))
1655        }
1656
1657        let schema = Schema::new(vec![Field::new(
1658            "a",
1659            DataType::Dictionary(Box::new(DataType::UInt16), Box::new(DataType::Utf8)),
1660            true,
1661        )]);
1662        let a: UInt16DictionaryArray =
1663            vec![Some("a"), Some("d"), None].into_iter().collect();
1664        let col_a = col("a", &schema)?;
1665        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?;
1666
1667        // expression: "a in ("a", "b")"
1668        let lists = [
1669            vec![lit("a"), lit("b")],
1670            vec![
1671                dict_lit(DataType::Int8, "a"),
1672                dict_lit(DataType::UInt16, "b"),
1673            ],
1674        ];
1675        for list in lists.iter() {
1676            in_list_raw!(
1677                batch,
1678                list.clone(),
1679                &false,
1680                vec![Some(true), Some(false), None],
1681                Arc::clone(&col_a),
1682                &schema
1683            );
1684        }
1685
1686        // expression: "a not in ("a", "b")"
1687        for list in lists.iter() {
1688            in_list_raw!(
1689                batch,
1690                list.clone(),
1691                &true,
1692                vec![Some(false), Some(true), None],
1693                Arc::clone(&col_a),
1694                &schema
1695            );
1696        }
1697
1698        // expression: "a in ("a", "b", null)"
1699        let lists = [
1700            vec![lit("a"), lit("b"), lit(ScalarValue::Utf8(None))],
1701            vec![
1702                dict_lit(DataType::Int8, "a"),
1703                dict_lit(DataType::UInt16, "b"),
1704                null_dict_lit(DataType::UInt16),
1705            ],
1706        ];
1707        for list in lists.iter() {
1708            in_list_raw!(
1709                batch,
1710                list.clone(),
1711                &false,
1712                vec![Some(true), None, None],
1713                Arc::clone(&col_a),
1714                &schema
1715            );
1716        }
1717
1718        // expression: "a not in ("a", "b", null)"
1719        for list in lists.iter() {
1720            in_list_raw!(
1721                batch,
1722                list.clone(),
1723                &true,
1724                vec![Some(false), None, None],
1725                Arc::clone(&col_a),
1726                &schema
1727            );
1728        }
1729
1730        Ok(())
1731    }
1732
1733    #[test]
1734    fn test_fmt_sql_1() -> Result<()> {
1735        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
1736        let col_a = col("a", &schema)?;
1737
1738        // Test: a IN ('a', 'b')
1739        let list = vec![lit("a"), lit("b")];
1740        let expr = in_list(Arc::clone(&col_a), list, &false, &schema)?;
1741        let sql_string = fmt_sql(expr.as_ref()).to_string();
1742        let display_string = expr.to_string();
1743        assert_snapshot!(sql_string, @"a IN (a, b)");
1744        assert_snapshot!(display_string, @"a@0 IN (SET) ([a, b])");
1745        Ok(())
1746    }
1747
1748    #[test]
1749    fn test_fmt_sql_2() -> Result<()> {
1750        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
1751        let col_a = col("a", &schema)?;
1752
1753        // Test: a NOT IN ('a', 'b')
1754        let list = vec![lit("a"), lit("b")];
1755        let expr = in_list(Arc::clone(&col_a), list, &true, &schema)?;
1756        let sql_string = fmt_sql(expr.as_ref()).to_string();
1757        let display_string = expr.to_string();
1758
1759        assert_snapshot!(sql_string, @"a NOT IN (a, b)");
1760        assert_snapshot!(display_string, @"a@0 NOT IN (SET) ([a, b])");
1761        Ok(())
1762    }
1763
1764    #[test]
1765    fn test_fmt_sql_3() -> Result<()> {
1766        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
1767        let col_a = col("a", &schema)?;
1768        // Test: a IN ('a', 'b', NULL)
1769        let list = vec![lit("a"), lit("b"), lit(ScalarValue::Utf8(None))];
1770        let expr = in_list(Arc::clone(&col_a), list, &false, &schema)?;
1771        let sql_string = fmt_sql(expr.as_ref()).to_string();
1772        let display_string = expr.to_string();
1773
1774        assert_snapshot!(sql_string, @"a IN (a, b, NULL)");
1775        assert_snapshot!(display_string, @"a@0 IN (SET) ([a, b, NULL])");
1776        Ok(())
1777    }
1778
1779    #[test]
1780    fn test_fmt_sql_4() -> Result<()> {
1781        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
1782        let col_a = col("a", &schema)?;
1783        // Test: a NOT IN ('a', 'b', NULL)
1784        let list = vec![lit("a"), lit("b"), lit(ScalarValue::Utf8(None))];
1785        let expr = in_list(Arc::clone(&col_a), list, &true, &schema)?;
1786        let sql_string = fmt_sql(expr.as_ref()).to_string();
1787        let display_string = expr.to_string();
1788        assert_snapshot!(sql_string, @"a NOT IN (a, b, NULL)");
1789        assert_snapshot!(display_string, @"a@0 NOT IN (SET) ([a, b, NULL])");
1790        Ok(())
1791    }
1792
1793    #[test]
1794    fn in_list_struct() -> Result<()> {
1795        // Create schema with a struct column
1796        let struct_fields = Fields::from(vec![
1797            Field::new("x", DataType::Int32, false),
1798            Field::new("y", DataType::Utf8, false),
1799        ]);
1800        let schema = Schema::new(vec![Field::new(
1801            "a",
1802            DataType::Struct(struct_fields.clone()),
1803            true,
1804        )]);
1805
1806        // Create test data: array of structs
1807        let x_array = Arc::new(Int32Array::from(vec![1, 2, 3]));
1808        let y_array = Arc::new(StringArray::from(vec!["a", "b", "c"]));
1809        let struct_array =
1810            StructArray::new(struct_fields.clone(), vec![x_array, y_array], None);
1811
1812        let col_a = col("a", &schema)?;
1813        let batch =
1814            RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(struct_array)])?;
1815
1816        // Create literal structs for the IN list
1817        // Struct {x: 1, y: "a"}
1818        let struct1 = ScalarValue::Struct(Arc::new(StructArray::new(
1819            struct_fields.clone(),
1820            vec![
1821                Arc::new(Int32Array::from(vec![1])),
1822                Arc::new(StringArray::from(vec!["a"])),
1823            ],
1824            None,
1825        )));
1826
1827        // Struct {x: 3, y: "c"}
1828        let struct3 = ScalarValue::Struct(Arc::new(StructArray::new(
1829            struct_fields.clone(),
1830            vec![
1831                Arc::new(Int32Array::from(vec![3])),
1832                Arc::new(StringArray::from(vec!["c"])),
1833            ],
1834            None,
1835        )));
1836
1837        // Test: a IN ({1, "a"}, {3, "c"})
1838        let list = vec![lit(struct1.clone()), lit(struct3.clone())];
1839        in_list_raw!(
1840            batch,
1841            list.clone(),
1842            &false,
1843            vec![Some(true), Some(false), Some(true)],
1844            Arc::clone(&col_a),
1845            &schema
1846        );
1847
1848        // Test: a NOT IN ({1, "a"}, {3, "c"})
1849        in_list_raw!(
1850            batch,
1851            list,
1852            &true,
1853            vec![Some(false), Some(true), Some(false)],
1854            Arc::clone(&col_a),
1855            &schema
1856        );
1857
1858        Ok(())
1859    }
1860
1861    #[test]
1862    fn in_list_struct_with_nulls() -> Result<()> {
1863        // Create schema with a struct column
1864        let struct_fields = Fields::from(vec![
1865            Field::new("x", DataType::Int32, false),
1866            Field::new("y", DataType::Utf8, false),
1867        ]);
1868        let schema = Schema::new(vec![Field::new(
1869            "a",
1870            DataType::Struct(struct_fields.clone()),
1871            true,
1872        )]);
1873
1874        // Create test data with a null struct
1875        let x_array = Arc::new(Int32Array::from(vec![1, 2]));
1876        let y_array = Arc::new(StringArray::from(vec!["a", "b"]));
1877        let struct_array = StructArray::new(
1878            struct_fields.clone(),
1879            vec![x_array, y_array],
1880            Some(NullBuffer::from(vec![true, false])),
1881        );
1882
1883        let col_a = col("a", &schema)?;
1884        let batch =
1885            RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(struct_array)])?;
1886
1887        // Create literal struct for the IN list
1888        let struct1 = ScalarValue::Struct(Arc::new(StructArray::new(
1889            struct_fields.clone(),
1890            vec![
1891                Arc::new(Int32Array::from(vec![1])),
1892                Arc::new(StringArray::from(vec!["a"])),
1893            ],
1894            None,
1895        )));
1896
1897        // Test: a IN ({1, "a"})
1898        let list = vec![lit(struct1.clone())];
1899        in_list_raw!(
1900            batch,
1901            list.clone(),
1902            &false,
1903            vec![Some(true), None],
1904            Arc::clone(&col_a),
1905            &schema
1906        );
1907
1908        // Test: a NOT IN ({1, "a"})
1909        in_list_raw!(
1910            batch,
1911            list,
1912            &true,
1913            vec![Some(false), None],
1914            Arc::clone(&col_a),
1915            &schema
1916        );
1917
1918        Ok(())
1919    }
1920
1921    #[test]
1922    fn in_list_struct_with_null_in_list() -> Result<()> {
1923        // Create schema with a struct column
1924        let struct_fields = Fields::from(vec![
1925            Field::new("x", DataType::Int32, false),
1926            Field::new("y", DataType::Utf8, false),
1927        ]);
1928        let schema = Schema::new(vec![Field::new(
1929            "a",
1930            DataType::Struct(struct_fields.clone()),
1931            true,
1932        )]);
1933
1934        // Create test data
1935        let x_array = Arc::new(Int32Array::from(vec![1, 2, 3]));
1936        let y_array = Arc::new(StringArray::from(vec!["a", "b", "c"]));
1937        let struct_array =
1938            StructArray::new(struct_fields.clone(), vec![x_array, y_array], None);
1939
1940        let col_a = col("a", &schema)?;
1941        let batch =
1942            RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(struct_array)])?;
1943
1944        // Create literal structs including a NULL
1945        let struct1 = ScalarValue::Struct(Arc::new(StructArray::new(
1946            struct_fields.clone(),
1947            vec![
1948                Arc::new(Int32Array::from(vec![1])),
1949                Arc::new(StringArray::from(vec!["a"])),
1950            ],
1951            None,
1952        )));
1953
1954        let null_struct = ScalarValue::Struct(Arc::new(StructArray::new_null(
1955            struct_fields.clone(),
1956            1,
1957        )));
1958
1959        // Test: a IN ({1, "a"}, NULL)
1960        let list = vec![lit(struct1), lit(null_struct.clone())];
1961        in_list_raw!(
1962            batch,
1963            list.clone(),
1964            &false,
1965            vec![Some(true), None, None],
1966            Arc::clone(&col_a),
1967            &schema
1968        );
1969
1970        // Test: a NOT IN ({1, "a"}, NULL)
1971        in_list_raw!(
1972            batch,
1973            list,
1974            &true,
1975            vec![Some(false), None, None],
1976            Arc::clone(&col_a),
1977            &schema
1978        );
1979
1980        Ok(())
1981    }
1982
1983    #[test]
1984    fn in_list_nested_struct() -> Result<()> {
1985        // Create nested struct schema
1986        let inner_struct_fields = Fields::from(vec![
1987            Field::new("a", DataType::Int32, false),
1988            Field::new("b", DataType::Utf8, false),
1989        ]);
1990        let outer_struct_fields = Fields::from(vec![
1991            Field::new(
1992                "inner",
1993                DataType::Struct(inner_struct_fields.clone()),
1994                false,
1995            ),
1996            Field::new("c", DataType::Int32, false),
1997        ]);
1998        let schema = Schema::new(vec![Field::new(
1999            "x",
2000            DataType::Struct(outer_struct_fields.clone()),
2001            true,
2002        )]);
2003
2004        // Create test data with nested structs
2005        let inner1 = Arc::new(StructArray::new(
2006            inner_struct_fields.clone(),
2007            vec![
2008                Arc::new(Int32Array::from(vec![1, 2])),
2009                Arc::new(StringArray::from(vec!["x", "y"])),
2010            ],
2011            None,
2012        ));
2013        let c_array = Arc::new(Int32Array::from(vec![10, 20]));
2014        let outer_array =
2015            StructArray::new(outer_struct_fields.clone(), vec![inner1, c_array], None);
2016
2017        let col_x = col("x", &schema)?;
2018        let batch =
2019            RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(outer_array)])?;
2020
2021        // Create a nested struct literal matching the first row
2022        let inner_match = Arc::new(StructArray::new(
2023            inner_struct_fields.clone(),
2024            vec![
2025                Arc::new(Int32Array::from(vec![1])),
2026                Arc::new(StringArray::from(vec!["x"])),
2027            ],
2028            None,
2029        ));
2030        let outer_match = ScalarValue::Struct(Arc::new(StructArray::new(
2031            outer_struct_fields.clone(),
2032            vec![inner_match, Arc::new(Int32Array::from(vec![10]))],
2033            None,
2034        )));
2035
2036        // Test: x IN ({{1, "x"}, 10})
2037        let list = vec![lit(outer_match)];
2038        in_list_raw!(
2039            batch,
2040            list.clone(),
2041            &false,
2042            vec![Some(true), Some(false)],
2043            Arc::clone(&col_x),
2044            &schema
2045        );
2046
2047        // Test: x NOT IN ({{1, "x"}, 10})
2048        in_list_raw!(
2049            batch,
2050            list,
2051            &true,
2052            vec![Some(false), Some(true)],
2053            Arc::clone(&col_x),
2054            &schema
2055        );
2056
2057        Ok(())
2058    }
2059
2060    #[test]
2061    fn in_list_struct_with_exprs_not_array() -> Result<()> {
2062        // Test InList using expressions (not the array constructor) with structs
2063        // By using InListExpr::new directly, we bypass the array optimization
2064        // and use the Exprs variant, testing the expression evaluation path
2065
2066        // Create schema with a struct column {x: Int32, y: Utf8}
2067        let struct_fields = Fields::from(vec![
2068            Field::new("x", DataType::Int32, false),
2069            Field::new("y", DataType::Utf8, false),
2070        ]);
2071        let schema = Schema::new(vec![Field::new(
2072            "a",
2073            DataType::Struct(struct_fields.clone()),
2074            true,
2075        )]);
2076
2077        // Create test data: array of structs [{1, "a"}, {2, "b"}, {3, "c"}]
2078        let x_array = Arc::new(Int32Array::from(vec![1, 2, 3]));
2079        let y_array = Arc::new(StringArray::from(vec!["a", "b", "c"]));
2080        let struct_array =
2081            StructArray::new(struct_fields.clone(), vec![x_array, y_array], None);
2082
2083        let col_a = col("a", &schema)?;
2084        let batch =
2085            RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(struct_array)])?;
2086
2087        // Create struct literals with the SAME shape (so types are compatible)
2088        // Struct {x: 1, y: "a"}
2089        let struct1 = ScalarValue::Struct(Arc::new(StructArray::new(
2090            struct_fields.clone(),
2091            vec![
2092                Arc::new(Int32Array::from(vec![1])),
2093                Arc::new(StringArray::from(vec!["a"])),
2094            ],
2095            None,
2096        )));
2097
2098        // Struct {x: 3, y: "c"}
2099        let struct3 = ScalarValue::Struct(Arc::new(StructArray::new(
2100            struct_fields.clone(),
2101            vec![
2102                Arc::new(Int32Array::from(vec![3])),
2103                Arc::new(StringArray::from(vec!["c"])),
2104            ],
2105            None,
2106        )));
2107
2108        // Create list of struct expressions
2109        let list = vec![lit(struct1), lit(struct3)];
2110
2111        // Use InListExpr::new directly (not in_list()) to bypass array optimization
2112        // This creates an InList without a static filter
2113        let expr = Arc::new(InListExpr::new(Arc::clone(&col_a), list, false, None));
2114
2115        // Verify that the expression doesn't have a static filter
2116        // by checking the display string does NOT contain "(SET)"
2117        let display_string = expr.to_string();
2118        assert!(
2119            !display_string.contains("(SET)"),
2120            "Expected display string to NOT contain '(SET)' (should use Exprs variant), but got: {display_string}",
2121        );
2122
2123        // Evaluate the expression
2124        let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?;
2125        let result = as_boolean_array(&result);
2126
2127        // Expected: first row {1, "a"} matches struct1,
2128        //           second row {2, "b"} doesn't match,
2129        //           third row {3, "c"} matches struct3
2130        let expected = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
2131        assert_eq!(result, &expected);
2132
2133        // Test NOT IN as well
2134        let expr_not = Arc::new(InListExpr::new(
2135            Arc::clone(&col_a),
2136            vec![
2137                lit(ScalarValue::Struct(Arc::new(StructArray::new(
2138                    struct_fields.clone(),
2139                    vec![
2140                        Arc::new(Int32Array::from(vec![1])),
2141                        Arc::new(StringArray::from(vec!["a"])),
2142                    ],
2143                    None,
2144                )))),
2145                lit(ScalarValue::Struct(Arc::new(StructArray::new(
2146                    struct_fields.clone(),
2147                    vec![
2148                        Arc::new(Int32Array::from(vec![3])),
2149                        Arc::new(StringArray::from(vec!["c"])),
2150                    ],
2151                    None,
2152                )))),
2153            ],
2154            true,
2155            None,
2156        ));
2157
2158        let result_not = expr_not.evaluate(&batch)?.into_array(batch.num_rows())?;
2159        let result_not = as_boolean_array(&result_not);
2160
2161        let expected_not = BooleanArray::from(vec![Some(false), Some(true), Some(false)]);
2162        assert_eq!(result_not, &expected_not);
2163
2164        Ok(())
2165    }
2166
2167    #[test]
2168    fn test_in_list_null_handling_comprehensive() -> Result<()> {
2169        // Comprehensive test demonstrating SQL three-valued logic for IN expressions
2170        // This test explicitly shows all possible outcomes: true, false, and null
2171        let schema = Schema::new(vec![Field::new("a", DataType::Int64, true)]);
2172
2173        // Test data: [1, 2, 3, null]
2174        // - 1 will match in both lists
2175        // - 2 will not match in either list
2176        // - 3 will not match in either list
2177        // - null is always null
2178        let a = Int64Array::from(vec![Some(1), Some(2), Some(3), None]);
2179        let col_a = col("a", &schema)?;
2180        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?;
2181
2182        // Case 1: List WITHOUT null - demonstrates true/false/null outcomes
2183        // "a IN (1, 4)" - 1 matches, 2 and 3 don't match, null is null
2184        let list = vec![lit(1i64), lit(4i64)];
2185        in_list!(
2186            batch,
2187            list,
2188            &false,
2189            vec![
2190                Some(true),  // 1 is in the list → true
2191                Some(false), // 2 is not in the list → false
2192                Some(false), // 3 is not in the list → false
2193                None,        // null IN (...) → null (SQL three-valued logic)
2194            ],
2195            Arc::clone(&col_a),
2196            &schema
2197        );
2198
2199        // Case 2: List WITH null - demonstrates null propagation for non-matches
2200        // "a IN (1, NULL)" - 1 matches (true), 2/3 don't match but list has null (null), null is null
2201        let list = vec![lit(1i64), lit(ScalarValue::Int64(None))];
2202        in_list!(
2203            batch,
2204            list,
2205            &false,
2206            vec![
2207                Some(true), // 1 is in the list → true (found match)
2208                None, // 2 is not in list, but list has NULL → null (might match NULL)
2209                None, // 3 is not in list, but list has NULL → null (might match NULL)
2210                None, // null IN (...) → null (SQL three-valued logic)
2211            ],
2212            Arc::clone(&col_a),
2213            &schema
2214        );
2215
2216        Ok(())
2217    }
2218
2219    #[test]
2220    fn test_in_list_with_only_nulls() -> Result<()> {
2221        // Edge case: IN list contains ONLY null values
2222        let schema = Schema::new(vec![Field::new("a", DataType::Int64, true)]);
2223        let a = Int64Array::from(vec![Some(1), Some(2), None]);
2224        let col_a = col("a", &schema)?;
2225        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?;
2226
2227        // "a IN (NULL, NULL)" - list has only nulls
2228        let list = vec![lit(ScalarValue::Int64(None)), lit(ScalarValue::Int64(None))];
2229
2230        // All results should be NULL because:
2231        // - Non-null values (1, 2) can't match anything concrete, but list might contain matching value
2232        // - NULL value is always NULL in IN expressions
2233        in_list!(
2234            batch,
2235            list.clone(),
2236            &false,
2237            vec![None, None, None],
2238            Arc::clone(&col_a),
2239            &schema
2240        );
2241
2242        // "a NOT IN (NULL, NULL)" - list has only nulls
2243        // All results should still be NULL due to three-valued logic
2244        in_list!(
2245            batch,
2246            list,
2247            &true,
2248            vec![None, None, None],
2249            Arc::clone(&col_a),
2250            &schema
2251        );
2252
2253        Ok(())
2254    }
2255
2256    #[test]
2257    fn test_in_list_multiple_nulls_deduplication() -> Result<()> {
2258        // Test that multiple NULLs in the list are handled correctly
2259        // This verifies deduplication doesn't break null handling
2260        let schema = Schema::new(vec![Field::new("a", DataType::Int64, true)]);
2261        let col_a = col("a", &schema)?;
2262
2263        // Create array with multiple nulls: [1, 2, NULL, NULL, 3, NULL]
2264        let array = Arc::new(Int64Array::from(vec![
2265            Some(1),
2266            Some(2),
2267            None,
2268            None,
2269            Some(3),
2270            None,
2271        ])) as ArrayRef;
2272
2273        // Create InListExpr from array
2274        let expr = Arc::new(InListExpr::try_new_from_array(
2275            Arc::clone(&col_a),
2276            array,
2277            false,
2278        )?) as Arc<dyn PhysicalExpr>;
2279
2280        // Create test data: [1, 2, 3, 4, null]
2281        let a = Int64Array::from(vec![Some(1), Some(2), Some(3), Some(4), None]);
2282        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?;
2283
2284        // Evaluate the expression
2285        let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?;
2286        let result = as_boolean_array(&result);
2287
2288        // Expected behavior with multiple NULLs in list:
2289        // - Values in the list (1,2,3) → true
2290        // - Values not in the list (4) → NULL (because list contains NULL)
2291        // - NULL input → NULL
2292        let expected = BooleanArray::from(vec![
2293            Some(true), // 1 is in list
2294            Some(true), // 2 is in list
2295            Some(true), // 3 is in list
2296            None,       // 4 not in list, but list has NULLs
2297            None,       // NULL input
2298        ]);
2299        assert_eq!(result, &expected);
2300
2301        Ok(())
2302    }
2303
2304    #[test]
2305    fn test_not_in_null_handling_comprehensive() -> Result<()> {
2306        // Comprehensive test demonstrating SQL three-valued logic for NOT IN expressions
2307        // This test explicitly shows all possible outcomes for NOT IN: true, false, and null
2308        let schema = Schema::new(vec![Field::new("a", DataType::Int64, true)]);
2309
2310        // Test data: [1, 2, 3, null]
2311        let a = Int64Array::from(vec![Some(1), Some(2), Some(3), None]);
2312        let col_a = col("a", &schema)?;
2313        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?;
2314
2315        // Case 1: List WITHOUT null - demonstrates true/false/null outcomes for NOT IN
2316        // "a NOT IN (1, 4)" - 1 matches (false), 2 and 3 don't match (true), null is null
2317        let list = vec![lit(1i64), lit(4i64)];
2318        in_list!(
2319            batch,
2320            list,
2321            &true,
2322            vec![
2323                Some(false), // 1 is in the list → NOT IN returns false
2324                Some(true),  // 2 is not in the list → NOT IN returns true
2325                Some(true),  // 3 is not in the list → NOT IN returns true
2326                None,        // null NOT IN (...) → null (SQL three-valued logic)
2327            ],
2328            Arc::clone(&col_a),
2329            &schema
2330        );
2331
2332        // Case 2: List WITH null - demonstrates null propagation for NOT IN
2333        // "a NOT IN (1, NULL)" - 1 matches (false), 2/3 don't match but list has null (null), null is null
2334        let list = vec![lit(1i64), lit(ScalarValue::Int64(None))];
2335        in_list!(
2336            batch,
2337            list,
2338            &true,
2339            vec![
2340                Some(false), // 1 is in the list → NOT IN returns false
2341                None, // 2 is not in known values, but list has NULL → null (can't prove it's not in list)
2342                None, // 3 is not in known values, but list has NULL → null (can't prove it's not in list)
2343                None, // null NOT IN (...) → null (SQL three-valued logic)
2344            ],
2345            Arc::clone(&col_a),
2346            &schema
2347        );
2348
2349        Ok(())
2350    }
2351
2352    #[test]
2353    fn test_in_list_null_type_column() -> Result<()> {
2354        // Test with a column that has DataType::Null (not just nullable values)
2355        // All values in a NullArray are null by definition
2356        let schema = Schema::new(vec![Field::new("a", DataType::Null, true)]);
2357        let a = NullArray::new(3);
2358        let col_a = col("a", &schema)?;
2359        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?;
2360
2361        // "null_column IN (1, 2)" - comparing Null type against Int64 list
2362        // Note: This tests type coercion behavior between Null and Int64
2363        let list = vec![lit(1i64), lit(2i64)];
2364
2365        // All results should be NULL because:
2366        // - Every value in the column is null (DataType::Null)
2367        // - null IN (anything) always returns null per SQL three-valued logic
2368        in_list!(
2369            batch,
2370            list.clone(),
2371            &false,
2372            vec![None, None, None],
2373            Arc::clone(&col_a),
2374            &schema
2375        );
2376
2377        // "null_column NOT IN (1, 2)"
2378        // Same behavior for NOT IN - null NOT IN (anything) is still null
2379        in_list!(
2380            batch,
2381            list,
2382            &true,
2383            vec![None, None, None],
2384            Arc::clone(&col_a),
2385            &schema
2386        );
2387
2388        Ok(())
2389    }
2390
2391    #[test]
2392    fn test_in_list_null_type_list() -> Result<()> {
2393        // Test with a list that has DataType::Null
2394        let schema = Schema::new(vec![Field::new("a", DataType::Int64, true)]);
2395        let a = Int64Array::from(vec![Some(1), Some(2), None]);
2396        let col_a = col("a", &schema)?;
2397
2398        // Create a NullArray as the list
2399        let null_array = Arc::new(NullArray::new(2)) as ArrayRef;
2400
2401        // Try to create InListExpr with a NullArray list
2402        // This tests whether try_new_from_array can handle Null type arrays
2403        let expr = Arc::new(InListExpr::try_new_from_array(
2404            Arc::clone(&col_a),
2405            null_array,
2406            false,
2407        )?) as Arc<dyn PhysicalExpr>;
2408        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?;
2409        let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?;
2410        let result = as_boolean_array(&result);
2411
2412        // If it succeeds, all results should be NULL
2413        // because the list contains only null type values
2414        let expected = BooleanArray::from(vec![None, None, None]);
2415        assert_eq!(result, &expected);
2416
2417        Ok(())
2418    }
2419
2420    #[test]
2421    fn test_in_list_null_type_both() -> Result<()> {
2422        // Test when both column and list are DataType::Null
2423        let schema = Schema::new(vec![Field::new("a", DataType::Null, true)]);
2424        let a = NullArray::new(3);
2425        let col_a = col("a", &schema)?;
2426
2427        // Create a NullArray as the list
2428        let null_array = Arc::new(NullArray::new(2)) as ArrayRef;
2429
2430        // Try to create InListExpr with both Null types
2431        let expr = Arc::new(InListExpr::try_new_from_array(
2432            Arc::clone(&col_a),
2433            null_array,
2434            false,
2435        )?) as Arc<dyn PhysicalExpr>;
2436
2437        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?;
2438        let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?;
2439        let result = as_boolean_array(&result);
2440
2441        // If successful, all results should be NULL
2442        // null IN [null, null] -> null
2443        let expected = BooleanArray::from(vec![None, None, None]);
2444        assert_eq!(result, &expected);
2445
2446        Ok(())
2447    }
2448
2449    #[test]
2450    fn test_in_list_comprehensive_null_handling() -> Result<()> {
2451        // Comprehensive test for IN LIST operations with various NULL handling scenarios.
2452        // This test covers the key cases validated against DuckDB as the source of truth.
2453        //
2454        // Note: Some scalar literal tests (like NULL IN (1, 2)) are omitted as they
2455        // appear to expose an issue with static filter optimization. These are covered
2456        // by existing tests like in_list_no_cols().
2457
2458        let schema = Arc::new(Schema::new(vec![Field::new("b", DataType::Int32, true)]));
2459        let col_b = col("b", &schema)?;
2460        let null_i32 = ScalarValue::Int32(None);
2461
2462        // Helper to create a batch
2463        let make_batch = |values: Vec<Option<i32>>| -> Result<RecordBatch> {
2464            let array = Arc::new(Int32Array::from(values));
2465            Ok(RecordBatch::try_new(Arc::clone(&schema), vec![array])?)
2466        };
2467
2468        // Helper to run a test
2469        let run_test = |batch: &RecordBatch,
2470                        expr: Arc<dyn PhysicalExpr>,
2471                        list: Vec<Arc<dyn PhysicalExpr>>,
2472                        expected: Vec<Option<bool>>|
2473         -> Result<()> {
2474            let in_expr = in_list(expr, list, &false, schema.as_ref())?;
2475            let result = in_expr.evaluate(batch)?.into_array(batch.num_rows())?;
2476            let result = as_boolean_array(&result);
2477            assert_eq!(result, &BooleanArray::from(expected));
2478            Ok(())
2479        };
2480
2481        // ========================================================================
2482        // COLUMN TESTS - col(b) IN [1, 2]
2483        // ========================================================================
2484
2485        // [1] IN (1, 2) => [TRUE]
2486        let batch = make_batch(vec![Some(1)])?;
2487        run_test(
2488            &batch,
2489            Arc::clone(&col_b),
2490            vec![lit(1i32), lit(2i32)],
2491            vec![Some(true)],
2492        )?;
2493
2494        // [1, 2] IN (1, 2) => [TRUE, TRUE]
2495        let batch = make_batch(vec![Some(1), Some(2)])?;
2496        run_test(
2497            &batch,
2498            Arc::clone(&col_b),
2499            vec![lit(1i32), lit(2i32)],
2500            vec![Some(true), Some(true)],
2501        )?;
2502
2503        // [3, 4] IN (1, 2) => [FALSE, FALSE]
2504        let batch = make_batch(vec![Some(3), Some(4)])?;
2505        run_test(
2506            &batch,
2507            Arc::clone(&col_b),
2508            vec![lit(1i32), lit(2i32)],
2509            vec![Some(false), Some(false)],
2510        )?;
2511
2512        // [1, NULL] IN (1, 2) => [TRUE, NULL]
2513        let batch = make_batch(vec![Some(1), None])?;
2514        run_test(
2515            &batch,
2516            Arc::clone(&col_b),
2517            vec![lit(1i32), lit(2i32)],
2518            vec![Some(true), None],
2519        )?;
2520
2521        // [3, NULL] IN (1, 2) => [FALSE, NULL] (no match, NULL is NULL)
2522        let batch = make_batch(vec![Some(3), None])?;
2523        run_test(
2524            &batch,
2525            Arc::clone(&col_b),
2526            vec![lit(1i32), lit(2i32)],
2527            vec![Some(false), None],
2528        )?;
2529
2530        // ========================================================================
2531        // COLUMN WITH NULL IN LIST - col(b) IN [NULL, 1]
2532        // ========================================================================
2533
2534        // [1] IN (NULL, 1) => [TRUE] (found match)
2535        let batch = make_batch(vec![Some(1)])?;
2536        run_test(
2537            &batch,
2538            Arc::clone(&col_b),
2539            vec![lit(null_i32.clone()), lit(1i32)],
2540            vec![Some(true)],
2541        )?;
2542
2543        // [2] IN (NULL, 1) => [NULL] (no match, but list has NULL)
2544        let batch = make_batch(vec![Some(2)])?;
2545        run_test(
2546            &batch,
2547            Arc::clone(&col_b),
2548            vec![lit(null_i32.clone()), lit(1i32)],
2549            vec![None],
2550        )?;
2551
2552        // [NULL] IN (NULL, 1) => [NULL]
2553        let batch = make_batch(vec![None])?;
2554        run_test(
2555            &batch,
2556            Arc::clone(&col_b),
2557            vec![lit(null_i32.clone()), lit(1i32)],
2558            vec![None],
2559        )?;
2560
2561        // ========================================================================
2562        // COLUMN WITH ALL NULLS IN LIST - col(b) IN [NULL, NULL]
2563        // ========================================================================
2564
2565        // [1] IN (NULL, NULL) => [NULL]
2566        let batch = make_batch(vec![Some(1)])?;
2567        run_test(
2568            &batch,
2569            Arc::clone(&col_b),
2570            vec![lit(null_i32.clone()), lit(null_i32.clone())],
2571            vec![None],
2572        )?;
2573
2574        // [NULL] IN (NULL, NULL) => [NULL]
2575        let batch = make_batch(vec![None])?;
2576        run_test(
2577            &batch,
2578            Arc::clone(&col_b),
2579            vec![lit(null_i32.clone()), lit(null_i32.clone())],
2580            vec![None],
2581        )?;
2582
2583        // ========================================================================
2584        // LITERAL IN LIST WITH COLUMN - lit(1) IN [2, col(b)]
2585        // ========================================================================
2586
2587        // 1 IN (2, [1]) => [TRUE] (matches column value)
2588        let batch = make_batch(vec![Some(1)])?;
2589        run_test(
2590            &batch,
2591            lit(1i32),
2592            vec![lit(2i32), Arc::clone(&col_b)],
2593            vec![Some(true)],
2594        )?;
2595
2596        // 1 IN (2, [3]) => [FALSE] (no match)
2597        let batch = make_batch(vec![Some(3)])?;
2598        run_test(
2599            &batch,
2600            lit(1i32),
2601            vec![lit(2i32), Arc::clone(&col_b)],
2602            vec![Some(false)],
2603        )?;
2604
2605        // 1 IN (2, [NULL]) => [NULL] (no match, column is NULL)
2606        let batch = make_batch(vec![None])?;
2607        run_test(
2608            &batch,
2609            lit(1i32),
2610            vec![lit(2i32), Arc::clone(&col_b)],
2611            vec![None],
2612        )?;
2613
2614        // ========================================================================
2615        // COLUMN IN LIST CONTAINING ITSELF - col(b) IN [1, col(b)]
2616        // ========================================================================
2617
2618        // [1] IN (1, [1]) => [TRUE] (always matches - either list literal or itself)
2619        let batch = make_batch(vec![Some(1)])?;
2620        run_test(
2621            &batch,
2622            Arc::clone(&col_b),
2623            vec![lit(1i32), Arc::clone(&col_b)],
2624            vec![Some(true)],
2625        )?;
2626
2627        // [2] IN (1, [2]) => [TRUE] (matches itself)
2628        let batch = make_batch(vec![Some(2)])?;
2629        run_test(
2630            &batch,
2631            Arc::clone(&col_b),
2632            vec![lit(1i32), Arc::clone(&col_b)],
2633            vec![Some(true)],
2634        )?;
2635
2636        // [NULL] IN (1, [NULL]) => [NULL] (NULL is never equal to anything)
2637        let batch = make_batch(vec![None])?;
2638        run_test(
2639            &batch,
2640            Arc::clone(&col_b),
2641            vec![lit(1i32), Arc::clone(&col_b)],
2642            vec![None],
2643        )?;
2644
2645        Ok(())
2646    }
2647
2648    #[test]
2649    fn test_in_list_scalar_literal_cases() -> Result<()> {
2650        // Test scalar literal cases (both NULL and non-NULL) to ensure SQL three-valued
2651        // logic is correctly implemented. This covers the important case where a scalar
2652        // value is tested against a list containing NULL.
2653
2654        let schema = Arc::new(Schema::new(vec![Field::new("b", DataType::Int32, true)]));
2655        let null_i32 = ScalarValue::Int32(None);
2656
2657        // Helper to create a batch
2658        let make_batch = |values: Vec<Option<i32>>| -> Result<RecordBatch> {
2659            let array = Arc::new(Int32Array::from(values));
2660            Ok(RecordBatch::try_new(Arc::clone(&schema), vec![array])?)
2661        };
2662
2663        // Helper to run a test
2664        let run_test = |batch: &RecordBatch,
2665                        expr: Arc<dyn PhysicalExpr>,
2666                        list: Vec<Arc<dyn PhysicalExpr>>,
2667                        negated: bool,
2668                        expected: Vec<Option<bool>>|
2669         -> Result<()> {
2670            let in_expr = in_list(expr, list, &negated, schema.as_ref())?;
2671            let result = in_expr.evaluate(batch)?.into_array(batch.num_rows())?;
2672            let result = as_boolean_array(&result);
2673            let expected_array = BooleanArray::from(expected);
2674            assert_eq!(
2675                result,
2676                &expected_array,
2677                "Expected {:?}, got {:?}",
2678                expected_array,
2679                result.iter().collect::<Vec<_>>()
2680            );
2681            Ok(())
2682        };
2683
2684        let batch = make_batch(vec![Some(1)])?;
2685
2686        // ========================================================================
2687        // NULL LITERAL TESTS
2688        // According to SQL semantics, NULL IN (any_list) should always return NULL
2689        // ========================================================================
2690
2691        // NULL IN (1, 1) => NULL
2692        run_test(
2693            &batch,
2694            lit(null_i32.clone()),
2695            vec![lit(1i32), lit(1i32)],
2696            false,
2697            vec![None],
2698        )?;
2699
2700        // NULL IN (NULL, 1) => NULL
2701        run_test(
2702            &batch,
2703            lit(null_i32.clone()),
2704            vec![lit(null_i32.clone()), lit(1i32)],
2705            false,
2706            vec![None],
2707        )?;
2708
2709        // NULL IN (NULL, NULL) => NULL
2710        run_test(
2711            &batch,
2712            lit(null_i32.clone()),
2713            vec![lit(null_i32.clone()), lit(null_i32.clone())],
2714            false,
2715            vec![None],
2716        )?;
2717
2718        // ========================================================================
2719        // NON-NULL SCALAR LITERALS WITH NULL IN LIST - Int32
2720        // When a scalar value is NOT in a list containing NULL, the result is NULL
2721        // When a scalar value IS in the list, the result is TRUE (NULL doesn't matter)
2722        // ========================================================================
2723
2724        // 3 IN (0, 1, 2, NULL) => NULL (not in list, but list has NULL)
2725        run_test(
2726            &batch,
2727            lit(3i32),
2728            vec![lit(0i32), lit(1i32), lit(2i32), lit(null_i32.clone())],
2729            false,
2730            vec![None],
2731        )?;
2732
2733        // 3 NOT IN (0, 1, 2, NULL) => NULL (not in list, but list has NULL)
2734        run_test(
2735            &batch,
2736            lit(3i32),
2737            vec![lit(0i32), lit(1i32), lit(2i32), lit(null_i32.clone())],
2738            true,
2739            vec![None],
2740        )?;
2741
2742        // 1 IN (0, 1, 2, NULL) => TRUE (found match, NULL doesn't matter)
2743        run_test(
2744            &batch,
2745            lit(1i32),
2746            vec![lit(0i32), lit(1i32), lit(2i32), lit(null_i32.clone())],
2747            false,
2748            vec![Some(true)],
2749        )?;
2750
2751        // 1 NOT IN (0, 1, 2, NULL) => FALSE (found match, NULL doesn't matter)
2752        run_test(
2753            &batch,
2754            lit(1i32),
2755            vec![lit(0i32), lit(1i32), lit(2i32), lit(null_i32.clone())],
2756            true,
2757            vec![Some(false)],
2758        )?;
2759
2760        // ========================================================================
2761        // NON-NULL SCALAR LITERALS WITH NULL IN LIST - String
2762        // Same semantics as Int32 but with string type
2763        // ========================================================================
2764
2765        let schema_str =
2766            Arc::new(Schema::new(vec![Field::new("s", DataType::Utf8, true)]));
2767        let batch_str = RecordBatch::try_new(
2768            Arc::clone(&schema_str),
2769            vec![Arc::new(StringArray::from(vec![Some("dummy")]))],
2770        )?;
2771        let null_str = ScalarValue::Utf8(None);
2772
2773        let run_test_str = |expr: Arc<dyn PhysicalExpr>,
2774                            list: Vec<Arc<dyn PhysicalExpr>>,
2775                            negated: bool,
2776                            expected: Vec<Option<bool>>|
2777         -> Result<()> {
2778            let in_expr = in_list(expr, list, &negated, schema_str.as_ref())?;
2779            let result = in_expr
2780                .evaluate(&batch_str)?
2781                .into_array(batch_str.num_rows())?;
2782            let result = as_boolean_array(&result);
2783            let expected_array = BooleanArray::from(expected);
2784            assert_eq!(
2785                result,
2786                &expected_array,
2787                "Expected {:?}, got {:?}",
2788                expected_array,
2789                result.iter().collect::<Vec<_>>()
2790            );
2791            Ok(())
2792        };
2793
2794        // 'c' IN ('a', 'b', NULL) => NULL (not in list, but list has NULL)
2795        run_test_str(
2796            lit("c"),
2797            vec![lit("a"), lit("b"), lit(null_str.clone())],
2798            false,
2799            vec![None],
2800        )?;
2801
2802        // 'c' NOT IN ('a', 'b', NULL) => NULL (not in list, but list has NULL)
2803        run_test_str(
2804            lit("c"),
2805            vec![lit("a"), lit("b"), lit(null_str.clone())],
2806            true,
2807            vec![None],
2808        )?;
2809
2810        // 'a' IN ('a', 'b', NULL) => TRUE (found match, NULL doesn't matter)
2811        run_test_str(
2812            lit("a"),
2813            vec![lit("a"), lit("b"), lit(null_str.clone())],
2814            false,
2815            vec![Some(true)],
2816        )?;
2817
2818        // 'a' NOT IN ('a', 'b', NULL) => FALSE (found match, NULL doesn't matter)
2819        run_test_str(
2820            lit("a"),
2821            vec![lit("a"), lit("b"), lit(null_str.clone())],
2822            true,
2823            vec![Some(false)],
2824        )?;
2825
2826        Ok(())
2827    }
2828
2829    #[test]
2830    fn test_in_list_tuple_cases() -> Result<()> {
2831        // Test tuple/struct cases from the original request: (lit, lit) IN (lit, lit)
2832        // These test row-wise comparisons like (1, 2) IN ((1, 2), (3, 4))
2833
2834        let schema = Arc::new(Schema::new(vec![Field::new("b", DataType::Int32, true)]));
2835
2836        // Helper to create struct scalars for tuple comparisons
2837        let make_struct = |v1: Option<i32>, v2: Option<i32>| -> ScalarValue {
2838            let fields = Fields::from(vec![
2839                Field::new("field_0", DataType::Int32, true),
2840                Field::new("field_1", DataType::Int32, true),
2841            ]);
2842            ScalarValue::Struct(Arc::new(StructArray::new(
2843                fields,
2844                vec![
2845                    Arc::new(Int32Array::from(vec![v1])),
2846                    Arc::new(Int32Array::from(vec![v2])),
2847                ],
2848                None,
2849            )))
2850        };
2851
2852        // Need a single row batch for scalar tests
2853        let batch = RecordBatch::try_new(
2854            Arc::clone(&schema),
2855            vec![Arc::new(Int32Array::from(vec![Some(1)]))],
2856        )?;
2857
2858        // Helper to run tuple tests
2859        let run_tuple_test = |lhs: ScalarValue,
2860                              list: Vec<ScalarValue>,
2861                              expected: Vec<Option<bool>>|
2862         -> Result<()> {
2863            let expr = in_list(
2864                lit(lhs),
2865                list.into_iter().map(lit).collect(),
2866                &false,
2867                schema.as_ref(),
2868            )?;
2869            let result = expr.evaluate(&batch)?.into_array(batch.num_rows())?;
2870            let result = as_boolean_array(&result);
2871            assert_eq!(result, &BooleanArray::from(expected));
2872            Ok(())
2873        };
2874
2875        // (NULL, NULL) IN ((1, 2)) => FALSE (tuples don't match)
2876        run_tuple_test(
2877            make_struct(None, None),
2878            vec![make_struct(Some(1), Some(2))],
2879            vec![Some(false)],
2880        )?;
2881
2882        // (NULL, NULL) IN ((NULL, 1)) => FALSE
2883        run_tuple_test(
2884            make_struct(None, None),
2885            vec![make_struct(None, Some(1))],
2886            vec![Some(false)],
2887        )?;
2888
2889        // (NULL, NULL) IN ((NULL, NULL)) => TRUE (exact match including nulls)
2890        run_tuple_test(
2891            make_struct(None, None),
2892            vec![make_struct(None, None)],
2893            vec![Some(true)],
2894        )?;
2895
2896        // (NULL, 1) IN ((1, 2)) => FALSE
2897        run_tuple_test(
2898            make_struct(None, Some(1)),
2899            vec![make_struct(Some(1), Some(2))],
2900            vec![Some(false)],
2901        )?;
2902
2903        // (NULL, 1) IN ((NULL, 1)) => TRUE (exact match)
2904        run_tuple_test(
2905            make_struct(None, Some(1)),
2906            vec![make_struct(None, Some(1))],
2907            vec![Some(true)],
2908        )?;
2909
2910        // (NULL, 1) IN ((NULL, NULL)) => FALSE
2911        run_tuple_test(
2912            make_struct(None, Some(1)),
2913            vec![make_struct(None, None)],
2914            vec![Some(false)],
2915        )?;
2916
2917        // (1, 2) IN ((1, 2)) => TRUE
2918        run_tuple_test(
2919            make_struct(Some(1), Some(2)),
2920            vec![make_struct(Some(1), Some(2))],
2921            vec![Some(true)],
2922        )?;
2923
2924        // (1, 3) IN ((1, 2)) => FALSE
2925        run_tuple_test(
2926            make_struct(Some(1), Some(3)),
2927            vec![make_struct(Some(1), Some(2))],
2928            vec![Some(false)],
2929        )?;
2930
2931        // (4, 4) IN ((1, 2)) => FALSE
2932        run_tuple_test(
2933            make_struct(Some(4), Some(4)),
2934            vec![make_struct(Some(1), Some(2))],
2935            vec![Some(false)],
2936        )?;
2937
2938        // (1, 1) IN ((NULL, 1)) => FALSE
2939        run_tuple_test(
2940            make_struct(Some(1), Some(1)),
2941            vec![make_struct(None, Some(1))],
2942            vec![Some(false)],
2943        )?;
2944
2945        // (1, 1) IN ((NULL, NULL)) => FALSE
2946        run_tuple_test(
2947            make_struct(Some(1), Some(1)),
2948            vec![make_struct(None, None)],
2949            vec![Some(false)],
2950        )?;
2951
2952        Ok(())
2953    }
2954
2955    #[test]
2956    fn test_in_list_dictionary_int32() -> Result<()> {
2957        // Create schema with dictionary-encoded Int32 column
2958        let dict_type =
2959            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Int32));
2960        let schema = Schema::new(vec![Field::new("a", dict_type.clone(), false)]);
2961        let col_a = col("a", &schema)?;
2962
2963        // Create IN list with Int32 literals: (100, 200, 300)
2964        let list = vec![lit(100i32), lit(200i32), lit(300i32)];
2965
2966        // Create InListExpr via in_list() - this uses Int32StaticFilter for Int32 lists
2967        let expr = in_list(col_a, list, &false, &schema)?;
2968
2969        // Create dictionary-encoded batch with values [100, 200, 500]
2970        // Dictionary: keys [0, 1, 2] -> values [100, 200, 500]
2971        // Using values clearly distinct from keys to avoid confusion
2972        let keys = Int8Array::from(vec![0, 1, 2]);
2973        let values = Int32Array::from(vec![100, 200, 500]);
2974        let dict_array: ArrayRef =
2975            Arc::new(DictionaryArray::try_new(keys, Arc::new(values))?);
2976        let batch = RecordBatch::try_new(Arc::new(schema), vec![dict_array])?;
2977
2978        // Expected: [100 IN (100,200,300), 200 IN (100,200,300), 500 IN (100,200,300)] = [true, true, false]
2979        let result = expr.evaluate(&batch)?.into_array(3)?;
2980        let result = as_boolean_array(&result);
2981        assert_eq!(result, &BooleanArray::from(vec![true, true, false]));
2982        Ok(())
2983    }
2984
2985    #[test]
2986    fn test_in_list_dictionary_types() -> Result<()> {
2987        // Helper functions for creating dictionary literals
2988        fn dict_lit_int64(key_type: DataType, value: i64) -> Arc<dyn PhysicalExpr> {
2989            lit(ScalarValue::Dictionary(
2990                Box::new(key_type),
2991                Box::new(ScalarValue::Int64(Some(value))),
2992            ))
2993        }
2994
2995        fn dict_lit_float64(key_type: DataType, value: f64) -> Arc<dyn PhysicalExpr> {
2996            lit(ScalarValue::Dictionary(
2997                Box::new(key_type),
2998                Box::new(ScalarValue::Float64(Some(value))),
2999            ))
3000        }
3001
3002        // Test case structures
3003        struct DictNeedleTest {
3004            list_values: Vec<Arc<dyn PhysicalExpr>>,
3005            expected: Vec<Option<bool>>,
3006        }
3007
3008        struct DictionaryInListTestCase {
3009            name: &'static str,
3010            dict_type: DataType,
3011            dict_keys: Vec<Option<i8>>,
3012            dict_values: ArrayRef,
3013            list_values_no_null: Vec<Arc<dyn PhysicalExpr>>,
3014            list_values_with_null: Vec<Arc<dyn PhysicalExpr>>,
3015            expected_1: Vec<Option<bool>>,
3016            expected_2: Vec<Option<bool>>,
3017            expected_3: Vec<Option<bool>>,
3018            expected_4: Vec<Option<bool>>,
3019            dict_needle_test: Option<DictNeedleTest>,
3020        }
3021
3022        // Test harness function
3023        fn run_dictionary_in_list_test(
3024            test_case: DictionaryInListTestCase,
3025        ) -> Result<()> {
3026            // Create schema with dictionary type
3027            let schema =
3028                Schema::new(vec![Field::new("a", test_case.dict_type.clone(), true)]);
3029            let col_a = col("a", &schema)?;
3030
3031            // Create dictionary array from keys and values
3032            let keys = Int8Array::from(test_case.dict_keys.clone());
3033            let dict_array: ArrayRef =
3034                Arc::new(DictionaryArray::try_new(keys, test_case.dict_values)?);
3035            let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![dict_array])?;
3036
3037            let exp1 = test_case.expected_1.clone();
3038            let exp2 = test_case.expected_2.clone();
3039            let exp3 = test_case.expected_3.clone();
3040            let exp4 = test_case.expected_4;
3041
3042            // Test 1: a IN (values_no_null)
3043            in_list!(
3044                batch,
3045                test_case.list_values_no_null.clone(),
3046                &false,
3047                exp1,
3048                Arc::clone(&col_a),
3049                &schema
3050            );
3051
3052            // Test 2: a NOT IN (values_no_null)
3053            in_list!(
3054                batch,
3055                test_case.list_values_no_null.clone(),
3056                &true,
3057                exp2,
3058                Arc::clone(&col_a),
3059                &schema
3060            );
3061
3062            // Test 3: a IN (values_with_null)
3063            in_list!(
3064                batch,
3065                test_case.list_values_with_null.clone(),
3066                &false,
3067                exp3,
3068                Arc::clone(&col_a),
3069                &schema
3070            );
3071
3072            // Test 4: a NOT IN (values_with_null)
3073            in_list!(
3074                batch,
3075                test_case.list_values_with_null,
3076                &true,
3077                exp4,
3078                Arc::clone(&col_a),
3079                &schema
3080            );
3081
3082            // Optional: Dictionary needle test (if provided)
3083            if let Some(needle_test) = test_case.dict_needle_test {
3084                in_list_raw!(
3085                    batch,
3086                    needle_test.list_values,
3087                    &false,
3088                    needle_test.expected,
3089                    Arc::clone(&col_a),
3090                    &schema
3091                );
3092            }
3093
3094            Ok(())
3095        }
3096
3097        // Test case 1: UTF8
3098        // Dictionary: keys [0, 1, null] → values ["a", "d", -]
3099        // Rows: ["a", "d", null]
3100        let utf8_case = DictionaryInListTestCase {
3101            name: "dictionary_utf8",
3102            dict_type: DataType::Dictionary(
3103                Box::new(DataType::Int8),
3104                Box::new(DataType::Utf8),
3105            ),
3106            dict_keys: vec![Some(0), Some(1), None],
3107            dict_values: Arc::new(StringArray::from(vec![Some("a"), Some("d")])),
3108            list_values_no_null: vec![lit("a"), lit("b")],
3109            list_values_with_null: vec![lit("a"), lit("b"), lit(ScalarValue::Utf8(None))],
3110            expected_1: vec![Some(true), Some(false), None],
3111            expected_2: vec![Some(false), Some(true), None],
3112            expected_3: vec![Some(true), None, None],
3113            expected_4: vec![Some(false), None, None],
3114            dict_needle_test: None,
3115        };
3116
3117        // Test case 2: Int64 with dictionary needles
3118        // Dictionary: keys [0, 1, null] → values [10, 20, -]
3119        // Rows: [10, 20, null]
3120        let int64_case = DictionaryInListTestCase {
3121            name: "dictionary_int64",
3122            dict_type: DataType::Dictionary(
3123                Box::new(DataType::Int8),
3124                Box::new(DataType::Int64),
3125            ),
3126            dict_keys: vec![Some(0), Some(1), None],
3127            dict_values: Arc::new(Int64Array::from(vec![Some(10), Some(20)])),
3128            list_values_no_null: vec![lit(10i64), lit(15i64)],
3129            list_values_with_null: vec![
3130                lit(10i64),
3131                lit(15i64),
3132                lit(ScalarValue::Int64(None)),
3133            ],
3134            expected_1: vec![Some(true), Some(false), None],
3135            expected_2: vec![Some(false), Some(true), None],
3136            expected_3: vec![Some(true), None, None],
3137            expected_4: vec![Some(false), None, None],
3138            dict_needle_test: Some(DictNeedleTest {
3139                list_values: vec![
3140                    dict_lit_int64(DataType::Int16, 10),
3141                    dict_lit_int64(DataType::Int16, 15),
3142                ],
3143                expected: vec![Some(true), Some(false), None],
3144            }),
3145        };
3146
3147        // Test case 3: Float64 with NaN and dictionary needles
3148        // Dictionary: keys [0, 1, null, 2] → values [1.5, 3.7, NaN, -]
3149        // Rows: [1.5, 3.7, null, NaN]
3150        // Note: NaN is a value (not null), so it goes in the values array
3151        let float64_case = DictionaryInListTestCase {
3152            name: "dictionary_float64",
3153            dict_type: DataType::Dictionary(
3154                Box::new(DataType::Int8),
3155                Box::new(DataType::Float64),
3156            ),
3157            dict_keys: vec![Some(0), Some(1), None, Some(2)],
3158            dict_values: Arc::new(Float64Array::from(vec![
3159                Some(1.5),      // index 0
3160                Some(3.7),      // index 1
3161                Some(f64::NAN), // index 2
3162            ])),
3163            list_values_no_null: vec![lit(1.5f64), lit(2.0f64)],
3164            list_values_with_null: vec![
3165                lit(1.5f64),
3166                lit(2.0f64),
3167                lit(ScalarValue::Float64(None)),
3168            ],
3169            // Test 1: a IN (1.5, 2.0) → [true, false, null, false]
3170            // NaN is false because NaN not in list and no NULL in list
3171            expected_1: vec![Some(true), Some(false), None, Some(false)],
3172            // Test 2: a NOT IN (1.5, 2.0) → [false, true, null, true]
3173            // NaN is true because NaN not in list
3174            expected_2: vec![Some(false), Some(true), None, Some(true)],
3175            // Test 3: a IN (1.5, 2.0, NULL) → [true, null, null, null]
3176            // 3.7 and NaN become null due to NULL in list (three-valued logic)
3177            expected_3: vec![Some(true), None, None, None],
3178            // Test 4: a NOT IN (1.5, 2.0, NULL) → [false, null, null, null]
3179            // 3.7 and NaN become null due to NULL in list
3180            expected_4: vec![Some(false), None, None, None],
3181            dict_needle_test: Some(DictNeedleTest {
3182                list_values: vec![
3183                    dict_lit_float64(DataType::UInt16, 1.5),
3184                    dict_lit_float64(DataType::UInt16, 2.0),
3185                ],
3186                expected: vec![Some(true), Some(false), None, Some(false)],
3187            }),
3188        };
3189
3190        // Execute all test cases
3191        let test_name = utf8_case.name;
3192        run_dictionary_in_list_test(utf8_case).map_err(|e| {
3193            datafusion_common::DataFusionError::Execution(format!(
3194                "Dictionary test '{test_name}' failed: {e}"
3195            ))
3196        })?;
3197
3198        let test_name = int64_case.name;
3199        run_dictionary_in_list_test(int64_case).map_err(|e| {
3200            datafusion_common::DataFusionError::Execution(format!(
3201                "Dictionary test '{test_name}' failed: {e}"
3202            ))
3203        })?;
3204
3205        let test_name = float64_case.name;
3206        run_dictionary_in_list_test(float64_case).map_err(|e| {
3207            datafusion_common::DataFusionError::Execution(format!(
3208                "Dictionary test '{test_name}' failed: {e}"
3209            ))
3210        })?;
3211
3212        // Additional test: Dictionary deduplication with repeated keys
3213        // This tests that multiple rows with the same key (pointing to the same value)
3214        // are evaluated correctly
3215        let dedup_case = DictionaryInListTestCase {
3216            name: "dictionary_deduplication",
3217            dict_type: DataType::Dictionary(
3218                Box::new(DataType::Int8),
3219                Box::new(DataType::Utf8),
3220            ),
3221            // Keys: [0, 1, 0, 1, null] - keys 0 and 1 are repeated
3222            // This creates data: ["a", "d", "a", "d", null]
3223            dict_keys: vec![Some(0), Some(1), Some(0), Some(1), None],
3224            dict_values: Arc::new(StringArray::from(vec![Some("a"), Some("d")])),
3225            list_values_no_null: vec![lit("a"), lit("b")],
3226            list_values_with_null: vec![lit("a"), lit("b"), lit(ScalarValue::Utf8(None))],
3227            // Test 1: a IN ("a", "b") → [true, false, true, false, null]
3228            // Rows 0 and 2 both have key 0 → "a", so both are true
3229            expected_1: vec![Some(true), Some(false), Some(true), Some(false), None],
3230            // Test 2: a NOT IN ("a", "b") → [false, true, false, true, null]
3231            expected_2: vec![Some(false), Some(true), Some(false), Some(true), None],
3232            // Test 3: a IN ("a", "b", NULL) → [true, null, true, null, null]
3233            // "d" becomes null due to NULL in list
3234            expected_3: vec![Some(true), None, Some(true), None, None],
3235            // Test 4: a NOT IN ("a", "b", NULL) → [false, null, false, null, null]
3236            expected_4: vec![Some(false), None, Some(false), None, None],
3237            dict_needle_test: None,
3238        };
3239
3240        let test_name = dedup_case.name;
3241        run_dictionary_in_list_test(dedup_case).map_err(|e| {
3242            datafusion_common::DataFusionError::Execution(format!(
3243                "Dictionary test '{test_name}' failed: {e}"
3244            ))
3245        })?;
3246
3247        // Additional test for Float64 NaN in IN list
3248        let dict_type =
3249            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Float64));
3250        let schema = Schema::new(vec![Field::new("a", dict_type.clone(), true)]);
3251        let col_a = col("a", &schema)?;
3252
3253        let keys = Int8Array::from(vec![Some(0), Some(1), None, Some(2)]);
3254        let values = Float64Array::from(vec![Some(1.5), Some(3.7), Some(f64::NAN)]);
3255        let dict_array: ArrayRef =
3256            Arc::new(DictionaryArray::try_new(keys, Arc::new(values))?);
3257        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![dict_array])?;
3258
3259        // Test: a IN (1.5, 2.0, NaN)
3260        let list_with_nan = vec![lit(1.5f64), lit(2.0f64), lit(f64::NAN)];
3261        in_list!(
3262            batch,
3263            list_with_nan,
3264            &false,
3265            vec![Some(true), Some(false), None, Some(true)],
3266            col_a,
3267            &schema
3268        );
3269
3270        Ok(())
3271    }
3272
3273    #[test]
3274    fn test_in_list_esoteric_types() -> Result<()> {
3275        // Test esoteric/less common types to validate the transform and mapping flow.
3276        // These types are reinterpreted to base primitive types (e.g., Timestamp -> UInt64,
3277        // Interval -> Decimal128, Float16 -> UInt16). We just need to verify basic
3278        // functionality works - no need for comprehensive null handling tests.
3279
3280        // Helper: simple IN test that expects [Some(true), Some(false)]
3281        let test_type = |data_type: DataType,
3282                         in_array: ArrayRef,
3283                         list_values: Vec<ScalarValue>|
3284         -> Result<()> {
3285            let schema = Schema::new(vec![Field::new("a", data_type.clone(), false)]);
3286            let col_a = col("a", &schema)?;
3287            let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![in_array])?;
3288
3289            let list = list_values.into_iter().map(lit).collect();
3290            in_list!(
3291                batch,
3292                list,
3293                &false,
3294                vec![Some(true), Some(false)],
3295                col_a,
3296                &schema
3297            );
3298            Ok(())
3299        };
3300
3301        // Timestamp types (all units map to Int64 -> UInt64)
3302        test_type(
3303            DataType::Timestamp(TimeUnit::Second, None),
3304            Arc::new(TimestampSecondArray::from(vec![Some(1000), Some(2000)])),
3305            vec![
3306                ScalarValue::TimestampSecond(Some(1000), None),
3307                ScalarValue::TimestampSecond(Some(1500), None),
3308            ],
3309        )?;
3310
3311        test_type(
3312            DataType::Timestamp(TimeUnit::Millisecond, None),
3313            Arc::new(TimestampMillisecondArray::from(vec![
3314                Some(1000000),
3315                Some(2000000),
3316            ])),
3317            vec![
3318                ScalarValue::TimestampMillisecond(Some(1000000), None),
3319                ScalarValue::TimestampMillisecond(Some(1500000), None),
3320            ],
3321        )?;
3322
3323        test_type(
3324            DataType::Timestamp(TimeUnit::Microsecond, None),
3325            Arc::new(TimestampMicrosecondArray::from(vec![
3326                Some(1000000000),
3327                Some(2000000000),
3328            ])),
3329            vec![
3330                ScalarValue::TimestampMicrosecond(Some(1000000000), None),
3331                ScalarValue::TimestampMicrosecond(Some(1500000000), None),
3332            ],
3333        )?;
3334
3335        // Time32 and Time64 (map to Int32 -> UInt32 and Int64 -> UInt64 respectively)
3336        test_type(
3337            DataType::Time32(TimeUnit::Second),
3338            Arc::new(Time32SecondArray::from(vec![Some(3600), Some(7200)])),
3339            vec![
3340                ScalarValue::Time32Second(Some(3600)),
3341                ScalarValue::Time32Second(Some(5400)),
3342            ],
3343        )?;
3344
3345        test_type(
3346            DataType::Time32(TimeUnit::Millisecond),
3347            Arc::new(Time32MillisecondArray::from(vec![
3348                Some(3600000),
3349                Some(7200000),
3350            ])),
3351            vec![
3352                ScalarValue::Time32Millisecond(Some(3600000)),
3353                ScalarValue::Time32Millisecond(Some(5400000)),
3354            ],
3355        )?;
3356
3357        test_type(
3358            DataType::Time64(TimeUnit::Microsecond),
3359            Arc::new(Time64MicrosecondArray::from(vec![
3360                Some(3600000000),
3361                Some(7200000000),
3362            ])),
3363            vec![
3364                ScalarValue::Time64Microsecond(Some(3600000000)),
3365                ScalarValue::Time64Microsecond(Some(5400000000)),
3366            ],
3367        )?;
3368
3369        test_type(
3370            DataType::Time64(TimeUnit::Nanosecond),
3371            Arc::new(Time64NanosecondArray::from(vec![
3372                Some(3600000000000),
3373                Some(7200000000000),
3374            ])),
3375            vec![
3376                ScalarValue::Time64Nanosecond(Some(3600000000000)),
3377                ScalarValue::Time64Nanosecond(Some(5400000000000)),
3378            ],
3379        )?;
3380
3381        // Duration types (map to Int64 -> UInt64)
3382        test_type(
3383            DataType::Duration(TimeUnit::Second),
3384            Arc::new(DurationSecondArray::from(vec![Some(86400), Some(172800)])),
3385            vec![
3386                ScalarValue::DurationSecond(Some(86400)),
3387                ScalarValue::DurationSecond(Some(129600)),
3388            ],
3389        )?;
3390
3391        test_type(
3392            DataType::Duration(TimeUnit::Millisecond),
3393            Arc::new(DurationMillisecondArray::from(vec![
3394                Some(86400000),
3395                Some(172800000),
3396            ])),
3397            vec![
3398                ScalarValue::DurationMillisecond(Some(86400000)),
3399                ScalarValue::DurationMillisecond(Some(129600000)),
3400            ],
3401        )?;
3402
3403        test_type(
3404            DataType::Duration(TimeUnit::Microsecond),
3405            Arc::new(DurationMicrosecondArray::from(vec![
3406                Some(86400000000),
3407                Some(172800000000),
3408            ])),
3409            vec![
3410                ScalarValue::DurationMicrosecond(Some(86400000000)),
3411                ScalarValue::DurationMicrosecond(Some(129600000000)),
3412            ],
3413        )?;
3414
3415        test_type(
3416            DataType::Duration(TimeUnit::Nanosecond),
3417            Arc::new(DurationNanosecondArray::from(vec![
3418                Some(86400000000000),
3419                Some(172800000000000),
3420            ])),
3421            vec![
3422                ScalarValue::DurationNanosecond(Some(86400000000000)),
3423                ScalarValue::DurationNanosecond(Some(129600000000000)),
3424            ],
3425        )?;
3426
3427        // Interval types (map to 16-byte Decimal128Type)
3428        test_type(
3429            DataType::Interval(IntervalUnit::YearMonth),
3430            Arc::new(IntervalYearMonthArray::from(vec![Some(12), Some(24)])),
3431            vec![
3432                ScalarValue::IntervalYearMonth(Some(12)),
3433                ScalarValue::IntervalYearMonth(Some(18)),
3434            ],
3435        )?;
3436
3437        test_type(
3438            DataType::Interval(IntervalUnit::DayTime),
3439            Arc::new(IntervalDayTimeArray::from(vec![
3440                Some(IntervalDayTime {
3441                    days: 1,
3442                    milliseconds: 0,
3443                }),
3444                Some(IntervalDayTime {
3445                    days: 2,
3446                    milliseconds: 0,
3447                }),
3448            ])),
3449            vec![
3450                ScalarValue::IntervalDayTime(Some(IntervalDayTime {
3451                    days: 1,
3452                    milliseconds: 0,
3453                })),
3454                ScalarValue::IntervalDayTime(Some(IntervalDayTime {
3455                    days: 1,
3456                    milliseconds: 500,
3457                })),
3458            ],
3459        )?;
3460
3461        test_type(
3462            DataType::Interval(IntervalUnit::MonthDayNano),
3463            Arc::new(IntervalMonthDayNanoArray::from(vec![
3464                Some(IntervalMonthDayNano {
3465                    months: 1,
3466                    days: 0,
3467                    nanoseconds: 0,
3468                }),
3469                Some(IntervalMonthDayNano {
3470                    months: 2,
3471                    days: 0,
3472                    nanoseconds: 0,
3473                }),
3474            ])),
3475            vec![
3476                ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
3477                    months: 1,
3478                    days: 0,
3479                    nanoseconds: 0,
3480                })),
3481                ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
3482                    months: 1,
3483                    days: 15,
3484                    nanoseconds: 0,
3485                })),
3486            ],
3487        )?;
3488
3489        // Decimal256 (maps to Decimal128Type for 16-byte width)
3490        // Need to use with_precision_and_scale() to set the metadata
3491        let precision = 38;
3492        let scale = 10;
3493        test_type(
3494            DataType::Decimal256(precision, scale),
3495            Arc::new(
3496                Decimal256Array::from(vec![
3497                    Some(i256::from(12345)),
3498                    Some(i256::from(67890)),
3499                ])
3500                .with_precision_and_scale(precision, scale)?,
3501            ),
3502            vec![
3503                ScalarValue::Decimal256(Some(i256::from(12345)), precision, scale),
3504                ScalarValue::Decimal256(Some(i256::from(54321)), precision, scale),
3505            ],
3506        )?;
3507
3508        Ok(())
3509    }
3510}