datafusion_common/scalar/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! [`ScalarValue`]: stores single values
19
20mod consts;
21mod struct_builder;
22
23use std::borrow::Borrow;
24use std::cmp::Ordering;
25use std::collections::{HashSet, VecDeque};
26use std::convert::Infallible;
27use std::fmt;
28use std::hash::Hash;
29use std::hash::Hasher;
30use std::iter::repeat_n;
31use std::mem::{size_of, size_of_val};
32use std::str::FromStr;
33use std::sync::Arc;
34
35use crate::arrow_datafusion_err;
36use crate::cast::{
37    as_decimal128_array, as_decimal256_array, as_dictionary_array,
38    as_fixed_size_binary_array, as_fixed_size_list_array,
39};
40use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
41use crate::format::DEFAULT_CAST_OPTIONS;
42use crate::hash_utils::create_hashes;
43use crate::utils::SingleRowListArrayBuilder;
44use arrow::array::{
45    types::{IntervalDayTime, IntervalMonthDayNano},
46    *,
47};
48use arrow::buffer::ScalarBuffer;
49use arrow::compute::kernels::{
50    cast::{cast_with_options, CastOptions},
51    numeric::*,
52};
53use arrow::datatypes::{
54    i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType,
55    Date32Type, Date64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type,
56    Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
57    IntervalYearMonthType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType,
58    TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type,
59    UInt8Type, UnionFields, UnionMode, DECIMAL128_MAX_PRECISION,
60};
61use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
62use half::f16;
63pub use struct_builder::ScalarStructBuilder;
64
65/// A dynamically typed, nullable single value.
66///
/// While an arrow [`Array`] stores one or more values of the same type, in a
68/// single column, a `ScalarValue` stores a single value of a single type, the
69/// equivalent of 1 row and one column.
70///
71/// ```text
72///  ┌────────┐
73///  │ value1 │
74///  │ value2 │                  ┌────────┐
75///  │ value3 │                  │ value2 │
76///  │  ...   │                  └────────┘
77///  │ valueN │
78///  └────────┘
79///
80///    Array                     ScalarValue
81///
82/// stores multiple,             stores a single,
/// possibly null, values of     possibly null, value
84/// the same type
85/// ```
86///
87/// # Performance
88///
89/// In general, performance will be better using arrow [`Array`]s rather than
90/// [`ScalarValue`], as it is far more efficient to process multiple values at
91/// once (vectorized processing).
92///
93/// # Example
94/// ```
95/// # use datafusion_common::ScalarValue;
96/// // Create single scalar value for an Int32 value
97/// let s1 = ScalarValue::Int32(Some(10));
98///
99/// // You can also create values using the From impl:
100/// let s2 = ScalarValue::from(10i32);
101/// assert_eq!(s1, s2);
102/// ```
103///
104/// # Null Handling
105///
106/// `ScalarValue` represents null values in the same way as Arrow. Nulls are
107/// "typed" in the sense that a null value in an [`Int32Array`] is different
108/// from a null value in a [`Float64Array`], and is different from the values in
109/// a [`NullArray`].
110///
111/// ```
112/// # fn main() -> datafusion_common::Result<()> {
113/// # use std::collections::hash_set::Difference;
114/// # use datafusion_common::ScalarValue;
115/// # use arrow::datatypes::DataType;
116/// // You can create a 'null' Int32 value directly:
117/// let s1 = ScalarValue::Int32(None);
118///
119/// // You can also create a null value for a given datatype:
120/// let s2 = ScalarValue::try_from(&DataType::Int32)?;
121/// assert_eq!(s1, s2);
122///
123/// // Note that this is DIFFERENT than a `ScalarValue::Null`
124/// let s3 = ScalarValue::Null;
125/// assert_ne!(s1, s3);
126/// # Ok(())
127/// # }
128/// ```
129///
130/// # Nested Types
131///
132/// `List` / `LargeList` / `FixedSizeList` / `Struct` / `Map` are represented as a
133/// single element array of the corresponding type.
134///
135/// ## Example: Creating [`ScalarValue::Struct`] using [`ScalarStructBuilder`]
136/// ```
137/// # use std::sync::Arc;
138/// # use arrow::datatypes::{DataType, Field};
139/// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder};
140/// // Build a struct like: {a: 1, b: "foo"}
141/// let field_a = Field::new("a", DataType::Int32, false);
142/// let field_b = Field::new("b", DataType::Utf8, false);
143///
144/// let s1 = ScalarStructBuilder::new()
145///    .with_scalar(field_a, ScalarValue::from(1i32))
146///    .with_scalar(field_b, ScalarValue::from("foo"))
147///    .build();
148/// ```
149///
150/// ## Example: Creating a null [`ScalarValue::Struct`] using [`ScalarStructBuilder`]
151/// ```
152/// # use std::sync::Arc;
153/// # use arrow::datatypes::{DataType, Field};
154/// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder};
155/// // Build a struct representing a NULL value
156/// let fields = vec![
157///     Field::new("a", DataType::Int32, false),
158///     Field::new("b", DataType::Utf8, false),
159/// ];
160///
161/// let s1 = ScalarStructBuilder::new_null(fields);
162/// ```
163///
164/// ## Example: Creating [`ScalarValue::Struct`] directly
165/// ```
166/// # use std::sync::Arc;
167/// # use arrow::datatypes::{DataType, Field, Fields};
168/// # use arrow::array::{ArrayRef, Int32Array, StructArray, StringArray};
169/// # use datafusion_common::ScalarValue;
170/// // Build a struct like: {a: 1, b: "foo"}
171/// // Field description
172/// let fields = Fields::from(vec![
173///   Field::new("a", DataType::Int32, false),
174///   Field::new("b", DataType::Utf8, false),
175/// ]);
176/// // one row arrays for each field
177/// let arrays: Vec<ArrayRef> = vec![
178///   Arc::new(Int32Array::from(vec![1])),
179///   Arc::new(StringArray::from(vec!["foo"])),
180/// ];
181/// // no nulls for this array
182/// let nulls = None;
183/// let arr = StructArray::new(fields, arrays, nulls);
184///
185/// // Create a ScalarValue::Struct directly
186/// let s1 = ScalarValue::Struct(Arc::new(arr));
187/// ```
188///
189///
190/// # Further Reading
191/// See [datatypes](https://arrow.apache.org/docs/python/api/datatypes.html) for
192/// details on datatypes and the [format](https://github.com/apache/arrow/blob/master/format/Schema.fbs#L354-L375)
193/// for the definitive reference.
// Only `Clone` is derived; `PartialEq`, `PartialOrd`, `Eq` and `Hash` are
// implemented by hand further down in this file.
#[derive(Clone)]
pub enum ScalarValue {
    /// represents `DataType::Null` (castable to/from any other type)
    Null,
    /// true or false value
    Boolean(Option<bool>),
    /// 16bit float
    Float16(Option<f16>),
    /// 32bit float
    Float32(Option<f32>),
    /// 64bit float
    Float64(Option<f64>),
    /// 128bit decimal, using the i128 to represent the decimal, followed by
    /// its precision and scale
    Decimal128(Option<i128>, u8, i8),
    /// 256bit decimal, using the i256 to represent the decimal, followed by
    /// its precision and scale
    Decimal256(Option<i256>, u8, i8),
    /// signed 8bit int
    Int8(Option<i8>),
    /// signed 16bit int
    Int16(Option<i16>),
    /// signed 32bit int
    Int32(Option<i32>),
    /// signed 64bit int
    Int64(Option<i64>),
    /// unsigned 8bit int
    UInt8(Option<u8>),
    /// unsigned 16bit int
    UInt16(Option<u16>),
    /// unsigned 32bit int
    UInt32(Option<u32>),
    /// unsigned 64bit int
    UInt64(Option<u64>),
    /// utf-8 encoded string.
    Utf8(Option<String>),
    /// utf-8 encoded string but from view types.
    Utf8View(Option<String>),
    /// utf-8 encoded string representing a LargeString's arrow type.
    LargeUtf8(Option<String>),
    /// binary
    Binary(Option<Vec<u8>>),
    /// binary but from view types.
    BinaryView(Option<Vec<u8>>),
    /// fixed size binary
    // NOTE(review): the leading `i32` is presumably the fixed byte width of the
    // value; the equality/ordering/hash impls in this file ignore it — confirm.
    FixedSizeBinary(i32, Option<Vec<u8>>),
    /// large binary
    LargeBinary(Option<Vec<u8>>),
    /// Fixed size list scalar.
    ///
    /// The array must be a FixedSizeListArray with length 1.
    FixedSizeList(Arc<FixedSizeListArray>),
    /// Represents a single element of a [`ListArray`] as an [`ArrayRef`]
    ///
    /// The array must be a ListArray with length 1.
    List(Arc<ListArray>),
    /// Large list scalar.
    ///
    /// The array must be a LargeListArray with length 1.
    LargeList(Arc<LargeListArray>),
    /// Represents a single element [`StructArray`] as an [`ArrayRef`]. See
    /// [`ScalarValue`] for examples of how to create instances of this type.
    Struct(Arc<StructArray>),
    /// Represents a single element [`MapArray`] as an [`ArrayRef`].
    Map(Arc<MapArray>),
    /// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01
    Date32(Option<i32>),
    /// Date stored as a signed 64bit int milliseconds since UNIX epoch 1970-01-01
    Date64(Option<i64>),
    /// Time stored as a signed 32bit int as seconds since midnight
    Time32Second(Option<i32>),
    /// Time stored as a signed 32bit int as milliseconds since midnight
    Time32Millisecond(Option<i32>),
    /// Time stored as a signed 64bit int as microseconds since midnight
    Time64Microsecond(Option<i64>),
    /// Time stored as a signed 64bit int as nanoseconds since midnight
    Time64Nanosecond(Option<i64>),
    // NOTE(review): for the four timestamp variants the second field is
    // presumably the timezone name; the equality/ordering/hash impls in this
    // file look only at the first field — confirm.
    /// Timestamp Second
    TimestampSecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp Milliseconds
    TimestampMillisecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp Microseconds
    TimestampMicrosecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp Nanoseconds
    TimestampNanosecond(Option<i64>, Option<Arc<str>>),
    /// Number of elapsed whole months
    IntervalYearMonth(Option<i32>),
    /// Number of elapsed days and milliseconds (no leap seconds)
    /// stored as 2 contiguous 32-bit signed integers
    IntervalDayTime(Option<IntervalDayTime>),
    /// A triple of the number of elapsed months, days, and nanoseconds.
    /// Months and days are encoded as 32-bit signed integers.
    /// Nanoseconds is encoded as a 64-bit signed integer (no leap seconds).
    IntervalMonthDayNano(Option<IntervalMonthDayNano>),
    /// Duration in seconds
    DurationSecond(Option<i64>),
    /// Duration in milliseconds
    DurationMillisecond(Option<i64>),
    /// Duration in microseconds
    DurationMicrosecond(Option<i64>),
    /// Duration in nanoseconds
    DurationNanosecond(Option<i64>),
    /// A nested datatype that can represent slots of differing types. Components:
    /// `.0`: a tuple of union `type_id` and the single value held by this Scalar
    /// `.1`: the list of fields, zero-to-one of which will be set in `.0`
    /// `.2`: the physical storage of the source/destination UnionArray from which this Scalar came
    Union(Option<(i8, Box<ScalarValue>)>, UnionFields, UnionMode),
    /// Dictionary type: index type and value
    Dictionary(Box<DataType>, Box<ScalarValue>),
}
300
301impl Hash for Fl<f16> {
302    fn hash<H: Hasher>(&self, state: &mut H) {
303        self.0.to_bits().hash(state);
304    }
305}
306
307// manual implementation of `PartialEq`
308impl PartialEq for ScalarValue {
309    fn eq(&self, other: &Self) -> bool {
310        use ScalarValue::*;
311        // This purposely doesn't have a catch-all "(_, _)" so that
312        // any newly added enum variant will require editing this list
313        // or else face a compile error
314        match (self, other) {
315            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
316                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
317            }
318            (Decimal128(_, _, _), _) => false,
319            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
320                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
321            }
322            (Decimal256(_, _, _), _) => false,
323            (Boolean(v1), Boolean(v2)) => v1.eq(v2),
324            (Boolean(_), _) => false,
325            (Float32(v1), Float32(v2)) => match (v1, v2) {
326                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
327                _ => v1.eq(v2),
328            },
329            (Float16(v1), Float16(v2)) => match (v1, v2) {
330                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
331                _ => v1.eq(v2),
332            },
333            (Float32(_), _) => false,
334            (Float16(_), _) => false,
335            (Float64(v1), Float64(v2)) => match (v1, v2) {
336                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
337                _ => v1.eq(v2),
338            },
339            (Float64(_), _) => false,
340            (Int8(v1), Int8(v2)) => v1.eq(v2),
341            (Int8(_), _) => false,
342            (Int16(v1), Int16(v2)) => v1.eq(v2),
343            (Int16(_), _) => false,
344            (Int32(v1), Int32(v2)) => v1.eq(v2),
345            (Int32(_), _) => false,
346            (Int64(v1), Int64(v2)) => v1.eq(v2),
347            (Int64(_), _) => false,
348            (UInt8(v1), UInt8(v2)) => v1.eq(v2),
349            (UInt8(_), _) => false,
350            (UInt16(v1), UInt16(v2)) => v1.eq(v2),
351            (UInt16(_), _) => false,
352            (UInt32(v1), UInt32(v2)) => v1.eq(v2),
353            (UInt32(_), _) => false,
354            (UInt64(v1), UInt64(v2)) => v1.eq(v2),
355            (UInt64(_), _) => false,
356            (Utf8(v1), Utf8(v2)) => v1.eq(v2),
357            (Utf8(_), _) => false,
358            (Utf8View(v1), Utf8View(v2)) => v1.eq(v2),
359            (Utf8View(_), _) => false,
360            (LargeUtf8(v1), LargeUtf8(v2)) => v1.eq(v2),
361            (LargeUtf8(_), _) => false,
362            (Binary(v1), Binary(v2)) => v1.eq(v2),
363            (Binary(_), _) => false,
364            (BinaryView(v1), BinaryView(v2)) => v1.eq(v2),
365            (BinaryView(_), _) => false,
366            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.eq(v2),
367            (FixedSizeBinary(_, _), _) => false,
368            (LargeBinary(v1), LargeBinary(v2)) => v1.eq(v2),
369            (LargeBinary(_), _) => false,
370            (FixedSizeList(v1), FixedSizeList(v2)) => v1.eq(v2),
371            (FixedSizeList(_), _) => false,
372            (List(v1), List(v2)) => v1.eq(v2),
373            (List(_), _) => false,
374            (LargeList(v1), LargeList(v2)) => v1.eq(v2),
375            (LargeList(_), _) => false,
376            (Struct(v1), Struct(v2)) => v1.eq(v2),
377            (Struct(_), _) => false,
378            (Map(v1), Map(v2)) => v1.eq(v2),
379            (Map(_), _) => false,
380            (Date32(v1), Date32(v2)) => v1.eq(v2),
381            (Date32(_), _) => false,
382            (Date64(v1), Date64(v2)) => v1.eq(v2),
383            (Date64(_), _) => false,
384            (Time32Second(v1), Time32Second(v2)) => v1.eq(v2),
385            (Time32Second(_), _) => false,
386            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.eq(v2),
387            (Time32Millisecond(_), _) => false,
388            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.eq(v2),
389            (Time64Microsecond(_), _) => false,
390            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.eq(v2),
391            (Time64Nanosecond(_), _) => false,
392            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2),
393            (TimestampSecond(_, _), _) => false,
394            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2),
395            (TimestampMillisecond(_, _), _) => false,
396            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => v1.eq(v2),
397            (TimestampMicrosecond(_, _), _) => false,
398            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2),
399            (TimestampNanosecond(_, _), _) => false,
400            (DurationSecond(v1), DurationSecond(v2)) => v1.eq(v2),
401            (DurationSecond(_), _) => false,
402            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.eq(v2),
403            (DurationMillisecond(_), _) => false,
404            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.eq(v2),
405            (DurationMicrosecond(_), _) => false,
406            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.eq(v2),
407            (DurationNanosecond(_), _) => false,
408            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2),
409            (IntervalYearMonth(_), _) => false,
410            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2),
411            (IntervalDayTime(_), _) => false,
412            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2),
413            (IntervalMonthDayNano(_), _) => false,
414            (Union(val1, fields1, mode1), Union(val2, fields2, mode2)) => {
415                val1.eq(val2) && fields1.eq(fields2) && mode1.eq(mode2)
416            }
417            (Union(_, _, _), _) => false,
418            (Dictionary(k1, v1), Dictionary(k2, v2)) => k1.eq(k2) && v1.eq(v2),
419            (Dictionary(_, _), _) => false,
420            (Null, Null) => true,
421            (Null, _) => false,
422        }
423    }
424}
425
426// manual implementation of `PartialOrd`
427impl PartialOrd for ScalarValue {
428    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
429        use ScalarValue::*;
430        // This purposely doesn't have a catch-all "(_, _)" so that
431        // any newly added enum variant will require editing this list
432        // or else face a compile error
433        match (self, other) {
434            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
435                if p1.eq(p2) && s1.eq(s2) {
436                    v1.partial_cmp(v2)
437                } else {
438                    // Two decimal values can be compared if they have the same precision and scale.
439                    None
440                }
441            }
442            (Decimal128(_, _, _), _) => None,
443            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
444                if p1.eq(p2) && s1.eq(s2) {
445                    v1.partial_cmp(v2)
446                } else {
447                    // Two decimal values can be compared if they have the same precision and scale.
448                    None
449                }
450            }
451            (Decimal256(_, _, _), _) => None,
452            (Boolean(v1), Boolean(v2)) => v1.partial_cmp(v2),
453            (Boolean(_), _) => None,
454            (Float32(v1), Float32(v2)) => match (v1, v2) {
455                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
456                _ => v1.partial_cmp(v2),
457            },
458            (Float16(v1), Float16(v2)) => match (v1, v2) {
459                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
460                _ => v1.partial_cmp(v2),
461            },
462            (Float32(_), _) => None,
463            (Float16(_), _) => None,
464            (Float64(v1), Float64(v2)) => match (v1, v2) {
465                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
466                _ => v1.partial_cmp(v2),
467            },
468            (Float64(_), _) => None,
469            (Int8(v1), Int8(v2)) => v1.partial_cmp(v2),
470            (Int8(_), _) => None,
471            (Int16(v1), Int16(v2)) => v1.partial_cmp(v2),
472            (Int16(_), _) => None,
473            (Int32(v1), Int32(v2)) => v1.partial_cmp(v2),
474            (Int32(_), _) => None,
475            (Int64(v1), Int64(v2)) => v1.partial_cmp(v2),
476            (Int64(_), _) => None,
477            (UInt8(v1), UInt8(v2)) => v1.partial_cmp(v2),
478            (UInt8(_), _) => None,
479            (UInt16(v1), UInt16(v2)) => v1.partial_cmp(v2),
480            (UInt16(_), _) => None,
481            (UInt32(v1), UInt32(v2)) => v1.partial_cmp(v2),
482            (UInt32(_), _) => None,
483            (UInt64(v1), UInt64(v2)) => v1.partial_cmp(v2),
484            (UInt64(_), _) => None,
485            (Utf8(v1), Utf8(v2)) => v1.partial_cmp(v2),
486            (Utf8(_), _) => None,
487            (LargeUtf8(v1), LargeUtf8(v2)) => v1.partial_cmp(v2),
488            (LargeUtf8(_), _) => None,
489            (Utf8View(v1), Utf8View(v2)) => v1.partial_cmp(v2),
490            (Utf8View(_), _) => None,
491            (Binary(v1), Binary(v2)) => v1.partial_cmp(v2),
492            (Binary(_), _) => None,
493            (BinaryView(v1), BinaryView(v2)) => v1.partial_cmp(v2),
494            (BinaryView(_), _) => None,
495            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.partial_cmp(v2),
496            (FixedSizeBinary(_, _), _) => None,
497            (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
498            (LargeBinary(_), _) => None,
499            // ScalarValue::List / ScalarValue::FixedSizeList / ScalarValue::LargeList are ensure to have length 1
500            (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()),
501            (FixedSizeList(arr1), FixedSizeList(arr2)) => {
502                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
503            }
504            (LargeList(arr1), LargeList(arr2)) => {
505                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
506            }
507            (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None,
508            (Struct(struct_arr1), Struct(struct_arr2)) => {
509                partial_cmp_struct(struct_arr1.as_ref(), struct_arr2.as_ref())
510            }
511            (Struct(_), _) => None,
512            (Map(map_arr1), Map(map_arr2)) => partial_cmp_map(map_arr1, map_arr2),
513            (Map(_), _) => None,
514            (Date32(v1), Date32(v2)) => v1.partial_cmp(v2),
515            (Date32(_), _) => None,
516            (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
517            (Date64(_), _) => None,
518            (Time32Second(v1), Time32Second(v2)) => v1.partial_cmp(v2),
519            (Time32Second(_), _) => None,
520            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.partial_cmp(v2),
521            (Time32Millisecond(_), _) => None,
522            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.partial_cmp(v2),
523            (Time64Microsecond(_), _) => None,
524            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.partial_cmp(v2),
525            (Time64Nanosecond(_), _) => None,
526            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2),
527            (TimestampSecond(_, _), _) => None,
528            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => {
529                v1.partial_cmp(v2)
530            }
531            (TimestampMillisecond(_, _), _) => None,
532            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => {
533                v1.partial_cmp(v2)
534            }
535            (TimestampMicrosecond(_, _), _) => None,
536            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => {
537                v1.partial_cmp(v2)
538            }
539            (TimestampNanosecond(_, _), _) => None,
540            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2),
541            (IntervalYearMonth(_), _) => None,
542            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2),
543            (IntervalDayTime(_), _) => None,
544            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2),
545            (IntervalMonthDayNano(_), _) => None,
546            (DurationSecond(v1), DurationSecond(v2)) => v1.partial_cmp(v2),
547            (DurationSecond(_), _) => None,
548            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.partial_cmp(v2),
549            (DurationMillisecond(_), _) => None,
550            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.partial_cmp(v2),
551            (DurationMicrosecond(_), _) => None,
552            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.partial_cmp(v2),
553            (DurationNanosecond(_), _) => None,
554            (Union(v1, t1, m1), Union(v2, t2, m2)) => {
555                if t1.eq(t2) && m1.eq(m2) {
556                    v1.partial_cmp(v2)
557                } else {
558                    None
559                }
560            }
561            (Union(_, _, _), _) => None,
562            (Dictionary(k1, v1), Dictionary(k2, v2)) => {
563                // Don't compare if the key types don't match (it is effectively a different datatype)
564                if k1 == k2 {
565                    v1.partial_cmp(v2)
566                } else {
567                    None
568                }
569            }
570            (Dictionary(_, _), _) => None,
571            (Null, Null) => Some(Ordering::Equal),
572            (Null, _) => None,
573        }
574    }
575}
576
577/// List/LargeList/FixedSizeList scalars always have a single element
578/// array. This function returns that array
579fn first_array_for_list(arr: &dyn Array) -> ArrayRef {
580    assert_eq!(arr.len(), 1);
581    if let Some(arr) = arr.as_list_opt::<i32>() {
582        arr.value(0)
583    } else if let Some(arr) = arr.as_list_opt::<i64>() {
584        arr.value(0)
585    } else if let Some(arr) = arr.as_fixed_size_list_opt() {
586        arr.value(0)
587    } else {
588        unreachable!("Since only List / LargeList / FixedSizeList are supported, this should never happen")
589    }
590}
591
592/// Compares two List/LargeList/FixedSizeList scalars
593fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option<Ordering> {
594    if arr1.data_type() != arr2.data_type() {
595        return None;
596    }
597    let arr1 = first_array_for_list(arr1);
598    let arr2 = first_array_for_list(arr2);
599
600    let min_length = arr1.len().min(arr2.len());
601    let arr1_trimmed = arr1.slice(0, min_length);
602    let arr2_trimmed = arr2.slice(0, min_length);
603
604    let lt_res = arrow::compute::kernels::cmp::lt(&arr1_trimmed, &arr2_trimmed).ok()?;
605    let eq_res = arrow::compute::kernels::cmp::eq(&arr1_trimmed, &arr2_trimmed).ok()?;
606
607    for j in 0..lt_res.len() {
608        // In Postgres, NULL values in lists are always considered to be greater than non-NULL values:
609        //
610        // $ SELECT ARRAY[NULL]::integer[] > ARRAY[1]
611        // true
612        //
613        // These next two if statements are introduced for replicating Postgres behavior, as
614        // arrow::compute does not account for this.
615        if arr1_trimmed.is_null(j) && !arr2_trimmed.is_null(j) {
616            return Some(Ordering::Greater);
617        }
618        if !arr1_trimmed.is_null(j) && arr2_trimmed.is_null(j) {
619            return Some(Ordering::Less);
620        }
621
622        if lt_res.is_valid(j) && lt_res.value(j) {
623            return Some(Ordering::Less);
624        }
625        if eq_res.is_valid(j) && !eq_res.value(j) {
626            return Some(Ordering::Greater);
627        }
628    }
629
630    Some(arr1.len().cmp(&arr2.len()))
631}
632
633fn flatten<'a>(array: &'a StructArray, columns: &mut Vec<&'a ArrayRef>) {
634    for i in 0..array.num_columns() {
635        let column = array.column(i);
636        if let Some(nested_struct) = column.as_any().downcast_ref::<StructArray>() {
637            // If it's a nested struct, recursively expand
638            flatten(nested_struct, columns);
639        } else {
640            // If it's a primitive type, add directly
641            columns.push(column);
642        }
643    }
644}
645
646pub fn partial_cmp_struct(s1: &StructArray, s2: &StructArray) -> Option<Ordering> {
647    if s1.len() != s2.len() {
648        return None;
649    }
650
651    if s1.data_type() != s2.data_type() {
652        return None;
653    }
654
655    let mut expanded_columns1 = Vec::with_capacity(s1.num_columns());
656    let mut expanded_columns2 = Vec::with_capacity(s2.num_columns());
657
658    flatten(s1, &mut expanded_columns1);
659    flatten(s2, &mut expanded_columns2);
660
661    for col_index in 0..expanded_columns1.len() {
662        let arr1 = expanded_columns1[col_index];
663        let arr2 = expanded_columns2[col_index];
664
665        let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
666        let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
667
668        for j in 0..lt_res.len() {
669            if lt_res.is_valid(j) && lt_res.value(j) {
670                return Some(Ordering::Less);
671            }
672            if eq_res.is_valid(j) && !eq_res.value(j) {
673                return Some(Ordering::Greater);
674            }
675        }
676    }
677    Some(Ordering::Equal)
678}
679
680fn partial_cmp_map(m1: &Arc<MapArray>, m2: &Arc<MapArray>) -> Option<Ordering> {
681    if m1.len() != m2.len() {
682        return None;
683    }
684
685    if m1.data_type() != m2.data_type() {
686        return None;
687    }
688
689    for col_index in 0..m1.len() {
690        let arr1 = m1.entries().column(col_index);
691        let arr2 = m2.entries().column(col_index);
692
693        let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
694        let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
695
696        for j in 0..lt_res.len() {
697            if lt_res.is_valid(j) && lt_res.value(j) {
698                return Some(Ordering::Less);
699            }
700            if eq_res.is_valid(j) && !eq_res.value(j) {
701                return Some(Ordering::Greater);
702            }
703        }
704    }
705    Some(Ordering::Equal)
706}
707
// Marker impl: declares `ScalarValue` equality to be a full equivalence
// relation. The manual `PartialEq` impl compares floats by bit pattern
// (`to_bits`), so a NaN value is equal to itself and reflexivity holds.
impl Eq for ScalarValue {}
709
/// Newtype around a float value. Floats do not implement `std::hash::Hash`,
/// so hashing is provided on this wrapper instead.
struct Fl<T>(T);

/// Implements `Hash` for `Fl<float>` for each `(float type, same-width
/// unsigned integer type)` pair by feeding the float's raw bit pattern, in
/// native byte order, to the hasher.
macro_rules! hash_float_value {
    ($(($float:ty, $bits:ty)),+) => {
        $(
            impl std::hash::Hash for Fl<$float> {
                #[inline]
                fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
                    // `to_bits` reinterprets the float as its integer bit pattern.
                    let raw: $bits = self.0.to_bits();
                    state.write(&raw.to_ne_bytes())
                }
            }
        )+
    };
}

hash_float_value!((f64, u64), (f32, u32));
725
726// manual implementation of `Hash`
727//
728// # Panics
729//
730// Panics if there is an error when creating hash values for rows
731impl Hash for ScalarValue {
732    fn hash<H: Hasher>(&self, state: &mut H) {
733        use ScalarValue::*;
734        match self {
735            Decimal128(v, p, s) => {
736                v.hash(state);
737                p.hash(state);
738                s.hash(state)
739            }
740            Decimal256(v, p, s) => {
741                v.hash(state);
742                p.hash(state);
743                s.hash(state)
744            }
745            Boolean(v) => v.hash(state),
746            Float16(v) => v.map(Fl).hash(state),
747            Float32(v) => v.map(Fl).hash(state),
748            Float64(v) => v.map(Fl).hash(state),
749            Int8(v) => v.hash(state),
750            Int16(v) => v.hash(state),
751            Int32(v) => v.hash(state),
752            Int64(v) => v.hash(state),
753            UInt8(v) => v.hash(state),
754            UInt16(v) => v.hash(state),
755            UInt32(v) => v.hash(state),
756            UInt64(v) => v.hash(state),
757            Utf8(v) | LargeUtf8(v) | Utf8View(v) => v.hash(state),
758            Binary(v) | FixedSizeBinary(_, v) | LargeBinary(v) | BinaryView(v) => {
759                v.hash(state)
760            }
761            List(arr) => {
762                hash_nested_array(arr.to_owned() as ArrayRef, state);
763            }
764            LargeList(arr) => {
765                hash_nested_array(arr.to_owned() as ArrayRef, state);
766            }
767            FixedSizeList(arr) => {
768                hash_nested_array(arr.to_owned() as ArrayRef, state);
769            }
770            Struct(arr) => {
771                hash_nested_array(arr.to_owned() as ArrayRef, state);
772            }
773            Map(arr) => {
774                hash_nested_array(arr.to_owned() as ArrayRef, state);
775            }
776            Date32(v) => v.hash(state),
777            Date64(v) => v.hash(state),
778            Time32Second(v) => v.hash(state),
779            Time32Millisecond(v) => v.hash(state),
780            Time64Microsecond(v) => v.hash(state),
781            Time64Nanosecond(v) => v.hash(state),
782            TimestampSecond(v, _) => v.hash(state),
783            TimestampMillisecond(v, _) => v.hash(state),
784            TimestampMicrosecond(v, _) => v.hash(state),
785            TimestampNanosecond(v, _) => v.hash(state),
786            DurationSecond(v) => v.hash(state),
787            DurationMillisecond(v) => v.hash(state),
788            DurationMicrosecond(v) => v.hash(state),
789            DurationNanosecond(v) => v.hash(state),
790            IntervalYearMonth(v) => v.hash(state),
791            IntervalDayTime(v) => v.hash(state),
792            IntervalMonthDayNano(v) => v.hash(state),
793            Union(v, t, m) => {
794                v.hash(state);
795                t.hash(state);
796                m.hash(state);
797            }
798            Dictionary(k, v) => {
799                k.hash(state);
800                v.hash(state);
801            }
802            // stable hash for Null value
803            Null => 1.hash(state),
804        }
805    }
806}
807
808fn hash_nested_array<H: Hasher>(arr: ArrayRef, state: &mut H) {
809    let arrays = vec![arr.to_owned()];
810    let hashes_buffer = &mut vec![0; arr.len()];
811    let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0);
812    let hashes = create_hashes(&arrays, &random_state, hashes_buffer).unwrap();
813    // Hash back to std::hash::Hasher
814    hashes.hash(state);
815}
816
817/// Return a reference to the values array and the index into it for a
818/// dictionary array
819///
820/// # Errors
821///
822/// Errors if the array cannot be downcasted to DictionaryArray
823#[inline]
824pub fn get_dict_value<K: ArrowDictionaryKeyType>(
825    array: &dyn Array,
826    index: usize,
827) -> Result<(&ArrayRef, Option<usize>)> {
828    let dict_array = as_dictionary_array::<K>(array)?;
829    Ok((dict_array.values(), dict_array.key(index)))
830}
831
832/// Create a dictionary array representing `value` repeated `size`
833/// times
834fn dict_from_scalar<K: ArrowDictionaryKeyType>(
835    value: &ScalarValue,
836    size: usize,
837) -> Result<ArrayRef> {
838    // values array is one element long (the value)
839    let values_array = value.to_array_of_size(1)?;
840
841    // Create a key array with `size` elements, each of 0
842    let key_array: PrimitiveArray<K> = repeat_n(
843        if value.is_null() {
844            None
845        } else {
846            Some(K::default_value())
847        },
848        size,
849    )
850    .collect();
851
852    // create a new DictionaryArray
853    //
854    // Note: this path could be made faster by using the ArrayData
855    // APIs and skipping validation, if it every comes up in
856    // performance traces.
857    Ok(Arc::new(
858        DictionaryArray::<K>::try_new(key_array, values_array)?, // should always be valid by construction above
859    ))
860}
861
862/// Create a dictionary array representing all the values in values
863fn dict_from_values<K: ArrowDictionaryKeyType>(
864    values_array: ArrayRef,
865) -> Result<ArrayRef> {
866    // Create a key array with `size` elements of 0..array_len for all
867    // non-null value elements
868    let key_array: PrimitiveArray<K> = (0..values_array.len())
869        .map(|index| {
870            if values_array.is_valid(index) {
871                let native_index = K::Native::from_usize(index).ok_or_else(|| {
872                    DataFusionError::Internal(format!(
873                        "Can not create index of type {} from value {}",
874                        K::DATA_TYPE,
875                        index
876                    ))
877                })?;
878                Ok(Some(native_index))
879            } else {
880                Ok(None)
881            }
882        })
883        .collect::<Result<Vec<_>>>()?
884        .into_iter()
885        .collect();
886
887    // create a new DictionaryArray
888    //
889    // Note: this path could be made faster by using the ArrayData
890    // APIs and skipping validation, if it every comes up in
891    // performance traces.
892    let dict_array = DictionaryArray::<K>::try_new(key_array, values_array)?;
893    Ok(Arc::new(dict_array))
894}
895
// Downcasts `$array` to `$ARRAYTYPE` and wraps the element at `$index`
// (`None` if that slot is null) together with a clone of `$TZ` in
// `ScalarValue::$SCALAR`.
//
// Evaluates to `Result<ScalarValue, DataFusionError>`. A failed downcast
// leaves the enclosing function early (through `?`) with an internal error.
macro_rules! typed_cast_tz {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $SCALAR:ident, $TZ:expr) => {{
        let typed_array = $array
            .as_any()
            .downcast_ref::<$ARRAYTYPE>()
            .ok_or_else(|| {
                DataFusionError::Internal(format!(
                    "could not cast value to {}",
                    std::any::type_name::<$ARRAYTYPE>()
                ))
            })?;
        let value = if typed_array.is_null($index) {
            None
        } else {
            Some(typed_array.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value, $TZ.clone()))
    }};
}
917
// Downcasts `$array` to `$ARRAYTYPE` and wraps the element at `$index`
// (`None` if that slot is null) in `ScalarValue::$SCALAR`.
//
// Evaluates to `Result<ScalarValue, DataFusionError>`. A failed downcast
// leaves the enclosing function early (through `?`) with an internal error.
macro_rules! typed_cast {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $SCALAR:ident) => {{
        let typed_array = $array
            .as_any()
            .downcast_ref::<$ARRAYTYPE>()
            .ok_or_else(|| {
                DataFusionError::Internal(format!(
                    "could not cast value to {}",
                    std::any::type_name::<$ARRAYTYPE>()
                ))
            })?;
        let value = if typed_array.is_null($index) {
            None
        } else {
            Some(typed_array.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value))
    }};
}
938
// Builds an array of `$SIZE` elements from an optional scalar payload.
//
// `$EXPR` must evaluate to a reference to an `Option`: `Some(value)` yields
// `$ARRAY_TYPE::from_value(*value, $SIZE)` wrapped in an `Arc`, while `None`
// yields `new_null_array` for the matching `DataType`. The second rule is
// for data types that carry one argument (`DataType::$DATA_TYPE($ENUM)`).
macro_rules! build_array_from_option {
    ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        if let Some(value) = $EXPR {
            Arc::new($ARRAY_TYPE::from_value(*value, $SIZE))
        } else {
            new_null_array(&DataType::$DATA_TYPE, $SIZE)
        }
    }};
    ($DATA_TYPE:ident, $ENUM:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        if let Some(value) = $EXPR {
            Arc::new($ARRAY_TYPE::from_value(*value, $SIZE))
        } else {
            new_null_array(&DataType::$DATA_TYPE($ENUM), $SIZE)
        }
    }};
}
953
// Builds a timestamp array of `$SIZE` elements from an optional payload.
//
// `$EXPR` must evaluate to a reference to an `Option`: `Some(value)` yields
// `$ARRAY_TYPE::from_value(*value, $SIZE)` with timezone `$TZ` applied,
// wrapped in an `Arc`; `None` yields `new_null_array` for
// `DataType::Timestamp($TIME_UNIT, $TZ)`.
macro_rules! build_timestamp_array_from_option {
    ($TIME_UNIT:expr, $TZ:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {
        if let Some(value) = $EXPR {
            Arc::new($ARRAY_TYPE::from_value(*value, $SIZE).with_timezone_opt($TZ))
        } else {
            new_null_array(&DataType::Timestamp($TIME_UNIT, $TZ), $SIZE)
        }
    };
}
964
// Compares the element at `$index` of `$array` (downcast to `$ARRAYTYPE`)
// with the optional scalar payload `$VALUE`.
//
// Evaluates to `Result<bool, DataFusionError>`:
// * `Some(val)` matches only a valid slot whose value equals `val`
// * `None` matches only a null slot
//
// A failed downcast leaves the enclosing function early (through `?`) with
// an internal error.
macro_rules! eq_array_primitive {
    ($array:expr, $index:expr, $ARRAYTYPE:ident, $VALUE:expr) => {{
        let typed_array = $array
            .as_any()
            .downcast_ref::<$ARRAYTYPE>()
            .ok_or_else(|| {
                DataFusionError::Internal(format!(
                    "could not cast value to {}",
                    std::any::type_name::<$ARRAYTYPE>()
                ))
            })?;
        let slot_is_valid = typed_array.is_valid($index);
        let is_equal = if let Some(expected) = $VALUE {
            slot_is_valid && &typed_array.value($index) == expected
        } else {
            !slot_is_valid
        };
        Ok::<bool, DataFusionError>(is_equal)
    }};
}
984
985impl ScalarValue {
986    /// Create a [`Result<ScalarValue>`] with the provided value and datatype
987    ///
988    /// # Panics
989    ///
990    /// Panics if d is not compatible with T
991    pub fn new_primitive<T: ArrowPrimitiveType>(
992        a: Option<T::Native>,
993        d: &DataType,
994    ) -> Result<Self> {
995        match a {
996            None => d.try_into(),
997            Some(v) => {
998                let array = PrimitiveArray::<T>::new(vec![v].into(), None)
999                    .with_data_type(d.clone());
1000                Self::try_from_array(&array, 0)
1001            }
1002        }
1003    }
1004
1005    /// Create a decimal Scalar from value/precision and scale.
1006    pub fn try_new_decimal128(value: i128, precision: u8, scale: i8) -> Result<Self> {
1007        // make sure the precision and scale is valid
1008        if precision <= DECIMAL128_MAX_PRECISION && scale.unsigned_abs() <= precision {
1009            return Ok(ScalarValue::Decimal128(Some(value), precision, scale));
1010        }
1011        _internal_err!(
1012            "Can not new a decimal type ScalarValue for precision {precision} and scale {scale}"
1013        )
1014    }
1015
1016    /// Create a Null instance of ScalarValue for this datatype
1017    ///
1018    /// Example
1019    /// ```
1020    /// use datafusion_common::ScalarValue;
1021    /// use arrow::datatypes::DataType;
1022    ///
1023    /// let scalar = ScalarValue::try_new_null(&DataType::Int32).unwrap();
1024    /// assert_eq!(scalar.is_null(), true);
1025    /// assert_eq!(scalar.data_type(), DataType::Int32);
1026    /// ```
1027    pub fn try_new_null(data_type: &DataType) -> Result<Self> {
1028        Ok(match data_type {
1029            DataType::Boolean => ScalarValue::Boolean(None),
1030            DataType::Float16 => ScalarValue::Float16(None),
1031            DataType::Float64 => ScalarValue::Float64(None),
1032            DataType::Float32 => ScalarValue::Float32(None),
1033            DataType::Int8 => ScalarValue::Int8(None),
1034            DataType::Int16 => ScalarValue::Int16(None),
1035            DataType::Int32 => ScalarValue::Int32(None),
1036            DataType::Int64 => ScalarValue::Int64(None),
1037            DataType::UInt8 => ScalarValue::UInt8(None),
1038            DataType::UInt16 => ScalarValue::UInt16(None),
1039            DataType::UInt32 => ScalarValue::UInt32(None),
1040            DataType::UInt64 => ScalarValue::UInt64(None),
1041            DataType::Decimal128(precision, scale) => {
1042                ScalarValue::Decimal128(None, *precision, *scale)
1043            }
1044            DataType::Decimal256(precision, scale) => {
1045                ScalarValue::Decimal256(None, *precision, *scale)
1046            }
1047            DataType::Utf8 => ScalarValue::Utf8(None),
1048            DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
1049            DataType::Utf8View => ScalarValue::Utf8View(None),
1050            DataType::Binary => ScalarValue::Binary(None),
1051            DataType::BinaryView => ScalarValue::BinaryView(None),
1052            DataType::FixedSizeBinary(len) => ScalarValue::FixedSizeBinary(*len, None),
1053            DataType::LargeBinary => ScalarValue::LargeBinary(None),
1054            DataType::Date32 => ScalarValue::Date32(None),
1055            DataType::Date64 => ScalarValue::Date64(None),
1056            DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(None),
1057            DataType::Time32(TimeUnit::Millisecond) => {
1058                ScalarValue::Time32Millisecond(None)
1059            }
1060            DataType::Time64(TimeUnit::Microsecond) => {
1061                ScalarValue::Time64Microsecond(None)
1062            }
1063            DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64Nanosecond(None),
1064            DataType::Timestamp(TimeUnit::Second, tz_opt) => {
1065                ScalarValue::TimestampSecond(None, tz_opt.clone())
1066            }
1067            DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => {
1068                ScalarValue::TimestampMillisecond(None, tz_opt.clone())
1069            }
1070            DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => {
1071                ScalarValue::TimestampMicrosecond(None, tz_opt.clone())
1072            }
1073            DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => {
1074                ScalarValue::TimestampNanosecond(None, tz_opt.clone())
1075            }
1076            DataType::Interval(IntervalUnit::YearMonth) => {
1077                ScalarValue::IntervalYearMonth(None)
1078            }
1079            DataType::Interval(IntervalUnit::DayTime) => {
1080                ScalarValue::IntervalDayTime(None)
1081            }
1082            DataType::Interval(IntervalUnit::MonthDayNano) => {
1083                ScalarValue::IntervalMonthDayNano(None)
1084            }
1085            DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None),
1086            DataType::Duration(TimeUnit::Millisecond) => {
1087                ScalarValue::DurationMillisecond(None)
1088            }
1089            DataType::Duration(TimeUnit::Microsecond) => {
1090                ScalarValue::DurationMicrosecond(None)
1091            }
1092            DataType::Duration(TimeUnit::Nanosecond) => {
1093                ScalarValue::DurationNanosecond(None)
1094            }
1095            DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary(
1096                index_type.clone(),
1097                Box::new(value_type.as_ref().try_into()?),
1098            ),
1099            // `ScalaValue::List` contains single element `ListArray`.
1100            DataType::List(field_ref) => ScalarValue::List(Arc::new(
1101                GenericListArray::new_null(Arc::clone(field_ref), 1),
1102            )),
1103            // `ScalarValue::LargeList` contains single element `LargeListArray`.
1104            DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new(
1105                GenericListArray::new_null(Arc::clone(field_ref), 1),
1106            )),
1107            // `ScalaValue::FixedSizeList` contains single element `FixedSizeList`.
1108            DataType::FixedSizeList(field_ref, fixed_length) => {
1109                ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null(
1110                    Arc::clone(field_ref),
1111                    *fixed_length,
1112                    1,
1113                )))
1114            }
1115            DataType::Struct(fields) => ScalarValue::Struct(
1116                new_null_array(&DataType::Struct(fields.to_owned()), 1)
1117                    .as_struct()
1118                    .to_owned()
1119                    .into(),
1120            ),
1121            DataType::Map(fields, sorted) => ScalarValue::Map(
1122                new_null_array(&DataType::Map(fields.to_owned(), sorted.to_owned()), 1)
1123                    .as_map()
1124                    .to_owned()
1125                    .into(),
1126            ),
1127            DataType::Union(fields, mode) => {
1128                ScalarValue::Union(None, fields.clone(), *mode)
1129            }
1130            DataType::Null => ScalarValue::Null,
1131            _ => {
1132                return _not_impl_err!(
1133                    "Can't create a null scalar from data_type \"{data_type:?}\""
1134                );
1135            }
1136        })
1137    }
1138
1139    /// Returns a [`ScalarValue::Utf8`] representing `val`
1140    pub fn new_utf8(val: impl Into<String>) -> Self {
1141        ScalarValue::from(val.into())
1142    }
1143
1144    /// Returns a [`ScalarValue::Utf8View`] representing `val`
1145    pub fn new_utf8view(val: impl Into<String>) -> Self {
1146        ScalarValue::Utf8View(Some(val.into()))
1147    }
1148
1149    /// Returns a [`ScalarValue::IntervalYearMonth`] representing
1150    /// `years` years and `months` months
1151    pub fn new_interval_ym(years: i32, months: i32) -> Self {
1152        let val = IntervalYearMonthType::make_value(years, months);
1153        ScalarValue::IntervalYearMonth(Some(val))
1154    }
1155
1156    /// Returns a [`ScalarValue::IntervalDayTime`] representing
1157    /// `days` days and `millis` milliseconds
1158    pub fn new_interval_dt(days: i32, millis: i32) -> Self {
1159        let val = IntervalDayTimeType::make_value(days, millis);
1160        Self::IntervalDayTime(Some(val))
1161    }
1162
1163    /// Returns a [`ScalarValue::IntervalMonthDayNano`] representing
1164    /// `months` months and `days` days, and `nanos` nanoseconds
1165    pub fn new_interval_mdn(months: i32, days: i32, nanos: i64) -> Self {
1166        let val = IntervalMonthDayNanoType::make_value(months, days, nanos);
1167        ScalarValue::IntervalMonthDayNano(Some(val))
1168    }
1169
1170    /// Returns a [`ScalarValue`] representing
1171    /// `value` and `tz_opt` timezone
1172    pub fn new_timestamp<T: ArrowTimestampType>(
1173        value: Option<i64>,
1174        tz_opt: Option<Arc<str>>,
1175    ) -> Self {
1176        match T::UNIT {
1177            TimeUnit::Second => ScalarValue::TimestampSecond(value, tz_opt),
1178            TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(value, tz_opt),
1179            TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(value, tz_opt),
1180            TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(value, tz_opt),
1181        }
1182    }
1183
1184    /// Returns a [`ScalarValue`] representing PI
1185    pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> {
1186        match datatype {
1187            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)),
1188            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)),
1189            _ => _internal_err!("PI is not supported for data type: {:?}", datatype),
1190        }
1191    }
1192
1193    /// Returns a [`ScalarValue`] representing PI's upper bound
1194    pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> {
1195        // TODO: replace the constants with next_up/next_down when
1196        // they are stabilized: https://doc.rust-lang.org/std/primitive.f64.html#method.next_up
1197        match datatype {
1198            DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)),
1199            DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)),
1200            _ => {
1201                _internal_err!("PI_UPPER is not supported for data type: {:?}", datatype)
1202            }
1203        }
1204    }
1205
1206    /// Returns a [`ScalarValue`] representing -PI's lower bound
1207    pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> {
1208        match datatype {
1209            DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)),
1210            DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)),
1211            _ => {
1212                _internal_err!("-PI_LOWER is not supported for data type: {:?}", datatype)
1213            }
1214        }
1215    }
1216
1217    /// Returns a [`ScalarValue`] representing FRAC_PI_2's upper bound
1218    pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> {
1219        match datatype {
1220            DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)),
1221            DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)),
1222            _ => {
1223                _internal_err!(
1224                    "PI_UPPER/2 is not supported for data type: {:?}",
1225                    datatype
1226                )
1227            }
1228        }
1229    }
1230
1231    // Returns a [`ScalarValue`] representing FRAC_PI_2's lower bound
1232    pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> {
1233        match datatype {
1234            DataType::Float32 => {
1235                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32))
1236            }
1237            DataType::Float64 => {
1238                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64))
1239            }
1240            _ => {
1241                _internal_err!(
1242                    "-PI/2_LOWER is not supported for data type: {:?}",
1243                    datatype
1244                )
1245            }
1246        }
1247    }
1248
1249    /// Returns a [`ScalarValue`] representing -PI
1250    pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> {
1251        match datatype {
1252            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)),
1253            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)),
1254            _ => _internal_err!("-PI is not supported for data type: {:?}", datatype),
1255        }
1256    }
1257
1258    /// Returns a [`ScalarValue`] representing PI/2
1259    pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1260        match datatype {
1261            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)),
1262            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)),
1263            _ => _internal_err!("PI/2 is not supported for data type: {:?}", datatype),
1264        }
1265    }
1266
1267    /// Returns a [`ScalarValue`] representing -PI/2
1268    pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1269        match datatype {
1270            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)),
1271            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)),
1272            _ => _internal_err!("-PI/2 is not supported for data type: {:?}", datatype),
1273        }
1274    }
1275
1276    /// Returns a [`ScalarValue`] representing infinity
1277    pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> {
1278        match datatype {
1279            DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)),
1280            DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)),
1281            _ => {
1282                _internal_err!("Infinity is not supported for data type: {:?}", datatype)
1283            }
1284        }
1285    }
1286
1287    /// Returns a [`ScalarValue`] representing negative infinity
1288    pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> {
1289        match datatype {
1290            DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)),
1291            DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)),
1292            _ => {
1293                _internal_err!(
1294                    "Negative Infinity is not supported for data type: {:?}",
1295                    datatype
1296                )
1297            }
1298        }
1299    }
1300
1301    /// Create a zero value in the given type.
1302    pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> {
1303        Ok(match datatype {
1304            DataType::Boolean => ScalarValue::Boolean(Some(false)),
1305            DataType::Int8 => ScalarValue::Int8(Some(0)),
1306            DataType::Int16 => ScalarValue::Int16(Some(0)),
1307            DataType::Int32 => ScalarValue::Int32(Some(0)),
1308            DataType::Int64 => ScalarValue::Int64(Some(0)),
1309            DataType::UInt8 => ScalarValue::UInt8(Some(0)),
1310            DataType::UInt16 => ScalarValue::UInt16(Some(0)),
1311            DataType::UInt32 => ScalarValue::UInt32(Some(0)),
1312            DataType::UInt64 => ScalarValue::UInt64(Some(0)),
1313            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(0.0))),
1314            DataType::Float32 => ScalarValue::Float32(Some(0.0)),
1315            DataType::Float64 => ScalarValue::Float64(Some(0.0)),
1316            DataType::Decimal128(precision, scale) => {
1317                ScalarValue::Decimal128(Some(0), *precision, *scale)
1318            }
1319            DataType::Decimal256(precision, scale) => {
1320                ScalarValue::Decimal256(Some(i256::ZERO), *precision, *scale)
1321            }
1322            DataType::Timestamp(TimeUnit::Second, tz) => {
1323                ScalarValue::TimestampSecond(Some(0), tz.clone())
1324            }
1325            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1326                ScalarValue::TimestampMillisecond(Some(0), tz.clone())
1327            }
1328            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
1329                ScalarValue::TimestampMicrosecond(Some(0), tz.clone())
1330            }
1331            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
1332                ScalarValue::TimestampNanosecond(Some(0), tz.clone())
1333            }
1334            DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(Some(0)),
1335            DataType::Time32(TimeUnit::Millisecond) => {
1336                ScalarValue::Time32Millisecond(Some(0))
1337            }
1338            DataType::Time64(TimeUnit::Microsecond) => {
1339                ScalarValue::Time64Microsecond(Some(0))
1340            }
1341            DataType::Time64(TimeUnit::Nanosecond) => {
1342                ScalarValue::Time64Nanosecond(Some(0))
1343            }
1344            DataType::Interval(IntervalUnit::YearMonth) => {
1345                ScalarValue::IntervalYearMonth(Some(0))
1346            }
1347            DataType::Interval(IntervalUnit::DayTime) => {
1348                ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
1349            }
1350            DataType::Interval(IntervalUnit::MonthDayNano) => {
1351                ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
1352            }
1353            DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(Some(0)),
1354            DataType::Duration(TimeUnit::Millisecond) => {
1355                ScalarValue::DurationMillisecond(Some(0))
1356            }
1357            DataType::Duration(TimeUnit::Microsecond) => {
1358                ScalarValue::DurationMicrosecond(Some(0))
1359            }
1360            DataType::Duration(TimeUnit::Nanosecond) => {
1361                ScalarValue::DurationNanosecond(Some(0))
1362            }
1363            DataType::Date32 => ScalarValue::Date32(Some(0)),
1364            DataType::Date64 => ScalarValue::Date64(Some(0)),
1365            _ => {
1366                return _not_impl_err!(
1367                    "Can't create a zero scalar from data_type \"{datatype:?}\""
1368                );
1369            }
1370        })
1371    }
1372
1373    /// Create an one value in the given type.
1374    pub fn new_one(datatype: &DataType) -> Result<ScalarValue> {
1375        Ok(match datatype {
1376            DataType::Int8 => ScalarValue::Int8(Some(1)),
1377            DataType::Int16 => ScalarValue::Int16(Some(1)),
1378            DataType::Int32 => ScalarValue::Int32(Some(1)),
1379            DataType::Int64 => ScalarValue::Int64(Some(1)),
1380            DataType::UInt8 => ScalarValue::UInt8(Some(1)),
1381            DataType::UInt16 => ScalarValue::UInt16(Some(1)),
1382            DataType::UInt32 => ScalarValue::UInt32(Some(1)),
1383            DataType::UInt64 => ScalarValue::UInt64(Some(1)),
1384            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(1.0))),
1385            DataType::Float32 => ScalarValue::Float32(Some(1.0)),
1386            DataType::Float64 => ScalarValue::Float64(Some(1.0)),
1387            _ => {
1388                return _not_impl_err!(
1389                    "Can't create an one scalar from data_type \"{datatype:?}\""
1390                );
1391            }
1392        })
1393    }
1394
1395    /// Create a negative one value in the given type.
1396    pub fn new_negative_one(datatype: &DataType) -> Result<ScalarValue> {
1397        Ok(match datatype {
1398            DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)),
1399            DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)),
1400            DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)),
1401            DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)),
1402            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(-1.0))),
1403            DataType::Float32 => ScalarValue::Float32(Some(-1.0)),
1404            DataType::Float64 => ScalarValue::Float64(Some(-1.0)),
1405            _ => {
1406                return _not_impl_err!(
1407                    "Can't create a negative one scalar from data_type \"{datatype:?}\""
1408                );
1409            }
1410        })
1411    }
1412
1413    pub fn new_ten(datatype: &DataType) -> Result<ScalarValue> {
1414        Ok(match datatype {
1415            DataType::Int8 => ScalarValue::Int8(Some(10)),
1416            DataType::Int16 => ScalarValue::Int16(Some(10)),
1417            DataType::Int32 => ScalarValue::Int32(Some(10)),
1418            DataType::Int64 => ScalarValue::Int64(Some(10)),
1419            DataType::UInt8 => ScalarValue::UInt8(Some(10)),
1420            DataType::UInt16 => ScalarValue::UInt16(Some(10)),
1421            DataType::UInt32 => ScalarValue::UInt32(Some(10)),
1422            DataType::UInt64 => ScalarValue::UInt64(Some(10)),
1423            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(10.0))),
1424            DataType::Float32 => ScalarValue::Float32(Some(10.0)),
1425            DataType::Float64 => ScalarValue::Float64(Some(10.0)),
1426            _ => {
1427                return _not_impl_err!(
1428                    "Can't create a ten scalar from data_type \"{datatype:?}\""
1429                );
1430            }
1431        })
1432    }
1433
1434    /// return the [`DataType`] of this `ScalarValue`
1435    pub fn data_type(&self) -> DataType {
1436        match self {
1437            ScalarValue::Boolean(_) => DataType::Boolean,
1438            ScalarValue::UInt8(_) => DataType::UInt8,
1439            ScalarValue::UInt16(_) => DataType::UInt16,
1440            ScalarValue::UInt32(_) => DataType::UInt32,
1441            ScalarValue::UInt64(_) => DataType::UInt64,
1442            ScalarValue::Int8(_) => DataType::Int8,
1443            ScalarValue::Int16(_) => DataType::Int16,
1444            ScalarValue::Int32(_) => DataType::Int32,
1445            ScalarValue::Int64(_) => DataType::Int64,
1446            ScalarValue::Decimal128(_, precision, scale) => {
1447                DataType::Decimal128(*precision, *scale)
1448            }
1449            ScalarValue::Decimal256(_, precision, scale) => {
1450                DataType::Decimal256(*precision, *scale)
1451            }
1452            ScalarValue::TimestampSecond(_, tz_opt) => {
1453                DataType::Timestamp(TimeUnit::Second, tz_opt.clone())
1454            }
1455            ScalarValue::TimestampMillisecond(_, tz_opt) => {
1456                DataType::Timestamp(TimeUnit::Millisecond, tz_opt.clone())
1457            }
1458            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
1459                DataType::Timestamp(TimeUnit::Microsecond, tz_opt.clone())
1460            }
1461            ScalarValue::TimestampNanosecond(_, tz_opt) => {
1462                DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1463            }
1464            ScalarValue::Float16(_) => DataType::Float16,
1465            ScalarValue::Float32(_) => DataType::Float32,
1466            ScalarValue::Float64(_) => DataType::Float64,
1467            ScalarValue::Utf8(_) => DataType::Utf8,
1468            ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
1469            ScalarValue::Utf8View(_) => DataType::Utf8View,
1470            ScalarValue::Binary(_) => DataType::Binary,
1471            ScalarValue::BinaryView(_) => DataType::BinaryView,
1472            ScalarValue::FixedSizeBinary(sz, _) => DataType::FixedSizeBinary(*sz),
1473            ScalarValue::LargeBinary(_) => DataType::LargeBinary,
1474            ScalarValue::List(arr) => arr.data_type().to_owned(),
1475            ScalarValue::LargeList(arr) => arr.data_type().to_owned(),
1476            ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(),
1477            ScalarValue::Struct(arr) => arr.data_type().to_owned(),
1478            ScalarValue::Map(arr) => arr.data_type().to_owned(),
1479            ScalarValue::Date32(_) => DataType::Date32,
1480            ScalarValue::Date64(_) => DataType::Date64,
1481            ScalarValue::Time32Second(_) => DataType::Time32(TimeUnit::Second),
1482            ScalarValue::Time32Millisecond(_) => DataType::Time32(TimeUnit::Millisecond),
1483            ScalarValue::Time64Microsecond(_) => DataType::Time64(TimeUnit::Microsecond),
1484            ScalarValue::Time64Nanosecond(_) => DataType::Time64(TimeUnit::Nanosecond),
1485            ScalarValue::IntervalYearMonth(_) => {
1486                DataType::Interval(IntervalUnit::YearMonth)
1487            }
1488            ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime),
1489            ScalarValue::IntervalMonthDayNano(_) => {
1490                DataType::Interval(IntervalUnit::MonthDayNano)
1491            }
1492            ScalarValue::DurationSecond(_) => DataType::Duration(TimeUnit::Second),
1493            ScalarValue::DurationMillisecond(_) => {
1494                DataType::Duration(TimeUnit::Millisecond)
1495            }
1496            ScalarValue::DurationMicrosecond(_) => {
1497                DataType::Duration(TimeUnit::Microsecond)
1498            }
1499            ScalarValue::DurationNanosecond(_) => {
1500                DataType::Duration(TimeUnit::Nanosecond)
1501            }
1502            ScalarValue::Union(_, fields, mode) => DataType::Union(fields.clone(), *mode),
1503            ScalarValue::Dictionary(k, v) => {
1504                DataType::Dictionary(k.clone(), Box::new(v.data_type()))
1505            }
1506            ScalarValue::Null => DataType::Null,
1507        }
1508    }
1509
1510    /// Calculate arithmetic negation for a scalar value
1511    pub fn arithmetic_negate(&self) -> Result<Self> {
1512        fn neg_checked_with_ctx<T: ArrowNativeTypeOp>(
1513            v: T,
1514            ctx: impl Fn() -> String,
1515        ) -> Result<T> {
1516            v.neg_checked()
1517                .map_err(|e| arrow_datafusion_err!(e).context(ctx()))
1518        }
1519        match self {
1520            ScalarValue::Int8(None)
1521            | ScalarValue::Int16(None)
1522            | ScalarValue::Int32(None)
1523            | ScalarValue::Int64(None)
1524            | ScalarValue::Float16(None)
1525            | ScalarValue::Float32(None)
1526            | ScalarValue::Float64(None) => Ok(self.clone()),
1527            ScalarValue::Float16(Some(v)) => {
1528                Ok(ScalarValue::Float16(Some(f16::from_f32(-v.to_f32()))))
1529            }
1530            ScalarValue::Float64(Some(v)) => Ok(ScalarValue::Float64(Some(-v))),
1531            ScalarValue::Float32(Some(v)) => Ok(ScalarValue::Float32(Some(-v))),
1532            ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(v.neg_checked()?))),
1533            ScalarValue::Int16(Some(v)) => Ok(ScalarValue::Int16(Some(v.neg_checked()?))),
1534            ScalarValue::Int32(Some(v)) => Ok(ScalarValue::Int32(Some(v.neg_checked()?))),
1535            ScalarValue::Int64(Some(v)) => Ok(ScalarValue::Int64(Some(v.neg_checked()?))),
1536            ScalarValue::IntervalYearMonth(Some(v)) => Ok(
1537                ScalarValue::IntervalYearMonth(Some(neg_checked_with_ctx(*v, || {
1538                    format!("In negation of IntervalYearMonth({v})")
1539                })?)),
1540            ),
1541            ScalarValue::IntervalDayTime(Some(v)) => {
1542                let (days, ms) = IntervalDayTimeType::to_parts(*v);
1543                let val = IntervalDayTimeType::make_value(
1544                    neg_checked_with_ctx(days, || {
1545                        format!("In negation of days {days} in IntervalDayTime")
1546                    })?,
1547                    neg_checked_with_ctx(ms, || {
1548                        format!("In negation of milliseconds {ms} in IntervalDayTime")
1549                    })?,
1550                );
1551                Ok(ScalarValue::IntervalDayTime(Some(val)))
1552            }
1553            ScalarValue::IntervalMonthDayNano(Some(v)) => {
1554                let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);
1555                let val = IntervalMonthDayNanoType::make_value(
1556                    neg_checked_with_ctx(months, || {
1557                        format!("In negation of months {months} of IntervalMonthDayNano")
1558                    })?,
1559                    neg_checked_with_ctx(days, || {
1560                        format!("In negation of days {days} of IntervalMonthDayNano")
1561                    })?,
1562                    neg_checked_with_ctx(nanos, || {
1563                        format!("In negation of nanos {nanos} of IntervalMonthDayNano")
1564                    })?,
1565                );
1566                Ok(ScalarValue::IntervalMonthDayNano(Some(val)))
1567            }
1568            ScalarValue::Decimal128(Some(v), precision, scale) => {
1569                Ok(ScalarValue::Decimal128(
1570                    Some(neg_checked_with_ctx(*v, || {
1571                        format!("In negation of Decimal128({v}, {precision}, {scale})")
1572                    })?),
1573                    *precision,
1574                    *scale,
1575                ))
1576            }
1577            ScalarValue::Decimal256(Some(v), precision, scale) => {
1578                Ok(ScalarValue::Decimal256(
1579                    Some(neg_checked_with_ctx(*v, || {
1580                        format!("In negation of Decimal256({v}, {precision}, {scale})")
1581                    })?),
1582                    *precision,
1583                    *scale,
1584                ))
1585            }
1586            ScalarValue::TimestampSecond(Some(v), tz) => {
1587                Ok(ScalarValue::TimestampSecond(
1588                    Some(neg_checked_with_ctx(*v, || {
1589                        format!("In negation of TimestampSecond({v})")
1590                    })?),
1591                    tz.clone(),
1592                ))
1593            }
1594            ScalarValue::TimestampNanosecond(Some(v), tz) => {
1595                Ok(ScalarValue::TimestampNanosecond(
1596                    Some(neg_checked_with_ctx(*v, || {
1597                        format!("In negation of TimestampNanoSecond({v})")
1598                    })?),
1599                    tz.clone(),
1600                ))
1601            }
1602            ScalarValue::TimestampMicrosecond(Some(v), tz) => {
1603                Ok(ScalarValue::TimestampMicrosecond(
1604                    Some(neg_checked_with_ctx(*v, || {
1605                        format!("In negation of TimestampMicroSecond({v})")
1606                    })?),
1607                    tz.clone(),
1608                ))
1609            }
1610            ScalarValue::TimestampMillisecond(Some(v), tz) => {
1611                Ok(ScalarValue::TimestampMillisecond(
1612                    Some(neg_checked_with_ctx(*v, || {
1613                        format!("In negation of TimestampMilliSecond({v})")
1614                    })?),
1615                    tz.clone(),
1616                ))
1617            }
1618            value => _internal_err!(
1619                "Can not run arithmetic negative on scalar value {value:?}"
1620            ),
1621        }
1622    }
1623
1624    /// Wrapping addition of `ScalarValue`
1625    ///
1626    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1627    /// should operate on Arrays directly, using vectorized array kernels
1628    pub fn add<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1629        let r = add_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1630        Self::try_from_array(r.as_ref(), 0)
1631    }
1632    /// Checked addition of `ScalarValue`
1633    ///
1634    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1635    /// should operate on Arrays directly, using vectorized array kernels
1636    pub fn add_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1637        let r = add(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1638        Self::try_from_array(r.as_ref(), 0)
1639    }
1640
1641    /// Wrapping subtraction of `ScalarValue`
1642    ///
1643    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1644    /// should operate on Arrays directly, using vectorized array kernels
1645    pub fn sub<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1646        let r = sub_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1647        Self::try_from_array(r.as_ref(), 0)
1648    }
1649
1650    /// Checked subtraction of `ScalarValue`
1651    ///
1652    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1653    /// should operate on Arrays directly, using vectorized array kernels
1654    pub fn sub_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1655        let r = sub(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1656        Self::try_from_array(r.as_ref(), 0)
1657    }
1658
1659    /// Wrapping multiplication of `ScalarValue`
1660    ///
1661    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1662    /// should operate on Arrays directly, using vectorized array kernels.
1663    pub fn mul<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1664        let r = mul_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1665        Self::try_from_array(r.as_ref(), 0)
1666    }
1667
1668    /// Checked multiplication of `ScalarValue`
1669    ///
1670    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1671    /// should operate on Arrays directly, using vectorized array kernels.
1672    pub fn mul_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1673        let r = mul(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1674        Self::try_from_array(r.as_ref(), 0)
1675    }
1676
1677    /// Performs `lhs / rhs`
1678    ///
1679    /// Overflow or division by zero will result in an error, with exception to
1680    /// floating point numbers, which instead follow the IEEE 754 rules.
1681    ///
1682    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1683    /// should operate on Arrays directly, using vectorized array kernels.
1684    pub fn div<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1685        let r = div(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1686        Self::try_from_array(r.as_ref(), 0)
1687    }
1688
1689    /// Performs `lhs % rhs`
1690    ///
1691    /// Overflow or division by zero will result in an error, with exception to
1692    /// floating point numbers, which instead follow the IEEE 754 rules.
1693    ///
1694    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1695    /// should operate on Arrays directly, using vectorized array kernels.
1696    pub fn rem<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1697        let r = rem(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1698        Self::try_from_array(r.as_ref(), 0)
1699    }
1700
1701    pub fn is_unsigned(&self) -> bool {
1702        matches!(
1703            self,
1704            ScalarValue::UInt8(_)
1705                | ScalarValue::UInt16(_)
1706                | ScalarValue::UInt32(_)
1707                | ScalarValue::UInt64(_)
1708        )
1709    }
1710
1711    /// whether this value is null or not.
1712    pub fn is_null(&self) -> bool {
1713        match self {
1714            ScalarValue::Boolean(v) => v.is_none(),
1715            ScalarValue::Null => true,
1716            ScalarValue::Float16(v) => v.is_none(),
1717            ScalarValue::Float32(v) => v.is_none(),
1718            ScalarValue::Float64(v) => v.is_none(),
1719            ScalarValue::Decimal128(v, _, _) => v.is_none(),
1720            ScalarValue::Decimal256(v, _, _) => v.is_none(),
1721            ScalarValue::Int8(v) => v.is_none(),
1722            ScalarValue::Int16(v) => v.is_none(),
1723            ScalarValue::Int32(v) => v.is_none(),
1724            ScalarValue::Int64(v) => v.is_none(),
1725            ScalarValue::UInt8(v) => v.is_none(),
1726            ScalarValue::UInt16(v) => v.is_none(),
1727            ScalarValue::UInt32(v) => v.is_none(),
1728            ScalarValue::UInt64(v) => v.is_none(),
1729            ScalarValue::Utf8(v)
1730            | ScalarValue::Utf8View(v)
1731            | ScalarValue::LargeUtf8(v) => v.is_none(),
1732            ScalarValue::Binary(v)
1733            | ScalarValue::BinaryView(v)
1734            | ScalarValue::FixedSizeBinary(_, v)
1735            | ScalarValue::LargeBinary(v) => v.is_none(),
1736            // arr.len() should be 1 for a list scalar, but we don't seem to
1737            // enforce that anywhere, so we still check against array length.
1738            ScalarValue::List(arr) => arr.len() == arr.null_count(),
1739            ScalarValue::LargeList(arr) => arr.len() == arr.null_count(),
1740            ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(),
1741            ScalarValue::Struct(arr) => arr.len() == arr.null_count(),
1742            ScalarValue::Map(arr) => arr.len() == arr.null_count(),
1743            ScalarValue::Date32(v) => v.is_none(),
1744            ScalarValue::Date64(v) => v.is_none(),
1745            ScalarValue::Time32Second(v) => v.is_none(),
1746            ScalarValue::Time32Millisecond(v) => v.is_none(),
1747            ScalarValue::Time64Microsecond(v) => v.is_none(),
1748            ScalarValue::Time64Nanosecond(v) => v.is_none(),
1749            ScalarValue::TimestampSecond(v, _) => v.is_none(),
1750            ScalarValue::TimestampMillisecond(v, _) => v.is_none(),
1751            ScalarValue::TimestampMicrosecond(v, _) => v.is_none(),
1752            ScalarValue::TimestampNanosecond(v, _) => v.is_none(),
1753            ScalarValue::IntervalYearMonth(v) => v.is_none(),
1754            ScalarValue::IntervalDayTime(v) => v.is_none(),
1755            ScalarValue::IntervalMonthDayNano(v) => v.is_none(),
1756            ScalarValue::DurationSecond(v) => v.is_none(),
1757            ScalarValue::DurationMillisecond(v) => v.is_none(),
1758            ScalarValue::DurationMicrosecond(v) => v.is_none(),
1759            ScalarValue::DurationNanosecond(v) => v.is_none(),
1760            ScalarValue::Union(v, _, _) => match v {
1761                Some((_, s)) => s.is_null(),
1762                None => true,
1763            },
1764            ScalarValue::Dictionary(_, v) => v.is_null(),
1765        }
1766    }
1767
1768    /// Absolute distance between two numeric values (of the same type). This method will return
1769    /// None if either one of the arguments are null. It might also return None if the resulting
1770    /// distance is greater than [`usize::MAX`]. If the type is a float, then the distance will be
1771    /// rounded to the nearest integer.
1772    ///
1773    ///
1774    /// Note: the datatype itself must support subtraction.
1775    pub fn distance(&self, other: &ScalarValue) -> Option<usize> {
1776        match (self, other) {
1777            (Self::Int8(Some(l)), Self::Int8(Some(r))) => Some(l.abs_diff(*r) as _),
1778            (Self::Int16(Some(l)), Self::Int16(Some(r))) => Some(l.abs_diff(*r) as _),
1779            (Self::Int32(Some(l)), Self::Int32(Some(r))) => Some(l.abs_diff(*r) as _),
1780            (Self::Int64(Some(l)), Self::Int64(Some(r))) => Some(l.abs_diff(*r) as _),
1781            (Self::UInt8(Some(l)), Self::UInt8(Some(r))) => Some(l.abs_diff(*r) as _),
1782            (Self::UInt16(Some(l)), Self::UInt16(Some(r))) => Some(l.abs_diff(*r) as _),
1783            (Self::UInt32(Some(l)), Self::UInt32(Some(r))) => Some(l.abs_diff(*r) as _),
1784            (Self::UInt64(Some(l)), Self::UInt64(Some(r))) => Some(l.abs_diff(*r) as _),
1785            // TODO: we might want to look into supporting ceil/floor here for floats.
1786            (Self::Float16(Some(l)), Self::Float16(Some(r))) => {
1787                Some((f16::to_f32(*l) - f16::to_f32(*r)).abs().round() as _)
1788            }
1789            (Self::Float32(Some(l)), Self::Float32(Some(r))) => {
1790                Some((l - r).abs().round() as _)
1791            }
1792            (Self::Float64(Some(l)), Self::Float64(Some(r))) => {
1793                Some((l - r).abs().round() as _)
1794            }
1795            _ => None,
1796        }
1797    }
1798
    /// Converts a scalar value into a 1-row array.
    ///
    /// This is a convenience wrapper around `to_array_of_size` with a size
    /// of 1.
    ///
    /// # Errors
    ///
    /// Errors if the ScalarValue cannot be converted into a 1-row array
    pub fn to_array(&self) -> Result<ArrayRef> {
        self.to_array_of_size(1)
    }
1807
    /// Converts a scalar into an arrow [`Scalar`] (which implements
    /// the [`Datum`] interface).
    ///
    /// The returned [`Scalar`] wraps a 1-row array built from this value.
    ///
    /// This can be used to call arrow compute kernels such as `lt`
    ///
    /// # Errors
    ///
    /// Errors if the ScalarValue cannot be converted into a 1-row array
    ///
    /// # Example
    /// ```
    /// use datafusion_common::ScalarValue;
    /// use arrow::array::{BooleanArray, Int32Array};
    ///
    /// let arr = Int32Array::from(vec![Some(1), None, Some(10)]);
    /// let five = ScalarValue::Int32(Some(5));
    ///
    /// let result = arrow::compute::kernels::cmp::lt(
    ///   &arr,
    ///   &five.to_scalar().unwrap(),
    /// ).unwrap();
    ///
    /// let expected = BooleanArray::from(vec![
    ///     Some(true),
    ///     None,
    ///     Some(false)
    ///   ]
    /// );
    ///
    /// assert_eq!(&result, &expected);
    /// ```
    /// [`Datum`]: arrow::array::Datum
    pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
        Ok(Scalar::new(self.to_array_of_size(1)?))
    }
1843
    /// Converts an iterator of owned [`ScalarValue`]s into an [`ArrayRef`]
1845    /// corresponding to those values. For example, an iterator of
1846    /// [`ScalarValue::Int32`] would be converted to an [`Int32Array`].
1847    ///
1848    /// Returns an error if the iterator is empty or if the
1849    /// [`ScalarValue`]s are not all the same type
1850    ///
1851    /// # Panics
1852    ///
    /// Panics if the scalars are dictionary values with an invalid key type
1854    ///
1855    /// # Example
1856    /// ```
1857    /// use datafusion_common::ScalarValue;
1858    /// use arrow::array::{ArrayRef, BooleanArray};
1859    ///
1860    /// let scalars = vec![
1861    ///   ScalarValue::Boolean(Some(true)),
1862    ///   ScalarValue::Boolean(None),
1863    ///   ScalarValue::Boolean(Some(false)),
1864    /// ];
1865    ///
1866    /// // Build an Array from the list of ScalarValues
1867    /// let array = ScalarValue::iter_to_array(scalars.into_iter())
1868    ///   .unwrap();
1869    ///
1870    /// let expected: ArrayRef = std::sync::Arc::new(
1871    ///   BooleanArray::from(vec![
1872    ///     Some(true),
1873    ///     None,
1874    ///     Some(false)
1875    ///   ]
1876    /// ));
1877    ///
1878    /// assert_eq!(&array, &expected);
1879    /// ```
1880    pub fn iter_to_array(
1881        scalars: impl IntoIterator<Item = ScalarValue>,
1882    ) -> Result<ArrayRef> {
1883        let mut scalars = scalars.into_iter().peekable();
1884
1885        // figure out the type based on the first element
1886        let data_type = match scalars.peek() {
1887            None => {
1888                return _exec_err!("Empty iterator passed to ScalarValue::iter_to_array");
1889            }
1890            Some(sv) => sv.data_type(),
1891        };
1892
1893        /// Creates an array of $ARRAY_TY by unpacking values of
1894        /// SCALAR_TY for primitive types
1895        macro_rules! build_array_primitive {
1896            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
1897                {
1898                    let array = scalars.map(|sv| {
1899                        if let ScalarValue::$SCALAR_TY(v) = sv {
1900                            Ok(v)
1901                        } else {
1902                            _exec_err!(
1903                                "Inconsistent types in ScalarValue::iter_to_array. \
1904                                    Expected {:?}, got {:?}",
1905                                data_type, sv
1906                            )
1907                        }
1908                    })
1909                    .collect::<Result<$ARRAY_TY>>()?;
1910                    Arc::new(array)
1911                }
1912            }};
1913        }
1914
1915        macro_rules! build_array_primitive_tz {
1916            ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
1917                {
1918                    let array = scalars.map(|sv| {
1919                        if let ScalarValue::$SCALAR_TY(v, _) = sv {
1920                            Ok(v)
1921                        } else {
1922                            _exec_err!(
1923                                "Inconsistent types in ScalarValue::iter_to_array. \
1924                                    Expected {:?}, got {:?}",
1925                                data_type, sv
1926                            )
1927                        }
1928                    })
1929                    .collect::<Result<$ARRAY_TY>>()?;
1930                    Arc::new(array.with_timezone_opt($TZ.clone()))
1931                }
1932            }};
1933        }
1934
1935        /// Creates an array of $ARRAY_TY by unpacking values of
1936        /// SCALAR_TY for "string-like" types.
1937        macro_rules! build_array_string {
1938            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
1939                {
1940                    let array = scalars.map(|sv| {
1941                        if let ScalarValue::$SCALAR_TY(v) = sv {
1942                            Ok(v)
1943                        } else {
1944                            _exec_err!(
1945                                "Inconsistent types in ScalarValue::iter_to_array. \
1946                                    Expected {:?}, got {:?}",
1947                                data_type, sv
1948                            )
1949                        }
1950                    })
1951                    .collect::<Result<$ARRAY_TY>>()?;
1952                    Arc::new(array)
1953                }
1954            }};
1955        }
1956
1957        let array: ArrayRef = match &data_type {
1958            DataType::Decimal128(precision, scale) => {
1959                let decimal_array =
1960                    ScalarValue::iter_to_decimal_array(scalars, *precision, *scale)?;
1961                Arc::new(decimal_array)
1962            }
1963            DataType::Decimal256(precision, scale) => {
1964                let decimal_array =
1965                    ScalarValue::iter_to_decimal256_array(scalars, *precision, *scale)?;
1966                Arc::new(decimal_array)
1967            }
1968            DataType::Null => ScalarValue::iter_to_null_array(scalars)?,
1969            DataType::Boolean => build_array_primitive!(BooleanArray, Boolean),
1970            DataType::Float16 => build_array_primitive!(Float16Array, Float16),
1971            DataType::Float32 => build_array_primitive!(Float32Array, Float32),
1972            DataType::Float64 => build_array_primitive!(Float64Array, Float64),
1973            DataType::Int8 => build_array_primitive!(Int8Array, Int8),
1974            DataType::Int16 => build_array_primitive!(Int16Array, Int16),
1975            DataType::Int32 => build_array_primitive!(Int32Array, Int32),
1976            DataType::Int64 => build_array_primitive!(Int64Array, Int64),
1977            DataType::UInt8 => build_array_primitive!(UInt8Array, UInt8),
1978            DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
1979            DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
1980            DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
1981            DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
1982            DataType::Utf8 => build_array_string!(StringArray, Utf8),
1983            DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
1984            DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView),
1985            DataType::Binary => build_array_string!(BinaryArray, Binary),
1986            DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
1987            DataType::Date32 => build_array_primitive!(Date32Array, Date32),
1988            DataType::Date64 => build_array_primitive!(Date64Array, Date64),
1989            DataType::Time32(TimeUnit::Second) => {
1990                build_array_primitive!(Time32SecondArray, Time32Second)
1991            }
1992            DataType::Time32(TimeUnit::Millisecond) => {
1993                build_array_primitive!(Time32MillisecondArray, Time32Millisecond)
1994            }
1995            DataType::Time64(TimeUnit::Microsecond) => {
1996                build_array_primitive!(Time64MicrosecondArray, Time64Microsecond)
1997            }
1998            DataType::Time64(TimeUnit::Nanosecond) => {
1999                build_array_primitive!(Time64NanosecondArray, Time64Nanosecond)
2000            }
2001            DataType::Timestamp(TimeUnit::Second, tz) => {
2002                build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz)
2003            }
2004            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
2005                build_array_primitive_tz!(
2006                    TimestampMillisecondArray,
2007                    TimestampMillisecond,
2008                    tz
2009                )
2010            }
2011            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
2012                build_array_primitive_tz!(
2013                    TimestampMicrosecondArray,
2014                    TimestampMicrosecond,
2015                    tz
2016                )
2017            }
2018            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
2019                build_array_primitive_tz!(
2020                    TimestampNanosecondArray,
2021                    TimestampNanosecond,
2022                    tz
2023                )
2024            }
2025            DataType::Duration(TimeUnit::Second) => {
2026                build_array_primitive!(DurationSecondArray, DurationSecond)
2027            }
2028            DataType::Duration(TimeUnit::Millisecond) => {
2029                build_array_primitive!(DurationMillisecondArray, DurationMillisecond)
2030            }
2031            DataType::Duration(TimeUnit::Microsecond) => {
2032                build_array_primitive!(DurationMicrosecondArray, DurationMicrosecond)
2033            }
2034            DataType::Duration(TimeUnit::Nanosecond) => {
2035                build_array_primitive!(DurationNanosecondArray, DurationNanosecond)
2036            }
2037            DataType::Interval(IntervalUnit::DayTime) => {
2038                build_array_primitive!(IntervalDayTimeArray, IntervalDayTime)
2039            }
2040            DataType::Interval(IntervalUnit::YearMonth) => {
2041                build_array_primitive!(IntervalYearMonthArray, IntervalYearMonth)
2042            }
2043            DataType::Interval(IntervalUnit::MonthDayNano) => {
2044                build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano)
2045            }
2046            DataType::FixedSizeList(_, _) => {
2047                // arrow::compute::concat does not allow inconsistent types including the size of FixedSizeList.
2048                // The length of nulls here we got is 1, so we need to resize the length of nulls to
2049                // the length of non-nulls.
2050                let mut arrays =
2051                    scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2052                let first_non_null_data_type = arrays
2053                    .iter()
2054                    .find(|sv| !sv.is_null(0))
2055                    .map(|sv| sv.data_type().to_owned());
2056                if let Some(DataType::FixedSizeList(f, l)) = first_non_null_data_type {
2057                    for array in arrays.iter_mut() {
2058                        if array.is_null(0) {
2059                            *array = Arc::new(FixedSizeListArray::new_null(
2060                                Arc::clone(&f),
2061                                l,
2062                                1,
2063                            ));
2064                        }
2065                    }
2066                }
2067                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2068                arrow::compute::concat(arrays.as_slice())?
2069            }
2070            DataType::List(_)
2071            | DataType::LargeList(_)
2072            | DataType::Map(_, _)
2073            | DataType::Struct(_)
2074            | DataType::Union(_, _) => {
2075                let arrays = scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2076                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2077                arrow::compute::concat(arrays.as_slice())?
2078            }
2079            DataType::Dictionary(key_type, value_type) => {
2080                // create the values array
2081                let value_scalars = scalars
2082                    .map(|scalar| match scalar {
2083                        ScalarValue::Dictionary(inner_key_type, scalar) => {
2084                            if &inner_key_type == key_type {
2085                                Ok(*scalar)
2086                            } else {
2087                                _exec_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})")
2088                            }
2089                        }
2090                        _ => {
2091                            _exec_err!(
2092                                "Expected scalar of type {value_type} but found: {scalar} {scalar:?}"
2093                            )
2094                        }
2095                    })
2096                    .collect::<Result<Vec<_>>>()?;
2097
2098                let values = Self::iter_to_array(value_scalars)?;
2099                assert_eq!(values.data_type(), value_type.as_ref());
2100
2101                match key_type.as_ref() {
2102                    DataType::Int8 => dict_from_values::<Int8Type>(values)?,
2103                    DataType::Int16 => dict_from_values::<Int16Type>(values)?,
2104                    DataType::Int32 => dict_from_values::<Int32Type>(values)?,
2105                    DataType::Int64 => dict_from_values::<Int64Type>(values)?,
2106                    DataType::UInt8 => dict_from_values::<UInt8Type>(values)?,
2107                    DataType::UInt16 => dict_from_values::<UInt16Type>(values)?,
2108                    DataType::UInt32 => dict_from_values::<UInt32Type>(values)?,
2109                    DataType::UInt64 => dict_from_values::<UInt64Type>(values)?,
2110                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2111                }
2112            }
2113            DataType::FixedSizeBinary(size) => {
2114                let array = scalars
2115                    .map(|sv| {
2116                        if let ScalarValue::FixedSizeBinary(_, v) = sv {
2117                            Ok(v)
2118                        } else {
2119                            _exec_err!(
2120                                "Inconsistent types in ScalarValue::iter_to_array. \
2121                                Expected {data_type:?}, got {sv:?}"
2122                            )
2123                        }
2124                    })
2125                    .collect::<Result<Vec<_>>>()?;
2126                let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2127                    array.into_iter(),
2128                    *size,
2129                )?;
2130                Arc::new(array)
2131            }
            // explicitly enumerate unsupported types so newly added
            // types must be acknowledged. Time32 and Time64 types are
            // not supported if the TimeUnit is not valid (Time32 can
            // only be used with Second and Millisecond, Time64 only
            // with Microsecond and Nanosecond)
2137            DataType::Time32(TimeUnit::Microsecond)
2138            | DataType::Time32(TimeUnit::Nanosecond)
2139            | DataType::Time64(TimeUnit::Second)
2140            | DataType::Time64(TimeUnit::Millisecond)
2141            | DataType::RunEndEncoded(_, _)
2142            | DataType::ListView(_)
2143            | DataType::LargeListView(_) => {
2144                return _not_impl_err!(
2145                    "Unsupported creation of {:?} array from ScalarValue {:?}",
2146                    data_type,
2147                    scalars.peek()
2148                );
2149            }
2150        };
2151        Ok(array)
2152    }
2153
2154    fn iter_to_null_array(
2155        scalars: impl IntoIterator<Item = ScalarValue>,
2156    ) -> Result<ArrayRef> {
2157        let length = scalars.into_iter().try_fold(
2158            0usize,
2159            |r, element: ScalarValue| match element {
2160                ScalarValue::Null => Ok::<usize, DataFusionError>(r + 1),
2161                s => {
2162                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2163                }
2164            },
2165        )?;
2166        Ok(new_null_array(&DataType::Null, length))
2167    }
2168
2169    fn iter_to_decimal_array(
2170        scalars: impl IntoIterator<Item = ScalarValue>,
2171        precision: u8,
2172        scale: i8,
2173    ) -> Result<Decimal128Array> {
2174        let array = scalars
2175            .into_iter()
2176            .map(|element: ScalarValue| match element {
2177                ScalarValue::Decimal128(v1, _, _) => Ok(v1),
2178                s => {
2179                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2180                }
2181            })
2182            .collect::<Result<Decimal128Array>>()?
2183            .with_precision_and_scale(precision, scale)?;
2184        Ok(array)
2185    }
2186
2187    fn iter_to_decimal256_array(
2188        scalars: impl IntoIterator<Item = ScalarValue>,
2189        precision: u8,
2190        scale: i8,
2191    ) -> Result<Decimal256Array> {
2192        let array = scalars
2193            .into_iter()
2194            .map(|element: ScalarValue| match element {
2195                ScalarValue::Decimal256(v1, _, _) => Ok(v1),
2196                s => {
2197                    _internal_err!(
2198                        "Expected ScalarValue::Decimal256 element. Received {s:?}"
2199                    )
2200                }
2201            })
2202            .collect::<Result<Decimal256Array>>()?
2203            .with_precision_and_scale(precision, scale)?;
2204        Ok(array)
2205    }
2206
2207    fn build_decimal_array(
2208        value: Option<i128>,
2209        precision: u8,
2210        scale: i8,
2211        size: usize,
2212    ) -> Result<Decimal128Array> {
2213        Ok(match value {
2214            Some(val) => Decimal128Array::from(vec![val; size])
2215                .with_precision_and_scale(precision, scale)?,
2216            None => {
2217                let mut builder = Decimal128Array::builder(size)
2218                    .with_precision_and_scale(precision, scale)?;
2219                builder.append_nulls(size);
2220                builder.finish()
2221            }
2222        })
2223    }
2224
2225    fn build_decimal256_array(
2226        value: Option<i256>,
2227        precision: u8,
2228        scale: i8,
2229        size: usize,
2230    ) -> Result<Decimal256Array> {
2231        Ok(repeat_n(value, size)
2232            .collect::<Decimal256Array>()
2233            .with_precision_and_scale(precision, scale)?)
2234    }
2235
2236    /// Converts `Vec<ScalarValue>` where each element has type corresponding to
2237    /// `data_type`, to a single element [`ListArray`].
2238    ///
2239    /// Example
2240    /// ```
2241    /// use datafusion_common::ScalarValue;
2242    /// use arrow::array::{ListArray, Int32Array};
2243    /// use arrow::datatypes::{DataType, Int32Type};
2244    /// use datafusion_common::cast::as_list_array;
2245    ///
2246    /// let scalars = vec![
2247    ///    ScalarValue::Int32(Some(1)),
2248    ///    ScalarValue::Int32(None),
2249    ///    ScalarValue::Int32(Some(2))
2250    /// ];
2251    ///
2252    /// let result = ScalarValue::new_list(&scalars, &DataType::Int32, true);
2253    ///
2254    /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(
2255    ///     vec![
2256    ///        Some(vec![Some(1), None, Some(2)])
2257    ///     ]);
2258    ///
2259    /// assert_eq!(*result, expected);
2260    /// ```
2261    pub fn new_list(
2262        values: &[ScalarValue],
2263        data_type: &DataType,
2264        nullable: bool,
2265    ) -> Arc<ListArray> {
2266        let values = if values.is_empty() {
2267            new_empty_array(data_type)
2268        } else {
2269            Self::iter_to_array(values.iter().cloned()).unwrap()
2270        };
2271        Arc::new(
2272            SingleRowListArrayBuilder::new(values)
2273                .with_nullable(nullable)
2274                .build_list_array(),
2275        )
2276    }
2277
2278    /// Same as [`ScalarValue::new_list`] but with nullable set to true.
2279    pub fn new_list_nullable(
2280        values: &[ScalarValue],
2281        data_type: &DataType,
2282    ) -> Arc<ListArray> {
2283        Self::new_list(values, data_type, true)
2284    }
2285
2286    /// Create ListArray with Null with specific data type
2287    ///
2288    /// - new_null_list(i32, nullable, 1): `ListArray[NULL]`
2289    pub fn new_null_list(data_type: DataType, nullable: bool, null_len: usize) -> Self {
2290        let data_type = DataType::List(Field::new_list_field(data_type, nullable).into());
2291        Self::List(Arc::new(ListArray::from(ArrayData::new_null(
2292            &data_type, null_len,
2293        ))))
2294    }
2295
2296    /// Converts `IntoIterator<Item = ScalarValue>` where each element has type corresponding to
2297    /// `data_type`, to a [`ListArray`].
2298    ///
2299    /// Example
2300    /// ```
2301    /// use datafusion_common::ScalarValue;
2302    /// use arrow::array::{ListArray, Int32Array};
2303    /// use arrow::datatypes::{DataType, Int32Type};
2304    /// use datafusion_common::cast::as_list_array;
2305    ///
2306    /// let scalars = vec![
2307    ///    ScalarValue::Int32(Some(1)),
2308    ///    ScalarValue::Int32(None),
2309    ///    ScalarValue::Int32(Some(2))
2310    /// ];
2311    ///
2312    /// let result = ScalarValue::new_list_from_iter(scalars.into_iter(), &DataType::Int32, true);
2313    ///
2314    /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(
2315    ///     vec![
2316    ///        Some(vec![Some(1), None, Some(2)])
2317    ///     ]);
2318    ///
2319    /// assert_eq!(*result, expected);
2320    /// ```
2321    pub fn new_list_from_iter(
2322        values: impl IntoIterator<Item = ScalarValue> + ExactSizeIterator,
2323        data_type: &DataType,
2324        nullable: bool,
2325    ) -> Arc<ListArray> {
2326        let values = if values.len() == 0 {
2327            new_empty_array(data_type)
2328        } else {
2329            Self::iter_to_array(values).unwrap()
2330        };
2331        Arc::new(
2332            SingleRowListArrayBuilder::new(values)
2333                .with_nullable(nullable)
2334                .build_list_array(),
2335        )
2336    }
2337
2338    /// Converts `Vec<ScalarValue>` where each element has type corresponding to
2339    /// `data_type`, to a [`LargeListArray`].
2340    ///
2341    /// Example
2342    /// ```
2343    /// use datafusion_common::ScalarValue;
2344    /// use arrow::array::{LargeListArray, Int32Array};
2345    /// use arrow::datatypes::{DataType, Int32Type};
2346    /// use datafusion_common::cast::as_large_list_array;
2347    ///
2348    /// let scalars = vec![
2349    ///    ScalarValue::Int32(Some(1)),
2350    ///    ScalarValue::Int32(None),
2351    ///    ScalarValue::Int32(Some(2))
2352    /// ];
2353    ///
2354    /// let result = ScalarValue::new_large_list(&scalars, &DataType::Int32);
2355    ///
2356    /// let expected = LargeListArray::from_iter_primitive::<Int32Type, _, _>(
2357    ///     vec![
2358    ///        Some(vec![Some(1), None, Some(2)])
2359    ///     ]);
2360    ///
2361    /// assert_eq!(*result, expected);
2362    /// ```
2363    pub fn new_large_list(
2364        values: &[ScalarValue],
2365        data_type: &DataType,
2366    ) -> Arc<LargeListArray> {
2367        let values = if values.is_empty() {
2368            new_empty_array(data_type)
2369        } else {
2370            Self::iter_to_array(values.iter().cloned()).unwrap()
2371        };
2372        Arc::new(SingleRowListArrayBuilder::new(values).build_large_list_array())
2373    }
2374
2375    /// Converts a scalar value into an array of `size` rows.
2376    ///
2377    /// # Errors
2378    ///
2379    /// Errors if `self` is
2380    /// - a decimal that fails be converted to a decimal array of size
2381    /// - a `FixedsizeList` that fails to be concatenated into an array of size
2382    /// - a `List` that fails to be concatenated into an array of size
2383    /// - a `Dictionary` that fails be converted to a dictionary array of size
2384    pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
2385        Ok(match self {
2386            ScalarValue::Decimal128(e, precision, scale) => Arc::new(
2387                ScalarValue::build_decimal_array(*e, *precision, *scale, size)?,
2388            ),
2389            ScalarValue::Decimal256(e, precision, scale) => Arc::new(
2390                ScalarValue::build_decimal256_array(*e, *precision, *scale, size)?,
2391            ),
2392            ScalarValue::Boolean(e) => {
2393                Arc::new(BooleanArray::from(vec![*e; size])) as ArrayRef
2394            }
2395            ScalarValue::Float64(e) => {
2396                build_array_from_option!(Float64, Float64Array, e, size)
2397            }
2398            ScalarValue::Float32(e) => {
2399                build_array_from_option!(Float32, Float32Array, e, size)
2400            }
2401            ScalarValue::Float16(e) => {
2402                build_array_from_option!(Float16, Float16Array, e, size)
2403            }
2404            ScalarValue::Int8(e) => build_array_from_option!(Int8, Int8Array, e, size),
2405            ScalarValue::Int16(e) => build_array_from_option!(Int16, Int16Array, e, size),
2406            ScalarValue::Int32(e) => build_array_from_option!(Int32, Int32Array, e, size),
2407            ScalarValue::Int64(e) => build_array_from_option!(Int64, Int64Array, e, size),
2408            ScalarValue::UInt8(e) => build_array_from_option!(UInt8, UInt8Array, e, size),
2409            ScalarValue::UInt16(e) => {
2410                build_array_from_option!(UInt16, UInt16Array, e, size)
2411            }
2412            ScalarValue::UInt32(e) => {
2413                build_array_from_option!(UInt32, UInt32Array, e, size)
2414            }
2415            ScalarValue::UInt64(e) => {
2416                build_array_from_option!(UInt64, UInt64Array, e, size)
2417            }
2418            ScalarValue::TimestampSecond(e, tz_opt) => {
2419                build_timestamp_array_from_option!(
2420                    TimeUnit::Second,
2421                    tz_opt.clone(),
2422                    TimestampSecondArray,
2423                    e,
2424                    size
2425                )
2426            }
2427            ScalarValue::TimestampMillisecond(e, tz_opt) => {
2428                build_timestamp_array_from_option!(
2429                    TimeUnit::Millisecond,
2430                    tz_opt.clone(),
2431                    TimestampMillisecondArray,
2432                    e,
2433                    size
2434                )
2435            }
2436
2437            ScalarValue::TimestampMicrosecond(e, tz_opt) => {
2438                build_timestamp_array_from_option!(
2439                    TimeUnit::Microsecond,
2440                    tz_opt.clone(),
2441                    TimestampMicrosecondArray,
2442                    e,
2443                    size
2444                )
2445            }
2446            ScalarValue::TimestampNanosecond(e, tz_opt) => {
2447                build_timestamp_array_from_option!(
2448                    TimeUnit::Nanosecond,
2449                    tz_opt.clone(),
2450                    TimestampNanosecondArray,
2451                    e,
2452                    size
2453                )
2454            }
2455            ScalarValue::Utf8(e) => match e {
2456                Some(value) => {
2457                    Arc::new(StringArray::from_iter_values(repeat_n(value, size)))
2458                }
2459                None => new_null_array(&DataType::Utf8, size),
2460            },
2461            ScalarValue::Utf8View(e) => match e {
2462                Some(value) => {
2463                    Arc::new(StringViewArray::from_iter_values(repeat_n(value, size)))
2464                }
2465                None => new_null_array(&DataType::Utf8View, size),
2466            },
2467            ScalarValue::LargeUtf8(e) => match e {
2468                Some(value) => {
2469                    Arc::new(LargeStringArray::from_iter_values(repeat_n(value, size)))
2470                }
2471                None => new_null_array(&DataType::LargeUtf8, size),
2472            },
2473            ScalarValue::Binary(e) => match e {
2474                Some(value) => Arc::new(
2475                    repeat_n(Some(value.as_slice()), size).collect::<BinaryArray>(),
2476                ),
2477                None => Arc::new(repeat_n(None::<&str>, size).collect::<BinaryArray>()),
2478            },
2479            ScalarValue::BinaryView(e) => match e {
2480                Some(value) => Arc::new(
2481                    repeat_n(Some(value.as_slice()), size).collect::<BinaryViewArray>(),
2482                ),
2483                None => {
2484                    Arc::new(repeat_n(None::<&str>, size).collect::<BinaryViewArray>())
2485                }
2486            },
2487            ScalarValue::FixedSizeBinary(s, e) => match e {
2488                Some(value) => Arc::new(
2489                    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2490                        repeat_n(Some(value.as_slice()), size),
2491                        *s,
2492                    )
2493                    .unwrap(),
2494                ),
2495                None => Arc::new(
2496                    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2497                        repeat_n(None::<&[u8]>, size),
2498                        *s,
2499                    )
2500                    .unwrap(),
2501                ),
2502            },
2503            ScalarValue::LargeBinary(e) => match e {
2504                Some(value) => Arc::new(
2505                    repeat_n(Some(value.as_slice()), size).collect::<LargeBinaryArray>(),
2506                ),
2507                None => {
2508                    Arc::new(repeat_n(None::<&str>, size).collect::<LargeBinaryArray>())
2509                }
2510            },
2511            ScalarValue::List(arr) => {
2512                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2513            }
2514            ScalarValue::LargeList(arr) => {
2515                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2516            }
2517            ScalarValue::FixedSizeList(arr) => {
2518                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2519            }
2520            ScalarValue::Struct(arr) => {
2521                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2522            }
2523            ScalarValue::Map(arr) => {
2524                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2525            }
2526            ScalarValue::Date32(e) => {
2527                build_array_from_option!(Date32, Date32Array, e, size)
2528            }
2529            ScalarValue::Date64(e) => {
2530                build_array_from_option!(Date64, Date64Array, e, size)
2531            }
2532            ScalarValue::Time32Second(e) => {
2533                build_array_from_option!(
2534                    Time32,
2535                    TimeUnit::Second,
2536                    Time32SecondArray,
2537                    e,
2538                    size
2539                )
2540            }
2541            ScalarValue::Time32Millisecond(e) => {
2542                build_array_from_option!(
2543                    Time32,
2544                    TimeUnit::Millisecond,
2545                    Time32MillisecondArray,
2546                    e,
2547                    size
2548                )
2549            }
2550            ScalarValue::Time64Microsecond(e) => {
2551                build_array_from_option!(
2552                    Time64,
2553                    TimeUnit::Microsecond,
2554                    Time64MicrosecondArray,
2555                    e,
2556                    size
2557                )
2558            }
2559            ScalarValue::Time64Nanosecond(e) => {
2560                build_array_from_option!(
2561                    Time64,
2562                    TimeUnit::Nanosecond,
2563                    Time64NanosecondArray,
2564                    e,
2565                    size
2566                )
2567            }
2568            ScalarValue::IntervalDayTime(e) => build_array_from_option!(
2569                Interval,
2570                IntervalUnit::DayTime,
2571                IntervalDayTimeArray,
2572                e,
2573                size
2574            ),
2575            ScalarValue::IntervalYearMonth(e) => build_array_from_option!(
2576                Interval,
2577                IntervalUnit::YearMonth,
2578                IntervalYearMonthArray,
2579                e,
2580                size
2581            ),
2582            ScalarValue::IntervalMonthDayNano(e) => build_array_from_option!(
2583                Interval,
2584                IntervalUnit::MonthDayNano,
2585                IntervalMonthDayNanoArray,
2586                e,
2587                size
2588            ),
2589            ScalarValue::DurationSecond(e) => build_array_from_option!(
2590                Duration,
2591                TimeUnit::Second,
2592                DurationSecondArray,
2593                e,
2594                size
2595            ),
2596            ScalarValue::DurationMillisecond(e) => build_array_from_option!(
2597                Duration,
2598                TimeUnit::Millisecond,
2599                DurationMillisecondArray,
2600                e,
2601                size
2602            ),
2603            ScalarValue::DurationMicrosecond(e) => build_array_from_option!(
2604                Duration,
2605                TimeUnit::Microsecond,
2606                DurationMicrosecondArray,
2607                e,
2608                size
2609            ),
2610            ScalarValue::DurationNanosecond(e) => build_array_from_option!(
2611                Duration,
2612                TimeUnit::Nanosecond,
2613                DurationNanosecondArray,
2614                e,
2615                size
2616            ),
2617            ScalarValue::Union(value, fields, mode) => match value {
2618                Some((v_id, value)) => {
2619                    let mut new_fields = Vec::with_capacity(fields.len());
2620                    let mut child_arrays = Vec::<ArrayRef>::with_capacity(fields.len());
2621                    for (f_id, field) in fields.iter() {
2622                        let ar = if f_id == *v_id {
2623                            value.to_array_of_size(size)?
2624                        } else {
2625                            let dt = field.data_type();
2626                            match mode {
2627                                UnionMode::Sparse => new_null_array(dt, size),
2628                                // In a dense union, only the child with values needs to be
2629                                // allocated
2630                                UnionMode::Dense => new_null_array(dt, 0),
2631                            }
2632                        };
2633                        let field = (**field).clone();
2634                        child_arrays.push(ar);
2635                        new_fields.push(field.clone());
2636                    }
2637                    let type_ids = repeat_n(*v_id, size);
2638                    let type_ids = ScalarBuffer::<i8>::from_iter(type_ids);
2639                    let value_offsets = match mode {
2640                        UnionMode::Sparse => None,
2641                        UnionMode::Dense => Some(ScalarBuffer::from_iter(0..size as i32)),
2642                    };
2643                    let ar = UnionArray::try_new(
2644                        fields.clone(),
2645                        type_ids,
2646                        value_offsets,
2647                        child_arrays,
2648                    )
2649                    .map_err(|e| DataFusionError::ArrowError(e, None))?;
2650                    Arc::new(ar)
2651                }
2652                None => {
2653                    let dt = self.data_type();
2654                    new_null_array(&dt, size)
2655                }
2656            },
2657            ScalarValue::Dictionary(key_type, v) => {
2658                // values array is one element long (the value)
2659                match key_type.as_ref() {
2660                    DataType::Int8 => dict_from_scalar::<Int8Type>(v, size)?,
2661                    DataType::Int16 => dict_from_scalar::<Int16Type>(v, size)?,
2662                    DataType::Int32 => dict_from_scalar::<Int32Type>(v, size)?,
2663                    DataType::Int64 => dict_from_scalar::<Int64Type>(v, size)?,
2664                    DataType::UInt8 => dict_from_scalar::<UInt8Type>(v, size)?,
2665                    DataType::UInt16 => dict_from_scalar::<UInt16Type>(v, size)?,
2666                    DataType::UInt32 => dict_from_scalar::<UInt32Type>(v, size)?,
2667                    DataType::UInt64 => dict_from_scalar::<UInt64Type>(v, size)?,
2668                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2669                }
2670            }
2671            ScalarValue::Null => new_null_array(&DataType::Null, size),
2672        })
2673    }
2674
2675    fn get_decimal_value_from_array(
2676        array: &dyn Array,
2677        index: usize,
2678        precision: u8,
2679        scale: i8,
2680    ) -> Result<ScalarValue> {
2681        match array.data_type() {
2682            DataType::Decimal128(_, _) => {
2683                let array = as_decimal128_array(array)?;
2684                if array.is_null(index) {
2685                    Ok(ScalarValue::Decimal128(None, precision, scale))
2686                } else {
2687                    let value = array.value(index);
2688                    Ok(ScalarValue::Decimal128(Some(value), precision, scale))
2689                }
2690            }
2691            DataType::Decimal256(_, _) => {
2692                let array = as_decimal256_array(array)?;
2693                if array.is_null(index) {
2694                    Ok(ScalarValue::Decimal256(None, precision, scale))
2695                } else {
2696                    let value = array.value(index);
2697                    Ok(ScalarValue::Decimal256(Some(value), precision, scale))
2698                }
2699            }
2700            _ => _internal_err!("Unsupported decimal type"),
2701        }
2702    }
2703
2704    fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> {
2705        let arrays = repeat_n(arr, size).collect::<Vec<_>>();
2706        let ret = match !arrays.is_empty() {
2707            true => arrow::compute::concat(arrays.as_slice())?,
2708            false => arr.slice(0, 0),
2709        };
2710        Ok(ret)
2711    }
2712
2713    /// Retrieve ScalarValue for each row in `array`
2714    ///
2715    /// Example 1: Array (ScalarValue::Int32)
2716    /// ```
2717    /// use datafusion_common::ScalarValue;
2718    /// use arrow::array::ListArray;
2719    /// use arrow::datatypes::{DataType, Int32Type};
2720    ///
2721    /// // Equivalent to [[1,2,3], [4,5]]
2722    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2723    ///    Some(vec![Some(1), Some(2), Some(3)]),
2724    ///    Some(vec![Some(4), Some(5)])
2725    /// ]);
2726    ///
2727    /// // Convert the array into Scalar Values for each row
2728    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
2729    ///
2730    /// let expected = vec![
2731    /// vec![
2732    ///     ScalarValue::Int32(Some(1)),
2733    ///     ScalarValue::Int32(Some(2)),
2734    ///     ScalarValue::Int32(Some(3)),
2735    /// ],
2736    /// vec![
2737    ///    ScalarValue::Int32(Some(4)),
2738    ///    ScalarValue::Int32(Some(5)),
2739    /// ],
2740    /// ];
2741    ///
2742    /// assert_eq!(scalar_vec, expected);
2743    /// ```
2744    ///
2745    /// Example 2: Nested array (ScalarValue::List)
2746    /// ```
2747    /// use datafusion_common::ScalarValue;
2748    /// use arrow::array::ListArray;
2749    /// use arrow::datatypes::{DataType, Int32Type};
2750    /// use datafusion_common::utils::SingleRowListArrayBuilder;
2751    /// use std::sync::Arc;
2752    ///
2753    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2754    ///    Some(vec![Some(1), Some(2), Some(3)]),
2755    ///    Some(vec![Some(4), Some(5)])
2756    /// ]);
2757    ///
2758    /// // Wrap into another layer of list, we got nested array as [ [[1,2,3], [4,5]] ]
2759    /// let list_arr = SingleRowListArrayBuilder::new(Arc::new(list_arr)).build_list_array();
2760    ///
2761    /// // Convert the array into Scalar Values for each row, we got 1D arrays in this example
2762    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
2763    ///
2764    /// let l1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2765    ///     Some(vec![Some(1), Some(2), Some(3)]),
2766    /// ]);
2767    /// let l2 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2768    ///     Some(vec![Some(4), Some(5)]),
2769    /// ]);
2770    ///
2771    /// let expected = vec![
2772    ///   vec![
2773    ///     ScalarValue::List(Arc::new(l1)),
2774    ///     ScalarValue::List(Arc::new(l2)),
2775    ///   ],
2776    /// ];
2777    ///
2778    /// assert_eq!(scalar_vec, expected);
2779    /// ```
2780    pub fn convert_array_to_scalar_vec(array: &dyn Array) -> Result<Vec<Vec<Self>>> {
2781        let mut scalars = Vec::with_capacity(array.len());
2782
2783        for index in 0..array.len() {
2784            let nested_array = array.as_list::<i32>().value(index);
2785            let scalar_values = (0..nested_array.len())
2786                .map(|i| ScalarValue::try_from_array(&nested_array, i))
2787                .collect::<Result<Vec<_>>>()?;
2788            scalars.push(scalar_values);
2789        }
2790
2791        Ok(scalars)
2792    }
2793
2794    #[deprecated(
2795        since = "46.0.0",
2796        note = "This function is obsolete. Use `to_array` instead"
2797    )]
2798    pub fn raw_data(&self) -> Result<ArrayRef> {
2799        match self {
2800            ScalarValue::List(arr) => Ok(arr.to_owned()),
2801            _ => _internal_err!("ScalarValue is not a list"),
2802        }
2803    }
2804
2805    /// Converts a value in `array` at `index` into a ScalarValue
2806    pub fn try_from_array(array: &dyn Array, index: usize) -> Result<Self> {
2807        // handle NULL value
2808        if !array.is_valid(index) {
2809            return array.data_type().try_into();
2810        }
2811
2812        Ok(match array.data_type() {
2813            DataType::Null => ScalarValue::Null,
2814            DataType::Decimal128(precision, scale) => {
2815                ScalarValue::get_decimal_value_from_array(
2816                    array, index, *precision, *scale,
2817                )?
2818            }
2819            DataType::Decimal256(precision, scale) => {
2820                ScalarValue::get_decimal_value_from_array(
2821                    array, index, *precision, *scale,
2822                )?
2823            }
2824            DataType::Boolean => typed_cast!(array, index, BooleanArray, Boolean)?,
2825            DataType::Float64 => typed_cast!(array, index, Float64Array, Float64)?,
2826            DataType::Float32 => typed_cast!(array, index, Float32Array, Float32)?,
2827            DataType::Float16 => typed_cast!(array, index, Float16Array, Float16)?,
2828            DataType::UInt64 => typed_cast!(array, index, UInt64Array, UInt64)?,
2829            DataType::UInt32 => typed_cast!(array, index, UInt32Array, UInt32)?,
2830            DataType::UInt16 => typed_cast!(array, index, UInt16Array, UInt16)?,
2831            DataType::UInt8 => typed_cast!(array, index, UInt8Array, UInt8)?,
2832            DataType::Int64 => typed_cast!(array, index, Int64Array, Int64)?,
2833            DataType::Int32 => typed_cast!(array, index, Int32Array, Int32)?,
2834            DataType::Int16 => typed_cast!(array, index, Int16Array, Int16)?,
2835            DataType::Int8 => typed_cast!(array, index, Int8Array, Int8)?,
2836            DataType::Binary => typed_cast!(array, index, BinaryArray, Binary)?,
2837            DataType::LargeBinary => {
2838                typed_cast!(array, index, LargeBinaryArray, LargeBinary)?
2839            }
2840            DataType::BinaryView => {
2841                typed_cast!(array, index, BinaryViewArray, BinaryView)?
2842            }
2843            DataType::Utf8 => typed_cast!(array, index, StringArray, Utf8)?,
2844            DataType::LargeUtf8 => {
2845                typed_cast!(array, index, LargeStringArray, LargeUtf8)?
2846            }
2847            DataType::Utf8View => typed_cast!(array, index, StringViewArray, Utf8View)?,
2848            DataType::List(field) => {
2849                let list_array = array.as_list::<i32>();
2850                let nested_array = list_array.value(index);
2851                // Produces a single element `ListArray` with the value at `index`.
2852                SingleRowListArrayBuilder::new(nested_array)
2853                    .with_field(field)
2854                    .build_list_scalar()
2855            }
2856            DataType::LargeList(field) => {
2857                let list_array = as_large_list_array(array);
2858                let nested_array = list_array.value(index);
2859                // Produces a single element `LargeListArray` with the value at `index`.
2860                SingleRowListArrayBuilder::new(nested_array)
2861                    .with_field(field)
2862                    .build_large_list_scalar()
2863            }
2864            // TODO: There is no test for FixedSizeList now, add it later
2865            DataType::FixedSizeList(field, _) => {
2866                let list_array = as_fixed_size_list_array(array)?;
2867                let nested_array = list_array.value(index);
2868                // Produces a single element `FixedSizeListArray` with the value at `index`.
2869                let list_size = nested_array.len();
2870                SingleRowListArrayBuilder::new(nested_array)
2871                    .with_field(field)
2872                    .build_fixed_size_list_scalar(list_size)
2873            }
2874            DataType::Date32 => typed_cast!(array, index, Date32Array, Date32)?,
2875            DataType::Date64 => typed_cast!(array, index, Date64Array, Date64)?,
2876            DataType::Time32(TimeUnit::Second) => {
2877                typed_cast!(array, index, Time32SecondArray, Time32Second)?
2878            }
2879            DataType::Time32(TimeUnit::Millisecond) => {
2880                typed_cast!(array, index, Time32MillisecondArray, Time32Millisecond)?
2881            }
2882            DataType::Time64(TimeUnit::Microsecond) => {
2883                typed_cast!(array, index, Time64MicrosecondArray, Time64Microsecond)?
2884            }
2885            DataType::Time64(TimeUnit::Nanosecond) => {
2886                typed_cast!(array, index, Time64NanosecondArray, Time64Nanosecond)?
2887            }
2888            DataType::Timestamp(TimeUnit::Second, tz_opt) => typed_cast_tz!(
2889                array,
2890                index,
2891                TimestampSecondArray,
2892                TimestampSecond,
2893                tz_opt
2894            )?,
2895            DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => typed_cast_tz!(
2896                array,
2897                index,
2898                TimestampMillisecondArray,
2899                TimestampMillisecond,
2900                tz_opt
2901            )?,
2902            DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => typed_cast_tz!(
2903                array,
2904                index,
2905                TimestampMicrosecondArray,
2906                TimestampMicrosecond,
2907                tz_opt
2908            )?,
2909            DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => typed_cast_tz!(
2910                array,
2911                index,
2912                TimestampNanosecondArray,
2913                TimestampNanosecond,
2914                tz_opt
2915            )?,
2916            DataType::Dictionary(key_type, _) => {
2917                let (values_array, values_index) = match key_type.as_ref() {
2918                    DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
2919                    DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
2920                    DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
2921                    DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
2922                    DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
2923                    DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
2924                    DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
2925                    DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
2926                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2927                };
2928                // look up the index in the values dictionary
2929                let value = match values_index {
2930                    Some(values_index) => {
2931                        ScalarValue::try_from_array(values_array, values_index)
2932                    }
2933                    // else entry was null, so return null
2934                    None => values_array.data_type().try_into(),
2935                }?;
2936
2937                Self::Dictionary(key_type.clone(), Box::new(value))
2938            }
2939            DataType::Struct(_) => {
2940                let a = array.slice(index, 1);
2941                Self::Struct(Arc::new(a.as_struct().to_owned()))
2942            }
2943            DataType::FixedSizeBinary(_) => {
2944                let array = as_fixed_size_binary_array(array)?;
2945                let size = match array.data_type() {
2946                    DataType::FixedSizeBinary(size) => *size,
2947                    _ => unreachable!(),
2948                };
2949                ScalarValue::FixedSizeBinary(
2950                    size,
2951                    match array.is_null(index) {
2952                        true => None,
2953                        false => Some(array.value(index).into()),
2954                    },
2955                )
2956            }
2957            DataType::Interval(IntervalUnit::DayTime) => {
2958                typed_cast!(array, index, IntervalDayTimeArray, IntervalDayTime)?
2959            }
2960            DataType::Interval(IntervalUnit::YearMonth) => {
2961                typed_cast!(array, index, IntervalYearMonthArray, IntervalYearMonth)?
2962            }
2963            DataType::Interval(IntervalUnit::MonthDayNano) => typed_cast!(
2964                array,
2965                index,
2966                IntervalMonthDayNanoArray,
2967                IntervalMonthDayNano
2968            )?,
2969
2970            DataType::Duration(TimeUnit::Second) => {
2971                typed_cast!(array, index, DurationSecondArray, DurationSecond)?
2972            }
2973            DataType::Duration(TimeUnit::Millisecond) => {
2974                typed_cast!(array, index, DurationMillisecondArray, DurationMillisecond)?
2975            }
2976            DataType::Duration(TimeUnit::Microsecond) => {
2977                typed_cast!(array, index, DurationMicrosecondArray, DurationMicrosecond)?
2978            }
2979            DataType::Duration(TimeUnit::Nanosecond) => {
2980                typed_cast!(array, index, DurationNanosecondArray, DurationNanosecond)?
2981            }
2982            DataType::Map(_, _) => {
2983                let a = array.slice(index, 1);
2984                Self::Map(Arc::new(a.as_map().to_owned()))
2985            }
2986            DataType::Union(fields, mode) => {
2987                let array = as_union_array(array);
2988                let ti = array.type_id(index);
2989                let index = array.value_offset(index);
2990                let value = ScalarValue::try_from_array(array.child(ti), index)?;
2991                ScalarValue::Union(Some((ti, Box::new(value))), fields.clone(), *mode)
2992            }
2993            other => {
2994                return _not_impl_err!(
2995                    "Can't create a scalar from array of type \"{other:?}\""
2996                );
2997            }
2998        })
2999    }
3000
3001    /// Try to parse `value` into a ScalarValue of type `target_type`
3002    pub fn try_from_string(value: String, target_type: &DataType) -> Result<Self> {
3003        ScalarValue::from(value).cast_to(target_type)
3004    }
3005
3006    /// Returns the Some(`&str`) representation of `ScalarValue` of logical string type
3007    ///
3008    /// Returns `None` if this `ScalarValue` is not a logical string type or the
3009    /// `ScalarValue` represents the `NULL` value.
3010    ///
3011    /// Note you can use [`Option::flatten`] to check for non null logical
3012    /// strings.
3013    ///
3014    /// For example, [`ScalarValue::Utf8`], [`ScalarValue::LargeUtf8`], and
3015    /// [`ScalarValue::Dictionary`] with a logical string value and store
3016    /// strings and can be accessed as `&str` using this method.
3017    ///
3018    /// # Example: logical strings
3019    /// ```
3020    /// # use datafusion_common::ScalarValue;
3021    /// /// non strings return None
3022    /// let scalar = ScalarValue::from(42);
3023    /// assert_eq!(scalar.try_as_str(), None);
3024    /// // Non null logical string returns Some(Some(&str))
3025    /// let scalar = ScalarValue::from("hello");
3026    /// assert_eq!(scalar.try_as_str(), Some(Some("hello")));
3027    /// // Null logical string returns Some(None)
3028    /// let scalar = ScalarValue::Utf8(None);
3029    /// assert_eq!(scalar.try_as_str(), Some(None));
3030    /// ```
3031    ///
3032    /// # Example: use [`Option::flatten`] to check for non-null logical strings
3033    /// ```
3034    /// # use datafusion_common::ScalarValue;
3035    /// // Non null logical string returns Some(Some(&str))
3036    /// let scalar = ScalarValue::from("hello");
3037    /// assert_eq!(scalar.try_as_str().flatten(), Some("hello"));
3038    /// ```
3039    pub fn try_as_str(&self) -> Option<Option<&str>> {
3040        let v = match self {
3041            ScalarValue::Utf8(v) => v,
3042            ScalarValue::LargeUtf8(v) => v,
3043            ScalarValue::Utf8View(v) => v,
3044            ScalarValue::Dictionary(_, v) => return v.try_as_str(),
3045            _ => return None,
3046        };
3047        Some(v.as_ref().map(|v| v.as_str()))
3048    }
3049
3050    /// Try to cast this value to a ScalarValue of type `data_type`
3051    pub fn cast_to(&self, target_type: &DataType) -> Result<Self> {
3052        self.cast_to_with_options(target_type, &DEFAULT_CAST_OPTIONS)
3053    }
3054
3055    /// Try to cast this value to a ScalarValue of type `data_type` with [`CastOptions`]
3056    pub fn cast_to_with_options(
3057        &self,
3058        target_type: &DataType,
3059        cast_options: &CastOptions<'static>,
3060    ) -> Result<Self> {
3061        let scalar_array = match (self, target_type) {
3062            (
3063                ScalarValue::Float64(Some(float_ts)),
3064                DataType::Timestamp(TimeUnit::Nanosecond, None),
3065            ) => ScalarValue::Int64(Some((float_ts * 1_000_000_000_f64).trunc() as i64))
3066                .to_array()?,
3067            (
3068                ScalarValue::Decimal128(Some(decimal_value), _, scale),
3069                DataType::Timestamp(time_unit, None),
3070            ) => {
3071                let scale_factor = 10_i128.pow(*scale as u32);
3072                let seconds = decimal_value / scale_factor;
3073                let fraction = decimal_value % scale_factor;
3074
3075                let timestamp_value = match time_unit {
3076                    TimeUnit::Second => ScalarValue::Int64(Some(seconds as i64)),
3077                    TimeUnit::Millisecond => {
3078                        let millis = seconds * 1_000 + (fraction * 1_000) / scale_factor;
3079                        ScalarValue::Int64(Some(millis as i64))
3080                    }
3081                    TimeUnit::Microsecond => {
3082                        let micros =
3083                            seconds * 1_000_000 + (fraction * 1_000_000) / scale_factor;
3084                        ScalarValue::Int64(Some(micros as i64))
3085                    }
3086                    TimeUnit::Nanosecond => {
3087                        let nanos = seconds * 1_000_000_000
3088                            + (fraction * 1_000_000_000) / scale_factor;
3089                        ScalarValue::Int64(Some(nanos as i64))
3090                    }
3091                };
3092
3093                timestamp_value.to_array()?
3094            }
3095            _ => self.to_array()?,
3096        };
3097
3098        let cast_arr = cast_with_options(&scalar_array, target_type, cast_options)?;
3099        ScalarValue::try_from_array(&cast_arr, 0)
3100    }
3101
3102    fn eq_array_decimal(
3103        array: &ArrayRef,
3104        index: usize,
3105        value: Option<&i128>,
3106        precision: u8,
3107        scale: i8,
3108    ) -> Result<bool> {
3109        let array = as_decimal128_array(array)?;
3110        if array.precision() != precision || array.scale() != scale {
3111            return Ok(false);
3112        }
3113        let is_null = array.is_null(index);
3114        if let Some(v) = value {
3115            Ok(!array.is_null(index) && array.value(index) == *v)
3116        } else {
3117            Ok(is_null)
3118        }
3119    }
3120
3121    fn eq_array_decimal256(
3122        array: &ArrayRef,
3123        index: usize,
3124        value: Option<&i256>,
3125        precision: u8,
3126        scale: i8,
3127    ) -> Result<bool> {
3128        let array = as_decimal256_array(array)?;
3129        if array.precision() != precision || array.scale() != scale {
3130            return Ok(false);
3131        }
3132        let is_null = array.is_null(index);
3133        if let Some(v) = value {
3134            Ok(!array.is_null(index) && array.value(index) == *v)
3135        } else {
3136            Ok(is_null)
3137        }
3138    }
3139
    /// Compares a single row of array @ index for equality with self,
    /// in an optimized fashion.
    ///
    /// This method implements an optimized version of:
    ///
    /// ```text
    ///     let arr_scalar = Self::try_from_array(array, index).unwrap();
    ///     arr_scalar.eq(self)
    /// ```
    ///
    /// *Performance note*: the arrow compute kernels should be
    /// preferred over this function if at all possible as they can be
    /// vectorized and are generally much faster.
    ///
    /// This function has a few narrow use cases such as hash table key
    /// comparisons where comparing a single row at a time is necessary.
    ///
    /// # Errors
    ///
    /// Errors if it fails to downcast `array` to the data type of `self`.
    ///
    /// Nested values (`List`, `LargeList`, `FixedSizeList`, `Struct` and
    /// `Map`) do not go through a downcast: they are compared against a
    /// one-row slice of `array` and simply yield `false` on a mismatch.
    ///
    /// # Panics
    ///
    /// Panics if `self` is a dictionary with invalid key type
    #[inline]
    pub fn eq_array(&self, array: &ArrayRef, index: usize) -> Result<bool> {
        Ok(match self {
            // Decimals additionally require matching precision and scale.
            ScalarValue::Decimal128(v, precision, scale) => {
                ScalarValue::eq_array_decimal(
                    array,
                    index,
                    v.as_ref(),
                    *precision,
                    *scale,
                )?
            }
            ScalarValue::Decimal256(v, precision, scale) => {
                ScalarValue::eq_array_decimal256(
                    array,
                    index,
                    v.as_ref(),
                    *precision,
                    *scale,
                )?
            }
            ScalarValue::Boolean(val) => {
                eq_array_primitive!(array, index, BooleanArray, val)?
            }
            ScalarValue::Float16(val) => {
                eq_array_primitive!(array, index, Float16Array, val)?
            }
            ScalarValue::Float32(val) => {
                eq_array_primitive!(array, index, Float32Array, val)?
            }
            ScalarValue::Float64(val) => {
                eq_array_primitive!(array, index, Float64Array, val)?
            }
            ScalarValue::Int8(val) => eq_array_primitive!(array, index, Int8Array, val)?,
            ScalarValue::Int16(val) => {
                eq_array_primitive!(array, index, Int16Array, val)?
            }
            ScalarValue::Int32(val) => {
                eq_array_primitive!(array, index, Int32Array, val)?
            }
            ScalarValue::Int64(val) => {
                eq_array_primitive!(array, index, Int64Array, val)?
            }
            ScalarValue::UInt8(val) => {
                eq_array_primitive!(array, index, UInt8Array, val)?
            }
            ScalarValue::UInt16(val) => {
                eq_array_primitive!(array, index, UInt16Array, val)?
            }
            ScalarValue::UInt32(val) => {
                eq_array_primitive!(array, index, UInt32Array, val)?
            }
            ScalarValue::UInt64(val) => {
                eq_array_primitive!(array, index, UInt64Array, val)?
            }
            ScalarValue::Utf8(val) => {
                eq_array_primitive!(array, index, StringArray, val)?
            }
            ScalarValue::Utf8View(val) => {
                eq_array_primitive!(array, index, StringViewArray, val)?
            }
            ScalarValue::LargeUtf8(val) => {
                eq_array_primitive!(array, index, LargeStringArray, val)?
            }
            ScalarValue::Binary(val) => {
                eq_array_primitive!(array, index, BinaryArray, val)?
            }
            ScalarValue::BinaryView(val) => {
                eq_array_primitive!(array, index, BinaryViewArray, val)?
            }
            // The byte width stored in the scalar is not compared here.
            ScalarValue::FixedSizeBinary(_, val) => {
                eq_array_primitive!(array, index, FixedSizeBinaryArray, val)?
            }
            ScalarValue::LargeBinary(val) => {
                eq_array_primitive!(array, index, LargeBinaryArray, val)?
            }
            // Nested values are stored as single-row arrays, so they are
            // compared against a one-row slice of `array`.
            ScalarValue::List(arr) => {
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
            }
            ScalarValue::LargeList(arr) => {
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
            }
            ScalarValue::FixedSizeList(arr) => {
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
            }
            ScalarValue::Struct(arr) => {
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
            }
            ScalarValue::Map(arr) => {
                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
            }
            ScalarValue::Date32(val) => {
                eq_array_primitive!(array, index, Date32Array, val)?
            }
            ScalarValue::Date64(val) => {
                eq_array_primitive!(array, index, Date64Array, val)?
            }
            ScalarValue::Time32Second(val) => {
                eq_array_primitive!(array, index, Time32SecondArray, val)?
            }
            ScalarValue::Time32Millisecond(val) => {
                eq_array_primitive!(array, index, Time32MillisecondArray, val)?
            }
            ScalarValue::Time64Microsecond(val) => {
                eq_array_primitive!(array, index, Time64MicrosecondArray, val)?
            }
            ScalarValue::Time64Nanosecond(val) => {
                eq_array_primitive!(array, index, Time64NanosecondArray, val)?
            }
            // The timezone stored in a timestamp scalar is not compared here.
            ScalarValue::TimestampSecond(val, _) => {
                eq_array_primitive!(array, index, TimestampSecondArray, val)?
            }
            ScalarValue::TimestampMillisecond(val, _) => {
                eq_array_primitive!(array, index, TimestampMillisecondArray, val)?
            }
            ScalarValue::TimestampMicrosecond(val, _) => {
                eq_array_primitive!(array, index, TimestampMicrosecondArray, val)?
            }
            ScalarValue::TimestampNanosecond(val, _) => {
                eq_array_primitive!(array, index, TimestampNanosecondArray, val)?
            }
            ScalarValue::IntervalYearMonth(val) => {
                eq_array_primitive!(array, index, IntervalYearMonthArray, val)?
            }
            ScalarValue::IntervalDayTime(val) => {
                eq_array_primitive!(array, index, IntervalDayTimeArray, val)?
            }
            ScalarValue::IntervalMonthDayNano(val) => {
                eq_array_primitive!(array, index, IntervalMonthDayNanoArray, val)?
            }
            ScalarValue::DurationSecond(val) => {
                eq_array_primitive!(array, index, DurationSecondArray, val)?
            }
            ScalarValue::DurationMillisecond(val) => {
                eq_array_primitive!(array, index, DurationMillisecondArray, val)?
            }
            ScalarValue::DurationMicrosecond(val) => {
                eq_array_primitive!(array, index, DurationMicrosecondArray, val)?
            }
            ScalarValue::DurationNanosecond(val) => {
                eq_array_primitive!(array, index, DurationNanosecondArray, val)?
            }
            ScalarValue::Union(value, _, _) => {
                // NOTE(review): `as_union_array` presumably panics when `array`
                // is not a union array - confirm against the arrow docs.
                let array = as_union_array(array);
                let ti = array.type_id(index);
                // Row `index` of the union maps to this offset in child `ti`.
                let index = array.value_offset(index);
                if let Some((ti_v, value)) = value {
                    // Both the type id and the child value must match.
                    ti_v == &ti && value.eq_array(array.child(ti), index)?
                } else {
                    // A union scalar without a value matches a null child entry.
                    array.child(ti).is_null(index)
                }
            }
            ScalarValue::Dictionary(key_type, v) => {
                // Resolve the dictionary key at `index` to the values array and
                // the position within it (`None` when the key itself is null).
                let (values_array, values_index) = match key_type.as_ref() {
                    DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
                    DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
                    DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
                    DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
                    DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
                    DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
                    DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
                    DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
                };
                // was the value in the array non null?
                match values_index {
                    Some(values_index) => v.eq_array(values_array, values_index)?,
                    None => v.is_null(),
                }
            }
            ScalarValue::Null => array.is_null(index),
        })
    }
3339
3340    fn eq_array_list(arr1: &ArrayRef, arr2: &ArrayRef, index: usize) -> bool {
3341        let right = arr2.slice(index, 1);
3342        arr1 == &right
3343    }
3344
3345    /// Estimate size if bytes including `Self`. For values with internal containers such as `String`
3346    /// includes the allocated size (`capacity`) rather than the current length (`len`)
3347    pub fn size(&self) -> usize {
3348        size_of_val(self)
3349            + match self {
3350                ScalarValue::Null
3351                | ScalarValue::Boolean(_)
3352                | ScalarValue::Float16(_)
3353                | ScalarValue::Float32(_)
3354                | ScalarValue::Float64(_)
3355                | ScalarValue::Decimal128(_, _, _)
3356                | ScalarValue::Decimal256(_, _, _)
3357                | ScalarValue::Int8(_)
3358                | ScalarValue::Int16(_)
3359                | ScalarValue::Int32(_)
3360                | ScalarValue::Int64(_)
3361                | ScalarValue::UInt8(_)
3362                | ScalarValue::UInt16(_)
3363                | ScalarValue::UInt32(_)
3364                | ScalarValue::UInt64(_)
3365                | ScalarValue::Date32(_)
3366                | ScalarValue::Date64(_)
3367                | ScalarValue::Time32Second(_)
3368                | ScalarValue::Time32Millisecond(_)
3369                | ScalarValue::Time64Microsecond(_)
3370                | ScalarValue::Time64Nanosecond(_)
3371                | ScalarValue::IntervalYearMonth(_)
3372                | ScalarValue::IntervalDayTime(_)
3373                | ScalarValue::IntervalMonthDayNano(_)
3374                | ScalarValue::DurationSecond(_)
3375                | ScalarValue::DurationMillisecond(_)
3376                | ScalarValue::DurationMicrosecond(_)
3377                | ScalarValue::DurationNanosecond(_) => 0,
3378                ScalarValue::Utf8(s)
3379                | ScalarValue::LargeUtf8(s)
3380                | ScalarValue::Utf8View(s) => {
3381                    s.as_ref().map(|s| s.capacity()).unwrap_or_default()
3382                }
3383                ScalarValue::TimestampSecond(_, s)
3384                | ScalarValue::TimestampMillisecond(_, s)
3385                | ScalarValue::TimestampMicrosecond(_, s)
3386                | ScalarValue::TimestampNanosecond(_, s) => {
3387                    s.as_ref().map(|s| s.len()).unwrap_or_default()
3388                }
3389                ScalarValue::Binary(b)
3390                | ScalarValue::FixedSizeBinary(_, b)
3391                | ScalarValue::LargeBinary(b)
3392                | ScalarValue::BinaryView(b) => {
3393                    b.as_ref().map(|b| b.capacity()).unwrap_or_default()
3394                }
3395                ScalarValue::List(arr) => arr.get_array_memory_size(),
3396                ScalarValue::LargeList(arr) => arr.get_array_memory_size(),
3397                ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(),
3398                ScalarValue::Struct(arr) => arr.get_array_memory_size(),
3399                ScalarValue::Map(arr) => arr.get_array_memory_size(),
3400                ScalarValue::Union(vals, fields, _mode) => {
3401                    vals.as_ref()
3402                        .map(|(_id, sv)| sv.size() - size_of_val(sv))
3403                        .unwrap_or_default()
3404                        // `fields` is boxed, so it is NOT already included in `self`
3405                        + size_of_val(fields)
3406                        + (size_of::<Field>() * fields.len())
3407                        + fields.iter().map(|(_idx, field)| field.size() - size_of_val(field)).sum::<usize>()
3408                }
3409                ScalarValue::Dictionary(dt, sv) => {
3410                    // `dt` and `sv` are boxed, so they are NOT already included in `self`
3411                    dt.size() + sv.size()
3412                }
3413            }
3414    }
3415
3416    /// Estimates [size](Self::size) of [`Vec`] in bytes.
3417    ///
3418    /// Includes the size of the [`Vec`] container itself.
3419    pub fn size_of_vec(vec: &Vec<Self>) -> usize {
3420        size_of_val(vec)
3421            + (size_of::<ScalarValue>() * vec.capacity())
3422            + vec
3423                .iter()
3424                .map(|sv| sv.size() - size_of_val(sv))
3425                .sum::<usize>()
3426    }
3427
3428    /// Estimates [size](Self::size) of [`VecDeque`] in bytes.
3429    ///
3430    /// Includes the size of the [`VecDeque`] container itself.
3431    pub fn size_of_vec_deque(vec_deque: &VecDeque<Self>) -> usize {
3432        size_of_val(vec_deque)
3433            + (size_of::<ScalarValue>() * vec_deque.capacity())
3434            + vec_deque
3435                .iter()
3436                .map(|sv| sv.size() - size_of_val(sv))
3437                .sum::<usize>()
3438    }
3439
3440    /// Estimates [size](Self::size) of [`HashSet`] in bytes.
3441    ///
3442    /// Includes the size of the [`HashSet`] container itself.
3443    pub fn size_of_hashset<S>(set: &HashSet<Self, S>) -> usize {
3444        size_of_val(set)
3445            + (size_of::<ScalarValue>() * set.capacity())
3446            + set
3447                .iter()
3448                .map(|sv| sv.size() - size_of_val(sv))
3449                .sum::<usize>()
3450    }
3451
3452    /// Compacts the allocation referenced by `self` to the minimum, copying the data if
3453    /// necessary.
3454    ///
3455    /// This can be relevant when `self` is a list or contains a list as a nested value, as
3456    /// a single list holds an Arc to its entire original array buffer.
3457    pub fn compact(&mut self) {
3458        match self {
3459            ScalarValue::Null
3460            | ScalarValue::Boolean(_)
3461            | ScalarValue::Float16(_)
3462            | ScalarValue::Float32(_)
3463            | ScalarValue::Float64(_)
3464            | ScalarValue::Decimal128(_, _, _)
3465            | ScalarValue::Decimal256(_, _, _)
3466            | ScalarValue::Int8(_)
3467            | ScalarValue::Int16(_)
3468            | ScalarValue::Int32(_)
3469            | ScalarValue::Int64(_)
3470            | ScalarValue::UInt8(_)
3471            | ScalarValue::UInt16(_)
3472            | ScalarValue::UInt32(_)
3473            | ScalarValue::UInt64(_)
3474            | ScalarValue::Date32(_)
3475            | ScalarValue::Date64(_)
3476            | ScalarValue::Time32Second(_)
3477            | ScalarValue::Time32Millisecond(_)
3478            | ScalarValue::Time64Microsecond(_)
3479            | ScalarValue::Time64Nanosecond(_)
3480            | ScalarValue::IntervalYearMonth(_)
3481            | ScalarValue::IntervalDayTime(_)
3482            | ScalarValue::IntervalMonthDayNano(_)
3483            | ScalarValue::DurationSecond(_)
3484            | ScalarValue::DurationMillisecond(_)
3485            | ScalarValue::DurationMicrosecond(_)
3486            | ScalarValue::DurationNanosecond(_)
3487            | ScalarValue::Utf8(_)
3488            | ScalarValue::LargeUtf8(_)
3489            | ScalarValue::Utf8View(_)
3490            | ScalarValue::TimestampSecond(_, _)
3491            | ScalarValue::TimestampMillisecond(_, _)
3492            | ScalarValue::TimestampMicrosecond(_, _)
3493            | ScalarValue::TimestampNanosecond(_, _)
3494            | ScalarValue::Binary(_)
3495            | ScalarValue::FixedSizeBinary(_, _)
3496            | ScalarValue::LargeBinary(_)
3497            | ScalarValue::BinaryView(_) => (),
3498            ScalarValue::FixedSizeList(arr) => {
3499                let array = copy_array_data(&arr.to_data());
3500                *Arc::make_mut(arr) = FixedSizeListArray::from(array);
3501            }
3502            ScalarValue::List(arr) => {
3503                let array = copy_array_data(&arr.to_data());
3504                *Arc::make_mut(arr) = ListArray::from(array);
3505            }
3506            ScalarValue::LargeList(arr) => {
3507                let array = copy_array_data(&arr.to_data());
3508                *Arc::make_mut(arr) = LargeListArray::from(array)
3509            }
3510            ScalarValue::Struct(arr) => {
3511                let array = copy_array_data(&arr.to_data());
3512                *Arc::make_mut(arr) = StructArray::from(array);
3513            }
3514            ScalarValue::Map(arr) => {
3515                let array = copy_array_data(&arr.to_data());
3516                *Arc::make_mut(arr) = MapArray::from(array);
3517            }
3518            ScalarValue::Union(val, _, _) => {
3519                if let Some((_, value)) = val.as_mut() {
3520                    value.compact();
3521                }
3522            }
3523            ScalarValue::Dictionary(_, value) => {
3524                value.compact();
3525            }
3526        }
3527    }
3528}
3529
3530pub fn copy_array_data(data: &ArrayData) -> ArrayData {
3531    let mut copy = MutableArrayData::new(vec![&data], true, data.len());
3532    copy.extend(0, 0, data.len());
3533    copy.freeze()
3534}
3535
3536macro_rules! impl_scalar {
3537    ($ty:ty, $scalar:tt) => {
3538        impl From<$ty> for ScalarValue {
3539            fn from(value: $ty) -> Self {
3540                ScalarValue::$scalar(Some(value))
3541            }
3542        }
3543
3544        impl From<Option<$ty>> for ScalarValue {
3545            fn from(value: Option<$ty>) -> Self {
3546                ScalarValue::$scalar(value)
3547            }
3548        }
3549    };
3550}
3551
3552impl_scalar!(f64, Float64);
3553impl_scalar!(f32, Float32);
3554impl_scalar!(i8, Int8);
3555impl_scalar!(i16, Int16);
3556impl_scalar!(i32, Int32);
3557impl_scalar!(i64, Int64);
3558impl_scalar!(bool, Boolean);
3559impl_scalar!(u8, UInt8);
3560impl_scalar!(u16, UInt16);
3561impl_scalar!(u32, UInt32);
3562impl_scalar!(u64, UInt64);
3563
3564impl From<&str> for ScalarValue {
3565    fn from(value: &str) -> Self {
3566        Some(value).into()
3567    }
3568}
3569
3570impl From<Option<&str>> for ScalarValue {
3571    fn from(value: Option<&str>) -> Self {
3572        let value = value.map(|s| s.to_string());
3573        ScalarValue::Utf8(value)
3574    }
3575}
3576
3577/// Wrapper to create ScalarValue::Struct for convenience
3578impl From<Vec<(&str, ScalarValue)>> for ScalarValue {
3579    fn from(value: Vec<(&str, ScalarValue)>) -> Self {
3580        value
3581            .into_iter()
3582            .fold(ScalarStructBuilder::new(), |builder, (name, value)| {
3583                builder.with_name_and_scalar(name, value)
3584            })
3585            .build()
3586            .unwrap()
3587    }
3588}
3589
3590impl FromStr for ScalarValue {
3591    type Err = Infallible;
3592
3593    fn from_str(s: &str) -> Result<Self, Self::Err> {
3594        Ok(s.into())
3595    }
3596}
3597
3598impl From<String> for ScalarValue {
3599    fn from(value: String) -> Self {
3600        ScalarValue::Utf8(Some(value))
3601    }
3602}
3603
3604macro_rules! impl_try_from {
3605    ($SCALAR:ident, $NATIVE:ident) => {
3606        impl TryFrom<ScalarValue> for $NATIVE {
3607            type Error = DataFusionError;
3608
3609            fn try_from(value: ScalarValue) -> Result<Self> {
3610                match value {
3611                    ScalarValue::$SCALAR(Some(inner_value)) => Ok(inner_value),
3612                    _ => _internal_err!(
3613                        "Cannot convert {:?} to {}",
3614                        value,
3615                        std::any::type_name::<Self>()
3616                    ),
3617                }
3618            }
3619        }
3620    };
3621}
3622
3623impl_try_from!(Int8, i8);
3624impl_try_from!(Int16, i16);
3625
3626// special implementation for i32 because of Date32 and Time32
3627impl TryFrom<ScalarValue> for i32 {
3628    type Error = DataFusionError;
3629
3630    fn try_from(value: ScalarValue) -> Result<Self> {
3631        match value {
3632            ScalarValue::Int32(Some(inner_value))
3633            | ScalarValue::Date32(Some(inner_value))
3634            | ScalarValue::Time32Second(Some(inner_value))
3635            | ScalarValue::Time32Millisecond(Some(inner_value)) => Ok(inner_value),
3636            _ => _internal_err!(
3637                "Cannot convert {:?} to {}",
3638                value,
3639                std::any::type_name::<Self>()
3640            ),
3641        }
3642    }
3643}
3644
3645// special implementation for i64 because of Date64, Time64 and Timestamp
3646impl TryFrom<ScalarValue> for i64 {
3647    type Error = DataFusionError;
3648
3649    fn try_from(value: ScalarValue) -> Result<Self> {
3650        match value {
3651            ScalarValue::Int64(Some(inner_value))
3652            | ScalarValue::Date64(Some(inner_value))
3653            | ScalarValue::Time64Microsecond(Some(inner_value))
3654            | ScalarValue::Time64Nanosecond(Some(inner_value))
3655            | ScalarValue::TimestampNanosecond(Some(inner_value), _)
3656            | ScalarValue::TimestampMicrosecond(Some(inner_value), _)
3657            | ScalarValue::TimestampMillisecond(Some(inner_value), _)
3658            | ScalarValue::TimestampSecond(Some(inner_value), _) => Ok(inner_value),
3659            _ => _internal_err!(
3660                "Cannot convert {:?} to {}",
3661                value,
3662                std::any::type_name::<Self>()
3663            ),
3664        }
3665    }
3666}
3667
3668// special implementation for i128 because of Decimal128
3669impl TryFrom<ScalarValue> for i128 {
3670    type Error = DataFusionError;
3671
3672    fn try_from(value: ScalarValue) -> Result<Self> {
3673        match value {
3674            ScalarValue::Decimal128(Some(inner_value), _, _) => Ok(inner_value),
3675            _ => _internal_err!(
3676                "Cannot convert {:?} to {}",
3677                value,
3678                std::any::type_name::<Self>()
3679            ),
3680        }
3681    }
3682}
3683
// special implementation for i256 because of Decimal256
3685impl TryFrom<ScalarValue> for i256 {
3686    type Error = DataFusionError;
3687
3688    fn try_from(value: ScalarValue) -> Result<Self> {
3689        match value {
3690            ScalarValue::Decimal256(Some(inner_value), _, _) => Ok(inner_value),
3691            _ => _internal_err!(
3692                "Cannot convert {:?} to {}",
3693                value,
3694                std::any::type_name::<Self>()
3695            ),
3696        }
3697    }
3698}
3699
3700impl_try_from!(UInt8, u8);
3701impl_try_from!(UInt16, u16);
3702impl_try_from!(UInt32, u32);
3703impl_try_from!(UInt64, u64);
3704impl_try_from!(Float32, f32);
3705impl_try_from!(Float64, f64);
3706impl_try_from!(Boolean, bool);
3707
3708impl TryFrom<DataType> for ScalarValue {
3709    type Error = DataFusionError;
3710
3711    /// Create a Null instance of ScalarValue for this datatype
3712    fn try_from(datatype: DataType) -> Result<Self> {
3713        (&datatype).try_into()
3714    }
3715}
3716
3717impl TryFrom<&DataType> for ScalarValue {
3718    type Error = DataFusionError;
3719
3720    /// Create a Null instance of ScalarValue for this datatype
3721    fn try_from(data_type: &DataType) -> Result<Self> {
3722        Self::try_new_null(data_type)
3723    }
3724}
3725
/// Writes an optional value to the formatter `$F`: the `Display` form of the
/// contained value for `Some`, or the literal `NULL` for `None`.
///
/// Evaluates to the `fmt::Result` of the underlying `write!`.
macro_rules! format_option {
    ($F:expr, $EXPR:expr) => {{
        match $EXPR {
            None => write!($F, "NULL"),
            Some(value) => write!($F, "{value}"),
        }
    }};
}
3734
// Implement Display trait for ScalarValue
//
// # Panics
//
// Panics if a nested value cannot be rendered as text (the `unwrap`s on
// `array_value_to_string` and `ArrayFormatter::try_new`), or if a list or
// struct scalar does not hold exactly one element.
3740impl fmt::Display for ScalarValue {
3741    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
3742        match self {
3743            ScalarValue::Decimal128(v, p, s) => {
3744                write!(f, "{v:?},{p:?},{s:?}")?;
3745            }
3746            ScalarValue::Decimal256(v, p, s) => {
3747                write!(f, "{v:?},{p:?},{s:?}")?;
3748            }
3749            ScalarValue::Boolean(e) => format_option!(f, e)?,
3750            ScalarValue::Float16(e) => format_option!(f, e)?,
3751            ScalarValue::Float32(e) => format_option!(f, e)?,
3752            ScalarValue::Float64(e) => format_option!(f, e)?,
3753            ScalarValue::Int8(e) => format_option!(f, e)?,
3754            ScalarValue::Int16(e) => format_option!(f, e)?,
3755            ScalarValue::Int32(e) => format_option!(f, e)?,
3756            ScalarValue::Int64(e) => format_option!(f, e)?,
3757            ScalarValue::UInt8(e) => format_option!(f, e)?,
3758            ScalarValue::UInt16(e) => format_option!(f, e)?,
3759            ScalarValue::UInt32(e) => format_option!(f, e)?,
3760            ScalarValue::UInt64(e) => format_option!(f, e)?,
3761            ScalarValue::TimestampSecond(e, _) => format_option!(f, e)?,
3762            ScalarValue::TimestampMillisecond(e, _) => format_option!(f, e)?,
3763            ScalarValue::TimestampMicrosecond(e, _) => format_option!(f, e)?,
3764            ScalarValue::TimestampNanosecond(e, _) => format_option!(f, e)?,
3765            ScalarValue::Utf8(e)
3766            | ScalarValue::LargeUtf8(e)
3767            | ScalarValue::Utf8View(e) => format_option!(f, e)?,
3768            ScalarValue::Binary(e)
3769            | ScalarValue::FixedSizeBinary(_, e)
3770            | ScalarValue::LargeBinary(e)
3771            | ScalarValue::BinaryView(e) => match e {
3772                Some(bytes) => {
3773                    // print up to first 10 bytes, with trailing ... if needed
3774                    for b in bytes.iter().take(10) {
3775                        write!(f, "{b:02X}")?;
3776                    }
3777                    if bytes.len() > 10 {
3778                        write!(f, "...")?;
3779                    }
3780                }
3781                None => write!(f, "NULL")?,
3782            },
3783            ScalarValue::List(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3784            ScalarValue::LargeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3785            ScalarValue::FixedSizeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3786            ScalarValue::Date32(e) => {
3787                format_option!(f, e.map(|v| Date32Type::to_naive_date(v).to_string()))?
3788            }
3789            ScalarValue::Date64(e) => {
3790                format_option!(f, e.map(|v| Date64Type::to_naive_date(v).to_string()))?
3791            }
3792            ScalarValue::Time32Second(e) => format_option!(f, e)?,
3793            ScalarValue::Time32Millisecond(e) => format_option!(f, e)?,
3794            ScalarValue::Time64Microsecond(e) => format_option!(f, e)?,
3795            ScalarValue::Time64Nanosecond(e) => format_option!(f, e)?,
3796            ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
3797            ScalarValue::IntervalMonthDayNano(e) => {
3798                format_option!(f, e.map(|v| format!("{v:?}")))?
3799            }
3800            ScalarValue::IntervalDayTime(e) => {
3801                format_option!(f, e.map(|v| format!("{v:?}")))?;
3802            }
3803            ScalarValue::DurationSecond(e) => format_option!(f, e)?,
3804            ScalarValue::DurationMillisecond(e) => format_option!(f, e)?,
3805            ScalarValue::DurationMicrosecond(e) => format_option!(f, e)?,
3806            ScalarValue::DurationNanosecond(e) => format_option!(f, e)?,
3807            ScalarValue::Struct(struct_arr) => {
3808                // ScalarValue Struct should always have a single element
3809                assert_eq!(struct_arr.len(), 1);
3810
3811                if struct_arr.null_count() == struct_arr.len() {
3812                    write!(f, "NULL")?;
3813                    return Ok(());
3814                }
3815
3816                let columns = struct_arr.columns();
3817                let fields = struct_arr.fields();
3818                let nulls = struct_arr.nulls();
3819
3820                write!(
3821                    f,
3822                    "{{{}}}",
3823                    columns
3824                        .iter()
3825                        .zip(fields.iter())
3826                        .map(|(column, field)| {
3827                            if nulls.is_some_and(|b| b.is_null(0)) {
3828                                format!("{}:NULL", field.name())
3829                            } else if let DataType::Struct(_) = field.data_type() {
3830                                let sv = ScalarValue::Struct(Arc::new(
3831                                    column.as_struct().to_owned(),
3832                                ));
3833                                format!("{}:{sv}", field.name())
3834                            } else {
3835                                let sv = array_value_to_string(column, 0).unwrap();
3836                                format!("{}:{sv}", field.name())
3837                            }
3838                        })
3839                        .collect::<Vec<_>>()
3840                        .join(",")
3841                )?
3842            }
3843            ScalarValue::Map(map_arr) => {
3844                if map_arr.null_count() == map_arr.len() {
3845                    write!(f, "NULL")?;
3846                    return Ok(());
3847                }
3848
3849                write!(
3850                    f,
3851                    "[{}]",
3852                    map_arr
3853                        .iter()
3854                        .map(|struct_array| {
3855                            if let Some(arr) = struct_array {
3856                                let mut buffer = VecDeque::new();
3857                                for i in 0..arr.len() {
3858                                    let key =
3859                                        array_value_to_string(arr.column(0), i).unwrap();
3860                                    let value =
3861                                        array_value_to_string(arr.column(1), i).unwrap();
3862                                    buffer.push_back(format!("{key}:{value}"));
3863                                }
3864                                format!(
3865                                    "{{{}}}",
3866                                    buffer
3867                                        .into_iter()
3868                                        .collect::<Vec<_>>()
3869                                        .join(",")
3870                                        .as_str()
3871                                )
3872                            } else {
3873                                "NULL".to_string()
3874                            }
3875                        })
3876                        .collect::<Vec<_>>()
3877                        .join(",")
3878                )?
3879            }
3880            ScalarValue::Union(val, _fields, _mode) => match val {
3881                Some((id, val)) => write!(f, "{id}:{val}")?,
3882                None => write!(f, "NULL")?,
3883            },
3884            ScalarValue::Dictionary(_k, v) => write!(f, "{v}")?,
3885            ScalarValue::Null => write!(f, "NULL")?,
3886        };
3887        Ok(())
3888    }
3889}
3890
3891fn fmt_list(arr: ArrayRef, f: &mut fmt::Formatter) -> fmt::Result {
3892    // ScalarValue List, LargeList, FixedSizeList should always have a single element
3893    assert_eq!(arr.len(), 1);
3894    let options = FormatOptions::default().with_display_error(true);
3895    let formatter =
3896        ArrayFormatter::try_new(arr.as_ref() as &dyn Array, &options).unwrap();
3897    let value_formatter = formatter.value(0);
3898    write!(f, "{value_formatter}")
3899}
3900
/// Writes the bytes of `data` to the formatter as comma-separated decimal
/// numbers, e.g. `[1, 2, 3]` ==> `"1,2,3"`. An empty slice writes nothing.
fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result {
    for (position, byte) in data.iter().enumerate() {
        // Separator goes before every byte except the first.
        if position > 0 {
            write!(f, ",")?;
        }
        write!(f, "{byte}")?;
    }
    Ok(())
}
3912
3913impl fmt::Debug for ScalarValue {
3914    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
3915        match self {
3916            ScalarValue::Decimal128(_, _, _) => write!(f, "Decimal128({self})"),
3917            ScalarValue::Decimal256(_, _, _) => write!(f, "Decimal256({self})"),
3918            ScalarValue::Boolean(_) => write!(f, "Boolean({self})"),
3919            ScalarValue::Float16(_) => write!(f, "Float16({self})"),
3920            ScalarValue::Float32(_) => write!(f, "Float32({self})"),
3921            ScalarValue::Float64(_) => write!(f, "Float64({self})"),
3922            ScalarValue::Int8(_) => write!(f, "Int8({self})"),
3923            ScalarValue::Int16(_) => write!(f, "Int16({self})"),
3924            ScalarValue::Int32(_) => write!(f, "Int32({self})"),
3925            ScalarValue::Int64(_) => write!(f, "Int64({self})"),
3926            ScalarValue::UInt8(_) => write!(f, "UInt8({self})"),
3927            ScalarValue::UInt16(_) => write!(f, "UInt16({self})"),
3928            ScalarValue::UInt32(_) => write!(f, "UInt32({self})"),
3929            ScalarValue::UInt64(_) => write!(f, "UInt64({self})"),
3930            ScalarValue::TimestampSecond(_, tz_opt) => {
3931                write!(f, "TimestampSecond({self}, {tz_opt:?})")
3932            }
3933            ScalarValue::TimestampMillisecond(_, tz_opt) => {
3934                write!(f, "TimestampMillisecond({self}, {tz_opt:?})")
3935            }
3936            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
3937                write!(f, "TimestampMicrosecond({self}, {tz_opt:?})")
3938            }
3939            ScalarValue::TimestampNanosecond(_, tz_opt) => {
3940                write!(f, "TimestampNanosecond({self}, {tz_opt:?})")
3941            }
3942            ScalarValue::Utf8(None) => write!(f, "Utf8({self})"),
3943            ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{self}\")"),
3944            ScalarValue::Utf8View(None) => write!(f, "Utf8View({self})"),
3945            ScalarValue::Utf8View(Some(_)) => write!(f, "Utf8View(\"{self}\")"),
3946            ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({self})"),
3947            ScalarValue::LargeUtf8(Some(_)) => write!(f, "LargeUtf8(\"{self}\")"),
3948            ScalarValue::Binary(None) => write!(f, "Binary({self})"),
3949            ScalarValue::Binary(Some(b)) => {
3950                write!(f, "Binary(\"")?;
3951                fmt_binary(b.as_slice(), f)?;
3952                write!(f, "\")")
3953            }
3954            ScalarValue::BinaryView(None) => write!(f, "BinaryView({self})"),
3955            ScalarValue::BinaryView(Some(b)) => {
3956                write!(f, "BinaryView(\"")?;
3957                fmt_binary(b.as_slice(), f)?;
3958                write!(f, "\")")
3959            }
3960            ScalarValue::FixedSizeBinary(size, None) => {
3961                write!(f, "FixedSizeBinary({size}, {self})")
3962            }
3963            ScalarValue::FixedSizeBinary(size, Some(b)) => {
3964                write!(f, "FixedSizeBinary({size}, \"")?;
3965                fmt_binary(b.as_slice(), f)?;
3966                write!(f, "\")")
3967            }
3968            ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({self})"),
3969            ScalarValue::LargeBinary(Some(b)) => {
3970                write!(f, "LargeBinary(\"")?;
3971                fmt_binary(b.as_slice(), f)?;
3972                write!(f, "\")")
3973            }
3974            ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"),
3975            ScalarValue::List(_) => write!(f, "List({self})"),
3976            ScalarValue::LargeList(_) => write!(f, "LargeList({self})"),
3977            ScalarValue::Struct(struct_arr) => {
3978                // ScalarValue Struct should always have a single element
3979                assert_eq!(struct_arr.len(), 1);
3980
3981                let columns = struct_arr.columns();
3982                let fields = struct_arr.fields();
3983
3984                write!(
3985                    f,
3986                    "Struct({{{}}})",
3987                    columns
3988                        .iter()
3989                        .zip(fields.iter())
3990                        .map(|(column, field)| {
3991                            let sv = array_value_to_string(column, 0).unwrap();
3992                            let name = field.name();
3993                            format!("{name}:{sv}")
3994                        })
3995                        .collect::<Vec<_>>()
3996                        .join(",")
3997                )
3998            }
3999            ScalarValue::Map(map_arr) => {
4000                write!(
4001                    f,
4002                    "Map([{}])",
4003                    map_arr
4004                        .iter()
4005                        .map(|struct_array| {
4006                            if let Some(arr) = struct_array {
4007                                let buffer: Vec<String> = (0..arr.len())
4008                                    .map(|i| {
4009                                        let key = array_value_to_string(arr.column(0), i)
4010                                            .unwrap();
4011                                        let value =
4012                                            array_value_to_string(arr.column(1), i)
4013                                                .unwrap();
4014                                        format!("{key:?}:{value:?}")
4015                                    })
4016                                    .collect();
4017                                format!("{{{}}}", buffer.join(","))
4018                            } else {
4019                                "NULL".to_string()
4020                            }
4021                        })
4022                        .collect::<Vec<_>>()
4023                        .join(",")
4024                )
4025            }
4026            ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"),
4027            ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"),
4028            ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{self}\")"),
4029            ScalarValue::Time32Millisecond(_) => {
4030                write!(f, "Time32Millisecond(\"{self}\")")
4031            }
4032            ScalarValue::Time64Microsecond(_) => {
4033                write!(f, "Time64Microsecond(\"{self}\")")
4034            }
4035            ScalarValue::Time64Nanosecond(_) => {
4036                write!(f, "Time64Nanosecond(\"{self}\")")
4037            }
4038            ScalarValue::IntervalDayTime(_) => {
4039                write!(f, "IntervalDayTime(\"{self}\")")
4040            }
4041            ScalarValue::IntervalYearMonth(_) => {
4042                write!(f, "IntervalYearMonth(\"{self}\")")
4043            }
4044            ScalarValue::IntervalMonthDayNano(_) => {
4045                write!(f, "IntervalMonthDayNano(\"{self}\")")
4046            }
4047            ScalarValue::DurationSecond(_) => write!(f, "DurationSecond(\"{self}\")"),
4048            ScalarValue::DurationMillisecond(_) => {
4049                write!(f, "DurationMillisecond(\"{self}\")")
4050            }
4051            ScalarValue::DurationMicrosecond(_) => {
4052                write!(f, "DurationMicrosecond(\"{self}\")")
4053            }
4054            ScalarValue::DurationNanosecond(_) => {
4055                write!(f, "DurationNanosecond(\"{self}\")")
4056            }
4057            ScalarValue::Union(val, _fields, _mode) => match val {
4058                Some((id, val)) => write!(f, "Union {id}:{val}"),
4059                None => write!(f, "Union(NULL)"),
4060            },
4061            ScalarValue::Dictionary(k, v) => write!(f, "Dictionary({k:?}, {v:?})"),
4062            ScalarValue::Null => write!(f, "NULL"),
4063        }
4064    }
4065}
4066
4067/// Trait used to map a NativeType to a ScalarValue
4068pub trait ScalarType<T: ArrowNativeType> {
4069    /// returns a scalar from an optional T
4070    fn scalar(r: Option<T>) -> ScalarValue;
4071}
4072
4073impl ScalarType<f32> for Float32Type {
4074    fn scalar(r: Option<f32>) -> ScalarValue {
4075        ScalarValue::Float32(r)
4076    }
4077}
4078
4079impl ScalarType<i64> for TimestampSecondType {
4080    fn scalar(r: Option<i64>) -> ScalarValue {
4081        ScalarValue::TimestampSecond(r, None)
4082    }
4083}
4084
4085impl ScalarType<i64> for TimestampMillisecondType {
4086    fn scalar(r: Option<i64>) -> ScalarValue {
4087        ScalarValue::TimestampMillisecond(r, None)
4088    }
4089}
4090
4091impl ScalarType<i64> for TimestampMicrosecondType {
4092    fn scalar(r: Option<i64>) -> ScalarValue {
4093        ScalarValue::TimestampMicrosecond(r, None)
4094    }
4095}
4096
4097impl ScalarType<i64> for TimestampNanosecondType {
4098    fn scalar(r: Option<i64>) -> ScalarValue {
4099        ScalarValue::TimestampNanosecond(r, None)
4100    }
4101}
4102
4103impl ScalarType<i32> for Date32Type {
4104    fn scalar(r: Option<i32>) -> ScalarValue {
4105        ScalarValue::Date32(r)
4106    }
4107}
4108
4109#[cfg(test)]
4110mod tests {
4111
4112    use super::*;
4113    use crate::cast::{
4114        as_map_array, as_string_array, as_struct_array, as_uint32_array, as_uint64_array,
4115    };
4116
4117    use crate::test_util::batches_to_string;
4118    use arrow::array::{types::Float64Type, NullBufferBuilder};
4119    use arrow::buffer::{Buffer, OffsetBuffer};
4120    use arrow::compute::{is_null, kernels};
4121    use arrow::datatypes::Fields;
4122    use arrow::error::ArrowError;
4123    use arrow::util::pretty::pretty_format_columns;
4124    use chrono::NaiveDate;
4125    use insta::assert_snapshot;
4126    use rand::Rng;
4127
4128    #[test]
4129    fn test_scalar_value_from_for_map() {
4130        let string_builder = StringBuilder::new();
4131        let int_builder = Int32Builder::with_capacity(4);
4132        let mut builder = MapBuilder::new(None, string_builder, int_builder);
4133        builder.keys().append_value("joe");
4134        builder.values().append_value(1);
4135        builder.append(true).unwrap();
4136
4137        builder.keys().append_value("blogs");
4138        builder.values().append_value(2);
4139        builder.keys().append_value("foo");
4140        builder.values().append_value(4);
4141        builder.append(true).unwrap();
4142        builder.append(true).unwrap();
4143        builder.append(false).unwrap();
4144
4145        let expected = builder.finish();
4146
4147        let sv = ScalarValue::Map(Arc::new(expected.clone()));
4148        let map_arr = sv.to_array().unwrap();
4149        let actual = as_map_array(&map_arr).unwrap();
4150        assert_eq!(actual, &expected);
4151    }
4152
4153    #[test]
4154    fn test_scalar_value_from_for_struct() {
4155        let boolean = Arc::new(BooleanArray::from(vec![false]));
4156        let int = Arc::new(Int32Array::from(vec![42]));
4157
4158        let expected = StructArray::from(vec![
4159            (
4160                Arc::new(Field::new("b", DataType::Boolean, false)),
4161                Arc::clone(&boolean) as ArrayRef,
4162            ),
4163            (
4164                Arc::new(Field::new("c", DataType::Int32, false)),
4165                Arc::clone(&int) as ArrayRef,
4166            ),
4167        ]);
4168
4169        let sv = ScalarStructBuilder::new()
4170            .with_array(Field::new("b", DataType::Boolean, false), boolean)
4171            .with_array(Field::new("c", DataType::Int32, false), int)
4172            .build()
4173            .unwrap();
4174
4175        let struct_arr = sv.to_array().unwrap();
4176        let actual = as_struct_array(&struct_arr).unwrap();
4177        assert_eq!(actual, &expected);
4178    }
4179
4180    #[test]
4181    #[should_panic(
4182        expected = "InvalidArgumentError(\"Incorrect array length for StructArray field \\\"bool\\\", expected 1 got 4\")"
4183    )]
4184    fn test_scalar_value_from_for_struct_should_panic() {
4185        let _ = ScalarStructBuilder::new()
4186            .with_array(
4187                Field::new("bool", DataType::Boolean, false),
4188                Arc::new(BooleanArray::from(vec![false, true, false, false])),
4189            )
4190            .with_array(
4191                Field::new("i32", DataType::Int32, false),
4192                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
4193            )
4194            .build()
4195            .unwrap();
4196    }
4197
4198    #[test]
4199    fn test_to_array_of_size_for_nested() {
4200        // Struct
4201        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
4202        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4203
4204        let struct_array = StructArray::from(vec![
4205            (
4206                Arc::new(Field::new("b", DataType::Boolean, false)),
4207                Arc::clone(&boolean) as ArrayRef,
4208            ),
4209            (
4210                Arc::new(Field::new("c", DataType::Int32, false)),
4211                Arc::clone(&int) as ArrayRef,
4212            ),
4213        ]);
4214        let sv = ScalarValue::Struct(Arc::new(struct_array));
4215        let actual_arr = sv.to_array_of_size(2).unwrap();
4216
4217        let boolean = Arc::new(BooleanArray::from(vec![
4218            false, false, true, true, false, false, true, true,
4219        ]));
4220        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31, 42, 28, 19, 31]));
4221
4222        let struct_array = StructArray::from(vec![
4223            (
4224                Arc::new(Field::new("b", DataType::Boolean, false)),
4225                Arc::clone(&boolean) as ArrayRef,
4226            ),
4227            (
4228                Arc::new(Field::new("c", DataType::Int32, false)),
4229                Arc::clone(&int) as ArrayRef,
4230            ),
4231        ]);
4232
4233        let actual = as_struct_array(&actual_arr).unwrap();
4234        assert_eq!(actual, &struct_array);
4235
4236        // List
4237        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
4238            Some(1),
4239            None,
4240            Some(2),
4241        ])]);
4242
4243        let sv = ScalarValue::List(Arc::new(arr));
4244        let actual_arr = sv
4245            .to_array_of_size(2)
4246            .expect("Failed to convert to array of size");
4247        let actual_list_arr = actual_arr.as_list::<i32>();
4248
4249        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
4250            Some(vec![Some(1), None, Some(2)]),
4251            Some(vec![Some(1), None, Some(2)]),
4252        ]);
4253
4254        assert_eq!(&arr, actual_list_arr);
4255    }
4256
4257    #[test]
4258    fn test_to_array_of_size_for_fsl() {
4259        let values = Int32Array::from_iter([Some(1), None, Some(2)]);
4260        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
4261        let arr = FixedSizeListArray::new(Arc::clone(&field), 3, Arc::new(values), None);
4262        let sv = ScalarValue::FixedSizeList(Arc::new(arr));
4263        let actual_arr = sv
4264            .to_array_of_size(2)
4265            .expect("Failed to convert to array of size");
4266
4267        let expected_values =
4268            Int32Array::from_iter([Some(1), None, Some(2), Some(1), None, Some(2)]);
4269        let expected_arr =
4270            FixedSizeListArray::new(field, 3, Arc::new(expected_values), None);
4271
4272        assert_eq!(
4273            &expected_arr,
4274            as_fixed_size_list_array(actual_arr.as_ref()).unwrap()
4275        );
4276
4277        let empty_array = sv
4278            .to_array_of_size(0)
4279            .expect("Failed to convert to empty array");
4280
4281        assert_eq!(empty_array.len(), 0);
4282    }
4283
4284    #[test]
4285    fn test_list_to_array_string() {
4286        let scalars = vec![
4287            ScalarValue::from("rust"),
4288            ScalarValue::from("arrow"),
4289            ScalarValue::from("data-fusion"),
4290        ];
4291
4292        let result = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8);
4293
4294        let expected = single_row_list_array(vec!["rust", "arrow", "data-fusion"]);
4295        assert_eq!(*result, expected);
4296    }
4297
4298    fn single_row_list_array(items: Vec<&str>) -> ListArray {
4299        SingleRowListArrayBuilder::new(Arc::new(StringArray::from(items)))
4300            .build_list_array()
4301    }
4302
4303    fn build_list<O: OffsetSizeTrait>(
4304        values: Vec<Option<Vec<Option<i64>>>>,
4305    ) -> Vec<ScalarValue> {
4306        values
4307            .into_iter()
4308            .map(|v| {
4309                let arr = if v.is_some() {
4310                    Arc::new(
4311                        GenericListArray::<O>::from_iter_primitive::<Int64Type, _, _>(
4312                            vec![v],
4313                        ),
4314                    )
4315                } else if O::IS_LARGE {
4316                    new_null_array(
4317                        &DataType::LargeList(Arc::new(Field::new_list_field(
4318                            DataType::Int64,
4319                            true,
4320                        ))),
4321                        1,
4322                    )
4323                } else {
4324                    new_null_array(
4325                        &DataType::List(Arc::new(Field::new_list_field(
4326                            DataType::Int64,
4327                            true,
4328                        ))),
4329                        1,
4330                    )
4331                };
4332
4333                if O::IS_LARGE {
4334                    ScalarValue::LargeList(arr.as_list::<i64>().to_owned().into())
4335                } else {
4336                    ScalarValue::List(arr.as_list::<i32>().to_owned().into())
4337                }
4338            })
4339            .collect()
4340    }
4341
4342    #[test]
4343    fn test_iter_to_array_fixed_size_list() {
4344        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
4345        let f1 = Arc::new(FixedSizeListArray::new(
4346            Arc::clone(&field),
4347            3,
4348            Arc::new(Int32Array::from(vec![1, 2, 3])),
4349            None,
4350        ));
4351        let f2 = Arc::new(FixedSizeListArray::new(
4352            Arc::clone(&field),
4353            3,
4354            Arc::new(Int32Array::from(vec![4, 5, 6])),
4355            None,
4356        ));
4357        let f_nulls = Arc::new(FixedSizeListArray::new_null(field, 1, 1));
4358
4359        let scalars = vec![
4360            ScalarValue::FixedSizeList(Arc::clone(&f_nulls)),
4361            ScalarValue::FixedSizeList(f1),
4362            ScalarValue::FixedSizeList(f2),
4363            ScalarValue::FixedSizeList(f_nulls),
4364        ];
4365
4366        let array = ScalarValue::iter_to_array(scalars).unwrap();
4367
4368        let expected = FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
4369            vec![
4370                None,
4371                Some(vec![Some(1), Some(2), Some(3)]),
4372                Some(vec![Some(4), Some(5), Some(6)]),
4373                None,
4374            ],
4375            3,
4376        );
4377        assert_eq!(array.as_ref(), &expected);
4378    }
4379
4380    #[test]
4381    fn test_iter_to_array_struct() {
4382        let s1 = StructArray::from(vec![
4383            (
4384                Arc::new(Field::new("A", DataType::Boolean, false)),
4385                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
4386            ),
4387            (
4388                Arc::new(Field::new("B", DataType::Int32, false)),
4389                Arc::new(Int32Array::from(vec![42])) as ArrayRef,
4390            ),
4391        ]);
4392
4393        let s2 = StructArray::from(vec![
4394            (
4395                Arc::new(Field::new("A", DataType::Boolean, false)),
4396                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
4397            ),
4398            (
4399                Arc::new(Field::new("B", DataType::Int32, false)),
4400                Arc::new(Int32Array::from(vec![42])) as ArrayRef,
4401            ),
4402        ]);
4403
4404        let scalars = vec![
4405            ScalarValue::Struct(Arc::new(s1)),
4406            ScalarValue::Struct(Arc::new(s2)),
4407        ];
4408
4409        let array = ScalarValue::iter_to_array(scalars).unwrap();
4410
4411        let expected = StructArray::from(vec![
4412            (
4413                Arc::new(Field::new("A", DataType::Boolean, false)),
4414                Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
4415            ),
4416            (
4417                Arc::new(Field::new("B", DataType::Int32, false)),
4418                Arc::new(Int32Array::from(vec![42, 42])) as ArrayRef,
4419            ),
4420        ]);
4421        assert_eq!(array.as_ref(), &expected);
4422    }
4423
4424    #[test]
4425    fn test_iter_to_array_struct_with_nulls() {
4426        // non-null
4427        let s1 = StructArray::from((
4428            vec![
4429                (
4430                    Arc::new(Field::new("A", DataType::Int32, false)),
4431                    Arc::new(Int32Array::from(vec![1])) as ArrayRef,
4432                ),
4433                (
4434                    Arc::new(Field::new("B", DataType::Int64, false)),
4435                    Arc::new(Int64Array::from(vec![2])) as ArrayRef,
4436                ),
4437            ],
4438            // Present the null mask, 1 is non-null, 0 is null
4439            Buffer::from(&[1]),
4440        ));
4441
4442        // null
4443        let s2 = StructArray::from((
4444            vec![
4445                (
4446                    Arc::new(Field::new("A", DataType::Int32, false)),
4447                    Arc::new(Int32Array::from(vec![3])) as ArrayRef,
4448                ),
4449                (
4450                    Arc::new(Field::new("B", DataType::Int64, false)),
4451                    Arc::new(Int64Array::from(vec![4])) as ArrayRef,
4452                ),
4453            ],
4454            Buffer::from(&[0]),
4455        ));
4456
4457        let scalars = vec![
4458            ScalarValue::Struct(Arc::new(s1)),
4459            ScalarValue::Struct(Arc::new(s2)),
4460        ];
4461
4462        let array = ScalarValue::iter_to_array(scalars).unwrap();
4463        let struct_array = array.as_struct();
4464        assert!(struct_array.is_valid(0));
4465        assert!(struct_array.is_null(1));
4466    }
4467
4468    #[test]
4469    fn iter_to_array_primitive_test() {
4470        // List[[1,2,3]], List[null], List[[4,5]]
4471        let scalars = build_list::<i32>(vec![
4472            Some(vec![Some(1), Some(2), Some(3)]),
4473            None,
4474            Some(vec![Some(4), Some(5)]),
4475        ]);
4476
4477        let array = ScalarValue::iter_to_array(scalars).unwrap();
4478        let list_array = as_list_array(&array);
4479        // List[[1,2,3], null, [4,5]]
4480        let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
4481            Some(vec![Some(1), Some(2), Some(3)]),
4482            None,
4483            Some(vec![Some(4), Some(5)]),
4484        ]);
4485        assert_eq!(list_array, &expected);
4486
4487        let scalars = build_list::<i64>(vec![
4488            Some(vec![Some(1), Some(2), Some(3)]),
4489            None,
4490            Some(vec![Some(4), Some(5)]),
4491        ]);
4492
4493        let array = ScalarValue::iter_to_array(scalars).unwrap();
4494        let list_array = as_large_list_array(&array);
4495        let expected = LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![
4496            Some(vec![Some(1), Some(2), Some(3)]),
4497            None,
4498            Some(vec![Some(4), Some(5)]),
4499        ]);
4500        assert_eq!(list_array, &expected);
4501    }
4502
4503    #[test]
4504    fn iter_to_array_string_test() {
4505        let arr1 = single_row_list_array(vec!["foo", "bar", "baz"]);
4506        let arr2 = single_row_list_array(vec!["rust", "world"]);
4507
4508        let scalars = vec![
4509            ScalarValue::List(Arc::new(arr1)),
4510            ScalarValue::List(Arc::new(arr2)),
4511        ];
4512
4513        let array = ScalarValue::iter_to_array(scalars).unwrap();
4514        let result = array.as_list::<i32>();
4515
4516        // build expected array
4517        let string_builder = StringBuilder::with_capacity(5, 25);
4518        let mut list_of_string_builder = ListBuilder::new(string_builder);
4519
4520        list_of_string_builder.values().append_value("foo");
4521        list_of_string_builder.values().append_value("bar");
4522        list_of_string_builder.values().append_value("baz");
4523        list_of_string_builder.append(true);
4524
4525        list_of_string_builder.values().append_value("rust");
4526        list_of_string_builder.values().append_value("world");
4527        list_of_string_builder.append(true);
4528        let expected = list_of_string_builder.finish();
4529
4530        assert_eq!(result, &expected);
4531    }
4532
4533    #[test]
4534    fn test_list_scalar_eq_to_array() {
4535        let list_array: ArrayRef =
4536            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
4537                Some(vec![Some(0), Some(1), Some(2)]),
4538                None,
4539                Some(vec![None, Some(5)]),
4540            ]));
4541
4542        let fsl_array: ArrayRef =
4543            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
4544                Some(vec![Some(0), Some(1), Some(2)]),
4545                None,
4546                Some(vec![Some(3), None, Some(5)]),
4547            ]));
4548
4549        for arr in [list_array, fsl_array] {
4550            for i in 0..arr.len() {
4551                let scalar =
4552                    ScalarValue::List(arr.slice(i, 1).as_list::<i32>().to_owned().into());
4553                assert!(scalar.eq_array(&arr, i).unwrap());
4554            }
4555        }
4556    }
4557
4558    #[test]
4559    fn scalar_add_trait_test() -> Result<()> {
4560        let float_value = ScalarValue::Float64(Some(123.));
4561        let float_value_2 = ScalarValue::Float64(Some(123.));
4562        assert_eq!(
4563            (float_value.add(&float_value_2))?,
4564            ScalarValue::Float64(Some(246.))
4565        );
4566        assert_eq!(
4567            (float_value.add(float_value_2))?,
4568            ScalarValue::Float64(Some(246.))
4569        );
4570        Ok(())
4571    }
4572
4573    #[test]
4574    fn scalar_sub_trait_test() -> Result<()> {
4575        let float_value = ScalarValue::Float64(Some(123.));
4576        let float_value_2 = ScalarValue::Float64(Some(123.));
4577        assert_eq!(
4578            float_value.sub(&float_value_2)?,
4579            ScalarValue::Float64(Some(0.))
4580        );
4581        assert_eq!(
4582            float_value.sub(float_value_2)?,
4583            ScalarValue::Float64(Some(0.))
4584        );
4585        Ok(())
4586    }
4587
4588    #[test]
4589    fn scalar_sub_trait_int32_test() -> Result<()> {
4590        let int_value = ScalarValue::Int32(Some(42));
4591        let int_value_2 = ScalarValue::Int32(Some(100));
4592        assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int32(Some(-58)));
4593        assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int32(Some(58)));
4594        Ok(())
4595    }
4596
4597    #[test]
4598    fn scalar_sub_trait_int32_overflow_test() {
4599        let int_value = ScalarValue::Int32(Some(i32::MAX));
4600        let int_value_2 = ScalarValue::Int32(Some(i32::MIN));
4601        let err = int_value
4602            .sub_checked(&int_value_2)
4603            .unwrap_err()
4604            .strip_backtrace();
4605        assert_eq!(
4606            err,
4607            "Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
4608        )
4609    }
4610
4611    #[test]
4612    fn scalar_sub_trait_int64_test() -> Result<()> {
4613        let int_value = ScalarValue::Int64(Some(42));
4614        let int_value_2 = ScalarValue::Int64(Some(100));
4615        assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int64(Some(-58)));
4616        assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int64(Some(58)));
4617        Ok(())
4618    }
4619
4620    #[test]
4621    fn scalar_sub_trait_int64_overflow_test() {
4622        let int_value = ScalarValue::Int64(Some(i64::MAX));
4623        let int_value_2 = ScalarValue::Int64(Some(i64::MIN));
4624        let err = int_value
4625            .sub_checked(&int_value_2)
4626            .unwrap_err()
4627            .strip_backtrace();
4628        assert_eq!(err, "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808")
4629    }
4630
4631    #[test]
4632    fn scalar_add_overflow_test() -> Result<()> {
4633        check_scalar_add_overflow::<Int8Type>(
4634            ScalarValue::Int8(Some(i8::MAX)),
4635            ScalarValue::Int8(Some(i8::MAX)),
4636        );
4637        check_scalar_add_overflow::<UInt8Type>(
4638            ScalarValue::UInt8(Some(u8::MAX)),
4639            ScalarValue::UInt8(Some(u8::MAX)),
4640        );
4641        check_scalar_add_overflow::<Int16Type>(
4642            ScalarValue::Int16(Some(i16::MAX)),
4643            ScalarValue::Int16(Some(i16::MAX)),
4644        );
4645        check_scalar_add_overflow::<UInt16Type>(
4646            ScalarValue::UInt16(Some(u16::MAX)),
4647            ScalarValue::UInt16(Some(u16::MAX)),
4648        );
4649        check_scalar_add_overflow::<Int32Type>(
4650            ScalarValue::Int32(Some(i32::MAX)),
4651            ScalarValue::Int32(Some(i32::MAX)),
4652        );
4653        check_scalar_add_overflow::<UInt32Type>(
4654            ScalarValue::UInt32(Some(u32::MAX)),
4655            ScalarValue::UInt32(Some(u32::MAX)),
4656        );
4657        check_scalar_add_overflow::<Int64Type>(
4658            ScalarValue::Int64(Some(i64::MAX)),
4659            ScalarValue::Int64(Some(i64::MAX)),
4660        );
4661        check_scalar_add_overflow::<UInt64Type>(
4662            ScalarValue::UInt64(Some(u64::MAX)),
4663            ScalarValue::UInt64(Some(u64::MAX)),
4664        );
4665
4666        Ok(())
4667    }
4668
4669    // Verifies that ScalarValue has the same behavior with compute kernel when it overflows.
4670    fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
4671    where
4672        T: ArrowNumericType,
4673    {
4674        let scalar_result = left.add_checked(&right);
4675
4676        let left_array = left.to_array().expect("Failed to convert to array");
4677        let right_array = right.to_array().expect("Failed to convert to array");
4678        let arrow_left_array = left_array.as_primitive::<T>();
4679        let arrow_right_array = right_array.as_primitive::<T>();
4680        let arrow_result = add(arrow_left_array, arrow_right_array);
4681
4682        assert_eq!(scalar_result.is_ok(), arrow_result.is_ok());
4683    }
4684
4685    #[test]
4686    fn test_interval_add_timestamp() -> Result<()> {
4687        let interval = ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
4688            months: 1,
4689            days: 2,
4690            nanoseconds: 3,
4691        }));
4692        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
4693        let result = interval.add(&timestamp)?;
4694        let expect = timestamp.add(&interval)?;
4695        assert_eq!(result, expect);
4696
4697        let interval = ScalarValue::IntervalYearMonth(Some(123));
4698        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
4699        let result = interval.add(&timestamp)?;
4700        let expect = timestamp.add(&interval)?;
4701        assert_eq!(result, expect);
4702
4703        let interval = ScalarValue::IntervalDayTime(Some(IntervalDayTime {
4704            days: 1,
4705            milliseconds: 23,
4706        }));
4707        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
4708        let result = interval.add(&timestamp)?;
4709        let expect = timestamp.add(&interval)?;
4710        assert_eq!(result, expect);
4711        Ok(())
4712    }
4713
4714    #[test]
4715    fn scalar_decimal_test() -> Result<()> {
4716        let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
4717        assert_eq!(DataType::Decimal128(10, 1), decimal_value.data_type());
4718        let try_into_value: i128 = decimal_value.clone().try_into().unwrap();
4719        assert_eq!(123_i128, try_into_value);
4720        assert!(!decimal_value.is_null());
4721        let neg_decimal_value = decimal_value.arithmetic_negate()?;
4722        match neg_decimal_value {
4723            ScalarValue::Decimal128(v, _, _) => {
4724                assert_eq!(-123, v.unwrap());
4725            }
4726            _ => {
4727                unreachable!();
4728            }
4729        }
4730
4731        // decimal scalar to array
4732        let array = decimal_value
4733            .to_array()
4734            .expect("Failed to convert to array");
4735        let array = as_decimal128_array(&array)?;
4736        assert_eq!(1, array.len());
4737        assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
4738        assert_eq!(123i128, array.value(0));
4739
4740        // decimal scalar to array with size
4741        let array = decimal_value
4742            .to_array_of_size(10)
4743            .expect("Failed to convert to array of size");
4744        let array_decimal = as_decimal128_array(&array)?;
4745        assert_eq!(10, array.len());
4746        assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
4747        assert_eq!(123i128, array_decimal.value(0));
4748        assert_eq!(123i128, array_decimal.value(9));
4749        // test eq array
4750        assert!(decimal_value
4751            .eq_array(&array, 1)
4752            .expect("Failed to compare arrays"));
4753        assert!(decimal_value
4754            .eq_array(&array, 5)
4755            .expect("Failed to compare arrays"));
4756        // test try from array
4757        assert_eq!(
4758            decimal_value,
4759            ScalarValue::try_from_array(&array, 5).unwrap()
4760        );
4761
4762        assert_eq!(
4763            decimal_value,
4764            ScalarValue::try_new_decimal128(123, 10, 1).unwrap()
4765        );
4766
4767        // test compare
4768        let left = ScalarValue::Decimal128(Some(123), 10, 2);
4769        let right = ScalarValue::Decimal128(Some(124), 10, 2);
4770        assert!(!left.eq(&right));
4771        let result = left < right;
4772        assert!(result);
4773        let result = left <= right;
4774        assert!(result);
4775        let right = ScalarValue::Decimal128(Some(124), 10, 3);
4776        // make sure that two decimals with diff datatype can't be compared.
4777        let result = left.partial_cmp(&right);
4778        assert_eq!(None, result);
4779
4780        let decimal_vec = vec![
4781            ScalarValue::Decimal128(Some(1), 10, 2),
4782            ScalarValue::Decimal128(Some(2), 10, 2),
4783            ScalarValue::Decimal128(Some(3), 10, 2),
4784        ];
4785        // convert the vec to decimal array and check the result
4786        let array = ScalarValue::iter_to_array(decimal_vec).unwrap();
4787        assert_eq!(3, array.len());
4788        assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());
4789
4790        let decimal_vec = vec![
4791            ScalarValue::Decimal128(Some(1), 10, 2),
4792            ScalarValue::Decimal128(Some(2), 10, 2),
4793            ScalarValue::Decimal128(Some(3), 10, 2),
4794            ScalarValue::Decimal128(None, 10, 2),
4795        ];
4796        let array = ScalarValue::iter_to_array(decimal_vec).unwrap();
4797        assert_eq!(4, array.len());
4798        assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());
4799
4800        assert!(ScalarValue::try_new_decimal128(1, 10, 2)
4801            .unwrap()
4802            .eq_array(&array, 0)
4803            .expect("Failed to compare arrays"));
4804        assert!(ScalarValue::try_new_decimal128(2, 10, 2)
4805            .unwrap()
4806            .eq_array(&array, 1)
4807            .expect("Failed to compare arrays"));
4808        assert!(ScalarValue::try_new_decimal128(3, 10, 2)
4809            .unwrap()
4810            .eq_array(&array, 2)
4811            .expect("Failed to compare arrays"));
4812        assert_eq!(
4813            ScalarValue::Decimal128(None, 10, 2),
4814            ScalarValue::try_from_array(&array, 3).unwrap()
4815        );
4816
4817        Ok(())
4818    }
4819
4820    #[test]
4821    fn test_list_partial_cmp() {
4822        let a =
4823            ScalarValue::List(Arc::new(
4824                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4825                    Some(1),
4826                    Some(2),
4827                    Some(3),
4828                ])]),
4829            ));
4830        let b =
4831            ScalarValue::List(Arc::new(
4832                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4833                    Some(1),
4834                    Some(2),
4835                    Some(3),
4836                ])]),
4837            ));
4838        assert_eq!(a.partial_cmp(&b), Some(Ordering::Equal));
4839
4840        let a =
4841            ScalarValue::List(Arc::new(
4842                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4843                    Some(10),
4844                    Some(2),
4845                    Some(3),
4846                ])]),
4847            ));
4848        let b =
4849            ScalarValue::List(Arc::new(
4850                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4851                    Some(1),
4852                    Some(2),
4853                    Some(30),
4854                ])]),
4855            ));
4856        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
4857
4858        let a =
4859            ScalarValue::List(Arc::new(
4860                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4861                    Some(10),
4862                    Some(2),
4863                    Some(3),
4864                ])]),
4865            ));
4866        let b =
4867            ScalarValue::List(Arc::new(
4868                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4869                    Some(10),
4870                    Some(2),
4871                    Some(30),
4872                ])]),
4873            ));
4874        assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
4875
4876        let a =
4877            ScalarValue::List(Arc::new(
4878                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4879                    Some(1),
4880                    Some(2),
4881                    Some(3),
4882                ])]),
4883            ));
4884        let b =
4885            ScalarValue::List(Arc::new(
4886                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4887                    Some(2),
4888                    Some(3),
4889                ])]),
4890            ));
4891        assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
4892
4893        let a =
4894            ScalarValue::List(Arc::new(
4895                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4896                    Some(2),
4897                    Some(3),
4898                    Some(4),
4899                ])]),
4900            ));
4901        let b =
4902            ScalarValue::List(Arc::new(
4903                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4904                    Some(1),
4905                    Some(2),
4906                ])]),
4907            ));
4908        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
4909
4910        let a =
4911            ScalarValue::List(Arc::new(
4912                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4913                    Some(1),
4914                    Some(2),
4915                    Some(3),
4916                ])]),
4917            ));
4918        let b =
4919            ScalarValue::List(Arc::new(
4920                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4921                    Some(1),
4922                    Some(2),
4923                ])]),
4924            ));
4925        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
4926
4927        let a =
4928            ScalarValue::List(Arc::new(
4929                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4930                    None,
4931                    Some(2),
4932                    Some(3),
4933                ])]),
4934            ));
4935        let b =
4936            ScalarValue::List(Arc::new(
4937                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4938                    Some(1),
4939                    Some(2),
4940                    Some(3),
4941                ])]),
4942            ));
4943        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
4944
4945        let a = ScalarValue::LargeList(Arc::new(LargeListArray::from_iter_primitive::<
4946            Int64Type,
4947            _,
4948            _,
4949        >(vec![Some(vec![
4950            None,
4951            Some(2),
4952            Some(3),
4953        ])])));
4954        let b = ScalarValue::LargeList(Arc::new(LargeListArray::from_iter_primitive::<
4955            Int64Type,
4956            _,
4957            _,
4958        >(vec![Some(vec![
4959            Some(1),
4960            Some(2),
4961            Some(3),
4962        ])])));
4963        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
4964
4965        let a = ScalarValue::FixedSizeList(Arc::new(
4966            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
4967                vec![Some(vec![None, Some(2), Some(3)])],
4968                3,
4969            ),
4970        ));
4971        let b = ScalarValue::FixedSizeList(Arc::new(
4972            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
4973                vec![Some(vec![Some(1), Some(2), Some(3)])],
4974                3,
4975            ),
4976        ));
4977        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
4978    }
4979
4980    #[test]
4981    fn scalar_value_to_array_u64() -> Result<()> {
4982        let value = ScalarValue::UInt64(Some(13u64));
4983        let array = value.to_array().expect("Failed to convert to array");
4984        let array = as_uint64_array(&array)?;
4985        assert_eq!(array.len(), 1);
4986        assert!(!array.is_null(0));
4987        assert_eq!(array.value(0), 13);
4988
4989        let value = ScalarValue::UInt64(None);
4990        let array = value.to_array().expect("Failed to convert to array");
4991        let array = as_uint64_array(&array)?;
4992        assert_eq!(array.len(), 1);
4993        assert!(array.is_null(0));
4994        Ok(())
4995    }
4996
4997    #[test]
4998    fn scalar_value_to_array_u32() -> Result<()> {
4999        let value = ScalarValue::UInt32(Some(13u32));
5000        let array = value.to_array().expect("Failed to convert to array");
5001        let array = as_uint32_array(&array)?;
5002        assert_eq!(array.len(), 1);
5003        assert!(!array.is_null(0));
5004        assert_eq!(array.value(0), 13);
5005
5006        let value = ScalarValue::UInt32(None);
5007        let array = value.to_array().expect("Failed to convert to array");
5008        let array = as_uint32_array(&array)?;
5009        assert_eq!(array.len(), 1);
5010        assert!(array.is_null(0));
5011        Ok(())
5012    }
5013
5014    #[test]
5015    fn scalar_list_null_to_array() {
5016        let list_array = ScalarValue::new_list_nullable(&[], &DataType::UInt64);
5017
5018        assert_eq!(list_array.len(), 1);
5019        assert_eq!(list_array.values().len(), 0);
5020    }
5021
5022    #[test]
5023    fn scalar_large_list_null_to_array() {
5024        let list_array = ScalarValue::new_large_list(&[], &DataType::UInt64);
5025
5026        assert_eq!(list_array.len(), 1);
5027        assert_eq!(list_array.values().len(), 0);
5028    }
5029
5030    #[test]
5031    fn scalar_list_to_array() -> Result<()> {
5032        let values = vec![
5033            ScalarValue::UInt64(Some(100)),
5034            ScalarValue::UInt64(None),
5035            ScalarValue::UInt64(Some(101)),
5036        ];
5037        let list_array = ScalarValue::new_list_nullable(&values, &DataType::UInt64);
5038        assert_eq!(list_array.len(), 1);
5039        assert_eq!(list_array.values().len(), 3);
5040
5041        let prim_array_ref = list_array.value(0);
5042        let prim_array = as_uint64_array(&prim_array_ref)?;
5043        assert_eq!(prim_array.len(), 3);
5044        assert_eq!(prim_array.value(0), 100);
5045        assert!(prim_array.is_null(1));
5046        assert_eq!(prim_array.value(2), 101);
5047        Ok(())
5048    }
5049
5050    #[test]
5051    fn scalar_large_list_to_array() -> Result<()> {
5052        let values = vec![
5053            ScalarValue::UInt64(Some(100)),
5054            ScalarValue::UInt64(None),
5055            ScalarValue::UInt64(Some(101)),
5056        ];
5057        let list_array = ScalarValue::new_large_list(&values, &DataType::UInt64);
5058        assert_eq!(list_array.len(), 1);
5059        assert_eq!(list_array.values().len(), 3);
5060
5061        let prim_array_ref = list_array.value(0);
5062        let prim_array = as_uint64_array(&prim_array_ref)?;
5063        assert_eq!(prim_array.len(), 3);
5064        assert_eq!(prim_array.value(0), 100);
5065        assert!(prim_array.is_null(1));
5066        assert_eq!(prim_array.value(2), 101);
5067        Ok(())
5068    }
5069
5070    /// Creates array directly and via ScalarValue and ensures they are the same
5071    macro_rules! check_scalar_iter {
5072        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
5073            let scalars: Vec<_> =
5074                $INPUT.iter().map(|v| ScalarValue::$SCALAR_T(*v)).collect();
5075
5076            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
5077
5078            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
5079
5080            assert_eq!(&array, &expected);
5081        }};
5082    }
5083
5084    /// Creates array directly and via ScalarValue and ensures they are the same
5085    /// but for variants that carry a timezone field.
5086    macro_rules! check_scalar_iter_tz {
5087        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
5088            let scalars: Vec<_> = $INPUT
5089                .iter()
5090                .map(|v| ScalarValue::$SCALAR_T(*v, None))
5091                .collect();
5092
5093            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
5094
5095            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
5096
5097            assert_eq!(&array, &expected);
5098        }};
5099    }
5100
5101    /// Creates array directly and via ScalarValue and ensures they
5102    /// are the same, for string  arrays
5103    macro_rules! check_scalar_iter_string {
5104        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
5105            let scalars: Vec<_> = $INPUT
5106                .iter()
5107                .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_string())))
5108                .collect();
5109
5110            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
5111
5112            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
5113
5114            assert_eq!(&array, &expected);
5115        }};
5116    }
5117
5118    /// Creates array directly and via ScalarValue and ensures they
5119    /// are the same, for binary arrays
5120    macro_rules! check_scalar_iter_binary {
5121        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
5122            let scalars: Vec<_> = $INPUT
5123                .iter()
5124                .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_vec())))
5125                .collect();
5126
5127            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
5128
5129            let expected: $ARRAYTYPE =
5130                $INPUT.iter().map(|v| v.map(|v| v.to_vec())).collect();
5131
5132            let expected: ArrayRef = Arc::new(expected);
5133
5134            assert_eq!(&array, &expected);
5135        }};
5136    }
5137
5138    #[test]
5139    // despite clippy claiming they are useless, the code doesn't compile otherwise.
5140    #[allow(clippy::useless_vec)]
5141    fn scalar_iter_to_array_boolean() {
5142        check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]);
5143        check_scalar_iter!(Float32, Float32Array, vec![Some(1.9), None, Some(-2.1)]);
5144        check_scalar_iter!(Float64, Float64Array, vec![Some(1.9), None, Some(-2.1)]);
5145
5146        check_scalar_iter!(Int8, Int8Array, vec![Some(1), None, Some(3)]);
5147        check_scalar_iter!(Int16, Int16Array, vec![Some(1), None, Some(3)]);
5148        check_scalar_iter!(Int32, Int32Array, vec![Some(1), None, Some(3)]);
5149        check_scalar_iter!(Int64, Int64Array, vec![Some(1), None, Some(3)]);
5150
5151        check_scalar_iter!(UInt8, UInt8Array, vec![Some(1), None, Some(3)]);
5152        check_scalar_iter!(UInt16, UInt16Array, vec![Some(1), None, Some(3)]);
5153        check_scalar_iter!(UInt32, UInt32Array, vec![Some(1), None, Some(3)]);
5154        check_scalar_iter!(UInt64, UInt64Array, vec![Some(1), None, Some(3)]);
5155
5156        check_scalar_iter_tz!(
5157            TimestampSecond,
5158            TimestampSecondArray,
5159            vec![Some(1), None, Some(3)]
5160        );
5161        check_scalar_iter_tz!(
5162            TimestampMillisecond,
5163            TimestampMillisecondArray,
5164            vec![Some(1), None, Some(3)]
5165        );
5166        check_scalar_iter_tz!(
5167            TimestampMicrosecond,
5168            TimestampMicrosecondArray,
5169            vec![Some(1), None, Some(3)]
5170        );
5171        check_scalar_iter_tz!(
5172            TimestampNanosecond,
5173            TimestampNanosecondArray,
5174            vec![Some(1), None, Some(3)]
5175        );
5176
5177        check_scalar_iter_string!(
5178            Utf8,
5179            StringArray,
5180            vec![Some("foo"), None, Some("bar")]
5181        );
5182        check_scalar_iter_string!(
5183            LargeUtf8,
5184            LargeStringArray,
5185            vec![Some("foo"), None, Some("bar")]
5186        );
5187        check_scalar_iter_binary!(
5188            Binary,
5189            BinaryArray,
5190            vec![Some(b"foo"), None, Some(b"bar")]
5191        );
5192        check_scalar_iter_binary!(
5193            LargeBinary,
5194            LargeBinaryArray,
5195            vec![Some(b"foo"), None, Some(b"bar")]
5196        );
5197    }
5198
5199    #[test]
5200    fn scalar_iter_to_array_empty() {
5201        let scalars = vec![] as Vec<ScalarValue>;
5202
5203        let result = ScalarValue::iter_to_array(scalars).unwrap_err();
5204        assert!(
5205            result
5206                .to_string()
5207                .contains("Empty iterator passed to ScalarValue::iter_to_array"),
5208            "{}",
5209            result
5210        );
5211    }
5212
5213    #[test]
5214    fn scalar_iter_to_dictionary() {
5215        fn make_val(v: Option<String>) -> ScalarValue {
5216            let key_type = DataType::Int32;
5217            let value = ScalarValue::Utf8(v);
5218            ScalarValue::Dictionary(Box::new(key_type), Box::new(value))
5219        }
5220
5221        let scalars = [
5222            make_val(Some("Foo".into())),
5223            make_val(None),
5224            make_val(Some("Bar".into())),
5225        ];
5226
5227        let array = ScalarValue::iter_to_array(scalars).unwrap();
5228        let array = as_dictionary_array::<Int32Type>(&array).unwrap();
5229        let values_array = as_string_array(array.values()).unwrap();
5230
5231        let values = array
5232            .keys_iter()
5233            .map(|k| {
5234                k.map(|k| {
5235                    assert!(values_array.is_valid(k));
5236                    values_array.value(k)
5237                })
5238            })
5239            .collect::<Vec<_>>();
5240
5241        let expected = vec![Some("Foo"), None, Some("Bar")];
5242        assert_eq!(values, expected);
5243    }
5244
5245    #[test]
5246    fn scalar_iter_to_array_mismatched_types() {
5247        use ScalarValue::*;
5248        // If the scalar values are not all the correct type, error here
5249        let scalars = [Boolean(Some(true)), Int32(Some(5))];
5250
5251        let result = ScalarValue::iter_to_array(scalars).unwrap_err();
5252        assert!(result.to_string().contains("Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)"),
5253                "{}", result);
5254    }
5255
5256    #[test]
5257    fn scalar_try_from_array_null() {
5258        let array = vec![Some(33), None].into_iter().collect::<Int64Array>();
5259        let array: ArrayRef = Arc::new(array);
5260
5261        assert_eq!(
5262            ScalarValue::Int64(Some(33)),
5263            ScalarValue::try_from_array(&array, 0).unwrap()
5264        );
5265        assert_eq!(
5266            ScalarValue::Int64(None),
5267            ScalarValue::try_from_array(&array, 1).unwrap()
5268        );
5269    }
5270
5271    #[test]
5272    fn scalar_try_from_array_list_array_null() {
5273        let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5274            Some(vec![Some(1), Some(2)]),
5275            None,
5276        ]);
5277
5278        let non_null_list_scalar = ScalarValue::try_from_array(&list, 0).unwrap();
5279        let null_list_scalar = ScalarValue::try_from_array(&list, 1).unwrap();
5280
5281        let data_type =
5282            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
5283
5284        assert_eq!(non_null_list_scalar.data_type(), data_type);
5285        assert_eq!(null_list_scalar.data_type(), data_type);
5286    }
5287
5288    #[test]
5289    fn scalar_try_from_list_datatypes() {
5290        let inner_field = Arc::new(Field::new_list_field(DataType::Int32, true));
5291
5292        // Test for List
5293        let data_type = &DataType::List(Arc::clone(&inner_field));
5294        let scalar: ScalarValue = data_type.try_into().unwrap();
5295        let expected = ScalarValue::List(
5296            new_null_array(data_type, 1)
5297                .as_list::<i32>()
5298                .to_owned()
5299                .into(),
5300        );
5301        assert_eq!(expected, scalar);
5302        assert!(expected.is_null());
5303
5304        // Test for LargeList
5305        let data_type = &DataType::LargeList(Arc::clone(&inner_field));
5306        let scalar: ScalarValue = data_type.try_into().unwrap();
5307        let expected = ScalarValue::LargeList(
5308            new_null_array(data_type, 1)
5309                .as_list::<i64>()
5310                .to_owned()
5311                .into(),
5312        );
5313        assert_eq!(expected, scalar);
5314        assert!(expected.is_null());
5315
5316        // Test for FixedSizeList(5)
5317        let data_type = &DataType::FixedSizeList(Arc::clone(&inner_field), 5);
5318        let scalar: ScalarValue = data_type.try_into().unwrap();
5319        let expected = ScalarValue::FixedSizeList(
5320            new_null_array(data_type, 1)
5321                .as_fixed_size_list()
5322                .to_owned()
5323                .into(),
5324        );
5325        assert_eq!(expected, scalar);
5326        assert!(expected.is_null());
5327    }
5328
5329    #[test]
5330    fn scalar_try_from_list_of_list() {
5331        let data_type = DataType::List(Arc::new(Field::new_list_field(
5332            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
5333            true,
5334        )));
5335        let data_type = &data_type;
5336        let scalar: ScalarValue = data_type.try_into().unwrap();
5337
5338        let expected = ScalarValue::List(
5339            new_null_array(
5340                &DataType::List(Arc::new(Field::new_list_field(
5341                    DataType::List(Arc::new(Field::new_list_field(
5342                        DataType::Int32,
5343                        true,
5344                    ))),
5345                    true,
5346                ))),
5347                1,
5348            )
5349            .as_list::<i32>()
5350            .to_owned()
5351            .into(),
5352        );
5353
5354        assert_eq!(expected, scalar)
5355    }
5356
5357    #[test]
5358    fn scalar_try_from_not_equal_list_nested_list() {
5359        let list_data_type =
5360            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
5361        let data_type = &list_data_type;
5362        let list_scalar: ScalarValue = data_type.try_into().unwrap();
5363
5364        let nested_list_data_type = DataType::List(Arc::new(Field::new_list_field(
5365            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
5366            true,
5367        )));
5368        let data_type = &nested_list_data_type;
5369        let nested_list_scalar: ScalarValue = data_type.try_into().unwrap();
5370
5371        assert_ne!(list_scalar, nested_list_scalar);
5372    }
5373
5374    #[test]
5375    fn scalar_try_from_dict_datatype() {
5376        let data_type =
5377            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
5378        let data_type = &data_type;
5379        let expected = ScalarValue::Dictionary(
5380            Box::new(DataType::Int8),
5381            Box::new(ScalarValue::Utf8(None)),
5382        );
5383        assert_eq!(expected, data_type.try_into().unwrap())
5384    }
5385
5386    #[test]
5387    fn size_of_scalar() {
5388        // Since ScalarValues are used in a non trivial number of places,
5389        // making it larger means significant more memory consumption
5390        // per distinct value.
5391        //
5392        // Thus this test ensures that no code change makes ScalarValue larger
5393        //
5394        // The alignment requirements differ across architectures and
5395        // thus the size of the enum appears to as well
5396
5397        // The value may also change depending on rust version
5398        assert_eq!(size_of::<ScalarValue>(), 64);
5399    }
5400
5401    #[test]
5402    fn memory_size() {
5403        let sv = ScalarValue::Binary(Some(Vec::with_capacity(10)));
5404        assert_eq!(sv.size(), size_of::<ScalarValue>() + 10,);
5405        let sv_size = sv.size();
5406
5407        let mut v = Vec::with_capacity(10);
5408        // do NOT clone `sv` here because this may shrink the vector capacity
5409        v.push(sv);
5410        assert_eq!(v.capacity(), 10);
5411        assert_eq!(
5412            ScalarValue::size_of_vec(&v),
5413            size_of::<Vec<ScalarValue>>() + (9 * size_of::<ScalarValue>()) + sv_size,
5414        );
5415
5416        let mut s = HashSet::with_capacity(0);
5417        // do NOT clone `sv` here because this may shrink the vector capacity
5418        s.insert(v.pop().unwrap());
5419        // hashsets may easily grow during insert, so capacity is dynamic
5420        let s_capacity = s.capacity();
5421        assert_eq!(
5422            ScalarValue::size_of_hashset(&s),
5423            size_of::<HashSet<ScalarValue>>()
5424                + ((s_capacity - 1) * size_of::<ScalarValue>())
5425                + sv_size,
5426        );
5427    }
5428
5429    #[test]
5430    fn scalar_eq_array() {
5431        // Validate that eq_array has the same semantics as ScalarValue::eq
5432        macro_rules! make_typed_vec {
5433            ($INPUT:expr, $TYPE:ident) => {{
5434                $INPUT
5435                    .iter()
5436                    .map(|v| v.map(|v| v as $TYPE))
5437                    .collect::<Vec<_>>()
5438            }};
5439        }
5440
5441        let bool_vals = [Some(true), None, Some(false)];
5442        let f32_vals = [Some(-1.0), None, Some(1.0)];
5443        let f64_vals = make_typed_vec!(f32_vals, f64);
5444
5445        let i8_vals = [Some(-1), None, Some(1)];
5446        let i16_vals = make_typed_vec!(i8_vals, i16);
5447        let i32_vals = make_typed_vec!(i8_vals, i32);
5448        let i64_vals = make_typed_vec!(i8_vals, i64);
5449
5450        let u8_vals = [Some(0), None, Some(1)];
5451        let u16_vals = make_typed_vec!(u8_vals, u16);
5452        let u32_vals = make_typed_vec!(u8_vals, u32);
5453        let u64_vals = make_typed_vec!(u8_vals, u64);
5454
5455        let str_vals = [Some("foo"), None, Some("bar")];
5456
5457        let interval_dt_vals = [
5458            Some(IntervalDayTime::MINUS_ONE),
5459            None,
5460            Some(IntervalDayTime::ONE),
5461        ];
5462        let interval_mdn_vals = [
5463            Some(IntervalMonthDayNano::MINUS_ONE),
5464            None,
5465            Some(IntervalMonthDayNano::ONE),
5466        ];
5467
5468        /// Test each value in `scalar` with the corresponding element
5469        /// at `array`. Assumes each element is unique (aka not equal
5470        /// with all other indexes)
5471        #[derive(Debug)]
5472        struct TestCase {
5473            array: ArrayRef,
5474            scalars: Vec<ScalarValue>,
5475        }
5476
5477        /// Create a test case for casing the input to the specified array type
5478        macro_rules! make_test_case {
5479            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
5480                TestCase {
5481                    array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
5482                    scalars: $INPUT.iter().map(|v| ScalarValue::$SCALAR_TY(*v)).collect(),
5483                }
5484            }};
5485
5486            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
5487                let tz = $TZ;
5488                TestCase {
5489                    array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
5490                    scalars: $INPUT
5491                        .iter()
5492                        .map(|v| ScalarValue::$SCALAR_TY(*v, tz.clone()))
5493                        .collect(),
5494                }
5495            }};
5496        }
5497
5498        macro_rules! make_str_test_case {
5499            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
5500                TestCase {
5501                    array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
5502                    scalars: $INPUT
5503                        .iter()
5504                        .map(|v| ScalarValue::$SCALAR_TY(v.map(|v| v.to_string())))
5505                        .collect(),
5506                }
5507            }};
5508        }
5509
5510        macro_rules! make_binary_test_case {
5511            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
5512                TestCase {
5513                    array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
5514                    scalars: $INPUT
5515                        .iter()
5516                        .map(|v| {
5517                            ScalarValue::$SCALAR_TY(v.map(|v| v.as_bytes().to_vec()))
5518                        })
5519                        .collect(),
5520                }
5521            }};
5522        }
5523
5524        /// create a test case for DictionaryArray<$INDEX_TY>
5525        macro_rules! make_str_dict_test_case {
5526            ($INPUT:expr, $INDEX_TY:ident) => {{
5527                TestCase {
5528                    array: Arc::new(
5529                        $INPUT
5530                            .iter()
5531                            .cloned()
5532                            .collect::<DictionaryArray<$INDEX_TY>>(),
5533                    ),
5534                    scalars: $INPUT
5535                        .iter()
5536                        .map(|v| {
5537                            ScalarValue::Dictionary(
5538                                Box::new($INDEX_TY::DATA_TYPE),
5539                                Box::new(ScalarValue::Utf8(v.map(|v| v.to_string()))),
5540                            )
5541                        })
5542                        .collect(),
5543                }
5544            }};
5545        }
5546
5547        let cases = vec![
5548            make_test_case!(bool_vals, BooleanArray, Boolean),
5549            make_test_case!(f32_vals, Float32Array, Float32),
5550            make_test_case!(f64_vals, Float64Array, Float64),
5551            make_test_case!(i8_vals, Int8Array, Int8),
5552            make_test_case!(i16_vals, Int16Array, Int16),
5553            make_test_case!(i32_vals, Int32Array, Int32),
5554            make_test_case!(i64_vals, Int64Array, Int64),
5555            make_test_case!(u8_vals, UInt8Array, UInt8),
5556            make_test_case!(u16_vals, UInt16Array, UInt16),
5557            make_test_case!(u32_vals, UInt32Array, UInt32),
5558            make_test_case!(u64_vals, UInt64Array, UInt64),
5559            make_str_test_case!(str_vals, StringArray, Utf8),
5560            make_str_test_case!(str_vals, LargeStringArray, LargeUtf8),
5561            make_binary_test_case!(str_vals, BinaryArray, Binary),
5562            make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary),
5563            make_test_case!(i32_vals, Date32Array, Date32),
5564            make_test_case!(i64_vals, Date64Array, Date64),
5565            make_test_case!(i32_vals, Time32SecondArray, Time32Second),
5566            make_test_case!(i32_vals, Time32MillisecondArray, Time32Millisecond),
5567            make_test_case!(i64_vals, Time64MicrosecondArray, Time64Microsecond),
5568            make_test_case!(i64_vals, Time64NanosecondArray, Time64Nanosecond),
5569            make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None),
5570            make_test_case!(
5571                i64_vals,
5572                TimestampSecondArray,
5573                TimestampSecond,
5574                Some("UTC".into())
5575            ),
5576            make_test_case!(
5577                i64_vals,
5578                TimestampMillisecondArray,
5579                TimestampMillisecond,
5580                None
5581            ),
5582            make_test_case!(
5583                i64_vals,
5584                TimestampMillisecondArray,
5585                TimestampMillisecond,
5586                Some("UTC".into())
5587            ),
5588            make_test_case!(
5589                i64_vals,
5590                TimestampMicrosecondArray,
5591                TimestampMicrosecond,
5592                None
5593            ),
5594            make_test_case!(
5595                i64_vals,
5596                TimestampMicrosecondArray,
5597                TimestampMicrosecond,
5598                Some("UTC".into())
5599            ),
5600            make_test_case!(
5601                i64_vals,
5602                TimestampNanosecondArray,
5603                TimestampNanosecond,
5604                None
5605            ),
5606            make_test_case!(
5607                i64_vals,
5608                TimestampNanosecondArray,
5609                TimestampNanosecond,
5610                Some("UTC".into())
5611            ),
5612            make_test_case!(i32_vals, IntervalYearMonthArray, IntervalYearMonth),
5613            make_test_case!(interval_dt_vals, IntervalDayTimeArray, IntervalDayTime),
5614            make_test_case!(
5615                interval_mdn_vals,
5616                IntervalMonthDayNanoArray,
5617                IntervalMonthDayNano
5618            ),
5619            make_str_dict_test_case!(str_vals, Int8Type),
5620            make_str_dict_test_case!(str_vals, Int16Type),
5621            make_str_dict_test_case!(str_vals, Int32Type),
5622            make_str_dict_test_case!(str_vals, Int64Type),
5623            make_str_dict_test_case!(str_vals, UInt8Type),
5624            make_str_dict_test_case!(str_vals, UInt16Type),
5625            make_str_dict_test_case!(str_vals, UInt32Type),
5626            make_str_dict_test_case!(str_vals, UInt64Type),
5627        ];
5628
5629        for case in cases {
5630            println!("**** Test Case *****");
5631            let TestCase { array, scalars } = case;
5632            println!("Input array type: {}", array.data_type());
5633            println!("Input scalars: {scalars:#?}");
5634            assert_eq!(array.len(), scalars.len());
5635
5636            for (index, scalar) in scalars.into_iter().enumerate() {
5637                assert!(
5638                    scalar
5639                        .eq_array(&array, index)
5640                        .expect("Failed to compare arrays"),
5641                    "Expected {scalar:?} to be equal to {array:?} at index {index}"
5642                );
5643
5644                // test that all other elements are *not* equal
5645                for other_index in 0..array.len() {
5646                    if index != other_index {
5647                        assert!(
5648                            !scalar.eq_array(&array, other_index).expect("Failed to compare arrays"),
5649                            "Expected {scalar:?} to be NOT equal to {array:?} at index {other_index}"
5650                        );
5651                    }
5652                }
5653            }
5654        }
5655    }
5656
5657    #[test]
5658    fn scalar_partial_ordering() {
5659        use ScalarValue::*;
5660
5661        assert_eq!(
5662            Int64(Some(33)).partial_cmp(&Int64(Some(0))),
5663            Some(Ordering::Greater)
5664        );
5665        assert_eq!(
5666            Int64(Some(0)).partial_cmp(&Int64(Some(33))),
5667            Some(Ordering::Less)
5668        );
5669        assert_eq!(
5670            Int64(Some(33)).partial_cmp(&Int64(Some(33))),
5671            Some(Ordering::Equal)
5672        );
5673        // For different data type, `partial_cmp` returns None.
5674        assert_eq!(Int64(Some(33)).partial_cmp(&Int32(Some(33))), None);
5675        assert_eq!(Int32(Some(33)).partial_cmp(&Int64(Some(33))), None);
5676
5677        assert_eq!(
5678            ScalarValue::from(vec![
5679                ("A", ScalarValue::from(1.0)),
5680                ("B", ScalarValue::from("Z")),
5681            ])
5682            .partial_cmp(&ScalarValue::from(vec![
5683                ("A", ScalarValue::from(2.0)),
5684                ("B", ScalarValue::from("A")),
5685            ])),
5686            Some(Ordering::Less)
5687        );
5688
5689        // For different struct fields, `partial_cmp` returns None.
5690        assert_eq!(
5691            ScalarValue::from(vec![
5692                ("A", ScalarValue::from(1.0)),
5693                ("B", ScalarValue::from("Z")),
5694            ])
5695            .partial_cmp(&ScalarValue::from(vec![
5696                ("a", ScalarValue::from(2.0)),
5697                ("b", ScalarValue::from("A")),
5698            ])),
5699            None
5700        );
5701    }
5702
5703    #[test]
5704    fn test_scalar_value_from_string() {
5705        let scalar = ScalarValue::from("foo");
5706        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5707        let scalar = ScalarValue::from("foo".to_string());
5708        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5709        let scalar = ScalarValue::from_str("foo").unwrap();
5710        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5711    }
5712
5713    #[test]
5714    fn test_scalar_struct() {
5715        let field_a = Arc::new(Field::new("A", DataType::Int32, false));
5716        let field_b = Arc::new(Field::new("B", DataType::Boolean, false));
5717        let field_c = Arc::new(Field::new("C", DataType::Utf8, false));
5718
5719        let field_e = Arc::new(Field::new("e", DataType::Int16, false));
5720        let field_f = Arc::new(Field::new("f", DataType::Int64, false));
5721        let field_d = Arc::new(Field::new(
5722            "D",
5723            DataType::Struct(vec![Arc::clone(&field_e), Arc::clone(&field_f)].into()),
5724            false,
5725        ));
5726
5727        let struct_array = StructArray::from(vec![
5728            (
5729                Arc::clone(&field_e),
5730                Arc::new(Int16Array::from(vec![2])) as ArrayRef,
5731            ),
5732            (
5733                Arc::clone(&field_f),
5734                Arc::new(Int64Array::from(vec![3])) as ArrayRef,
5735            ),
5736        ]);
5737
5738        let struct_array = StructArray::from(vec![
5739            (
5740                Arc::clone(&field_a),
5741                Arc::new(Int32Array::from(vec![23])) as ArrayRef,
5742            ),
5743            (
5744                Arc::clone(&field_b),
5745                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
5746            ),
5747            (
5748                Arc::clone(&field_c),
5749                Arc::new(StringArray::from(vec!["Hello"])) as ArrayRef,
5750            ),
5751            (Arc::clone(&field_d), Arc::new(struct_array) as ArrayRef),
5752        ]);
5753        let scalar = ScalarValue::Struct(Arc::new(struct_array));
5754
5755        let array = scalar
5756            .to_array_of_size(2)
5757            .expect("Failed to convert to array of size");
5758
5759        let expected = Arc::new(StructArray::from(vec![
5760            (
5761                Arc::clone(&field_a),
5762                Arc::new(Int32Array::from(vec![23, 23])) as ArrayRef,
5763            ),
5764            (
5765                Arc::clone(&field_b),
5766                Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
5767            ),
5768            (
5769                Arc::clone(&field_c),
5770                Arc::new(StringArray::from(vec!["Hello", "Hello"])) as ArrayRef,
5771            ),
5772            (
5773                Arc::clone(&field_d),
5774                Arc::new(StructArray::from(vec![
5775                    (
5776                        Arc::clone(&field_e),
5777                        Arc::new(Int16Array::from(vec![2, 2])) as ArrayRef,
5778                    ),
5779                    (
5780                        Arc::clone(&field_f),
5781                        Arc::new(Int64Array::from(vec![3, 3])) as ArrayRef,
5782                    ),
5783                ])) as ArrayRef,
5784            ),
5785        ])) as ArrayRef;
5786
5787        assert_eq!(&array, &expected);
5788
5789        // Construct from second element of ArrayRef
5790        let constructed = ScalarValue::try_from_array(&expected, 1).unwrap();
5791        assert_eq!(constructed, scalar);
5792
5793        // None version
5794        let none_scalar = ScalarValue::try_from(array.data_type()).unwrap();
5795        assert!(none_scalar.is_null());
5796        assert_eq!(
5797            format!("{none_scalar:?}"),
5798            String::from("Struct({A:,B:,C:,D:})")
5799        );
5800
5801        // Construct with convenience From<Vec<(&str, ScalarValue)>>
5802        let constructed = ScalarValue::from(vec![
5803            ("A", ScalarValue::from(23)),
5804            ("B", ScalarValue::from(false)),
5805            ("C", ScalarValue::from("Hello")),
5806            (
5807                "D",
5808                ScalarValue::from(vec![
5809                    ("e", ScalarValue::from(2i16)),
5810                    ("f", ScalarValue::from(3i64)),
5811                ]),
5812            ),
5813        ]);
5814        assert_eq!(constructed, scalar);
5815
5816        // Build Array from Vec of structs
5817        let scalars = vec![
5818            ScalarValue::from(vec![
5819                ("A", ScalarValue::from(23)),
5820                ("B", ScalarValue::from(false)),
5821                ("C", ScalarValue::from("Hello")),
5822                (
5823                    "D",
5824                    ScalarValue::from(vec![
5825                        ("e", ScalarValue::from(2i16)),
5826                        ("f", ScalarValue::from(3i64)),
5827                    ]),
5828                ),
5829            ]),
5830            ScalarValue::from(vec![
5831                ("A", ScalarValue::from(7)),
5832                ("B", ScalarValue::from(true)),
5833                ("C", ScalarValue::from("World")),
5834                (
5835                    "D",
5836                    ScalarValue::from(vec![
5837                        ("e", ScalarValue::from(4i16)),
5838                        ("f", ScalarValue::from(5i64)),
5839                    ]),
5840                ),
5841            ]),
5842            ScalarValue::from(vec![
5843                ("A", ScalarValue::from(-1000)),
5844                ("B", ScalarValue::from(true)),
5845                ("C", ScalarValue::from("!!!!!")),
5846                (
5847                    "D",
5848                    ScalarValue::from(vec![
5849                        ("e", ScalarValue::from(6i16)),
5850                        ("f", ScalarValue::from(7i64)),
5851                    ]),
5852                ),
5853            ]),
5854        ];
5855        let array = ScalarValue::iter_to_array(scalars).unwrap();
5856
5857        let expected = Arc::new(StructArray::from(vec![
5858            (
5859                Arc::clone(&field_a),
5860                Arc::new(Int32Array::from(vec![23, 7, -1000])) as ArrayRef,
5861            ),
5862            (
5863                Arc::clone(&field_b),
5864                Arc::new(BooleanArray::from(vec![false, true, true])) as ArrayRef,
5865            ),
5866            (
5867                Arc::clone(&field_c),
5868                Arc::new(StringArray::from(vec!["Hello", "World", "!!!!!"])) as ArrayRef,
5869            ),
5870            (
5871                Arc::clone(&field_d),
5872                Arc::new(StructArray::from(vec![
5873                    (
5874                        Arc::clone(&field_e),
5875                        Arc::new(Int16Array::from(vec![2, 4, 6])) as ArrayRef,
5876                    ),
5877                    (
5878                        Arc::clone(&field_f),
5879                        Arc::new(Int64Array::from(vec![3, 5, 7])) as ArrayRef,
5880                    ),
5881                ])) as ArrayRef,
5882            ),
5883        ])) as ArrayRef;
5884
5885        assert_eq!(&array, &expected);
5886    }
5887
5888    #[test]
5889    fn round_trip() {
5890        // Each array type should be able to round tripped through a scalar
5891        let cases: Vec<ArrayRef> = vec![
5892            // int
5893            Arc::new(Int8Array::from(vec![Some(1), None, Some(3)])),
5894            Arc::new(Int16Array::from(vec![Some(1), None, Some(3)])),
5895            Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
5896            Arc::new(Int64Array::from(vec![Some(1), None, Some(3)])),
5897            Arc::new(UInt8Array::from(vec![Some(1), None, Some(3)])),
5898            Arc::new(UInt16Array::from(vec![Some(1), None, Some(3)])),
5899            Arc::new(UInt32Array::from(vec![Some(1), None, Some(3)])),
5900            Arc::new(UInt64Array::from(vec![Some(1), None, Some(3)])),
5901            // bool
5902            Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)])),
5903            // float
5904            Arc::new(Float32Array::from(vec![Some(1.0), None, Some(3.0)])),
5905            Arc::new(Float64Array::from(vec![Some(1.0), None, Some(3.0)])),
5906            // string array
5907            Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")])),
5908            Arc::new(LargeStringArray::from(vec![Some("foo"), None, Some("bar")])),
5909            Arc::new(StringViewArray::from(vec![Some("foo"), None, Some("bar")])),
5910            // string dictionary
5911            {
5912                let mut builder = StringDictionaryBuilder::<Int32Type>::new();
5913                builder.append("foo").unwrap();
5914                builder.append_null();
5915                builder.append("bar").unwrap();
5916                Arc::new(builder.finish())
5917            },
5918            // binary array
5919            Arc::new(BinaryArray::from_iter(vec![
5920                Some(b"foo"),
5921                None,
5922                Some(b"bar"),
5923            ])),
5924            Arc::new(LargeBinaryArray::from_iter(vec![
5925                Some(b"foo"),
5926                None,
5927                Some(b"bar"),
5928            ])),
5929            Arc::new(BinaryViewArray::from_iter(vec![
5930                Some(b"foo"),
5931                None,
5932                Some(b"bar"),
5933            ])),
5934            // timestamp
5935            Arc::new(TimestampSecondArray::from(vec![Some(1), None, Some(3)])),
5936            Arc::new(TimestampMillisecondArray::from(vec![
5937                Some(1),
5938                None,
5939                Some(3),
5940            ])),
5941            Arc::new(TimestampMicrosecondArray::from(vec![
5942                Some(1),
5943                None,
5944                Some(3),
5945            ])),
5946            Arc::new(TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])),
5947            // timestamp with timezone
5948            Arc::new(
5949                TimestampSecondArray::from(vec![Some(1), None, Some(3)])
5950                    .with_timezone_opt(Some("UTC")),
5951            ),
5952            Arc::new(
5953                TimestampMillisecondArray::from(vec![Some(1), None, Some(3)])
5954                    .with_timezone_opt(Some("UTC")),
5955            ),
5956            Arc::new(
5957                TimestampMicrosecondArray::from(vec![Some(1), None, Some(3)])
5958                    .with_timezone_opt(Some("UTC")),
5959            ),
5960            Arc::new(
5961                TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])
5962                    .with_timezone_opt(Some("UTC")),
5963            ),
5964            // date
5965            Arc::new(Date32Array::from(vec![Some(1), None, Some(3)])),
5966            Arc::new(Date64Array::from(vec![Some(1), None, Some(3)])),
5967            // time
5968            Arc::new(Time32SecondArray::from(vec![Some(1), None, Some(3)])),
5969            Arc::new(Time32MillisecondArray::from(vec![Some(1), None, Some(3)])),
5970            Arc::new(Time64MicrosecondArray::from(vec![Some(1), None, Some(3)])),
5971            Arc::new(Time64NanosecondArray::from(vec![Some(1), None, Some(3)])),
5972            // null array
5973            Arc::new(NullArray::new(3)),
5974            // dense union
5975            {
5976                let mut builder = UnionBuilder::new_dense();
5977                builder.append::<Int32Type>("a", 1).unwrap();
5978                builder.append::<Float64Type>("b", 3.4).unwrap();
5979                Arc::new(builder.build().unwrap())
5980            },
5981            // sparse union
5982            {
5983                let mut builder = UnionBuilder::new_sparse();
5984                builder.append::<Int32Type>("a", 1).unwrap();
5985                builder.append::<Float64Type>("b", 3.4).unwrap();
5986                Arc::new(builder.build().unwrap())
5987            },
5988            // list array
5989            {
5990                let values_builder = StringBuilder::new();
5991                let mut builder = ListBuilder::new(values_builder);
5992                // [A, B]
5993                builder.values().append_value("A");
5994                builder.values().append_value("B");
5995                builder.append(true);
5996                // [ ] (empty list)
5997                builder.append(true);
5998                // Null
5999                builder.values().append_value("?"); // irrelevant
6000                builder.append(false);
6001                Arc::new(builder.finish())
6002            },
6003            // large list array
6004            {
6005                let values_builder = StringBuilder::new();
6006                let mut builder = LargeListBuilder::new(values_builder);
6007                // [A, B]
6008                builder.values().append_value("A");
6009                builder.values().append_value("B");
6010                builder.append(true);
6011                // [ ] (empty list)
6012                builder.append(true);
6013                // Null
6014                builder.append(false);
6015                Arc::new(builder.finish())
6016            },
6017            // fixed size list array
6018            {
6019                let values_builder = Int32Builder::new();
6020                let mut builder = FixedSizeListBuilder::new(values_builder, 3);
6021
6022                //  [[0, 1, 2], null, [3, null, 5]
6023                builder.values().append_value(0);
6024                builder.values().append_value(1);
6025                builder.values().append_value(2);
6026                builder.append(true);
6027                builder.values().append_null();
6028                builder.values().append_null();
6029                builder.values().append_null();
6030                builder.append(false);
6031                builder.values().append_value(3);
6032                builder.values().append_null();
6033                builder.values().append_value(5);
6034                builder.append(true);
6035                Arc::new(builder.finish())
6036            },
6037            // map
6038            {
6039                let string_builder = StringBuilder::new();
6040                let int_builder = Int32Builder::with_capacity(4);
6041
6042                let mut builder = MapBuilder::new(None, string_builder, int_builder);
6043                // {"joe": 1}
6044                builder.keys().append_value("joe");
6045                builder.values().append_value(1);
6046                builder.append(true).unwrap();
6047                // {}
6048                builder.append(true).unwrap();
6049                // null
6050                builder.append(false).unwrap();
6051
6052                Arc::new(builder.finish())
6053            },
6054        ];
6055
6056        for arr in cases {
6057            round_trip_through_scalar(arr);
6058        }
6059    }
6060
6061    /// for each row in `arr`:
6062    /// 1. convert to a `ScalarValue`
6063    /// 2. Convert `ScalarValue` back to an `ArrayRef`
6064    /// 3. Compare the original array (sliced) and new array for equality
6065    fn round_trip_through_scalar(arr: ArrayRef) {
6066        for i in 0..arr.len() {
6067            // convert Scalar --> Array
6068            let scalar = ScalarValue::try_from_array(&arr, i).unwrap();
6069            let array = scalar.to_array_of_size(1).unwrap();
6070            assert_eq!(array.len(), 1);
6071            assert_eq!(array.data_type(), arr.data_type());
6072            assert_eq!(array.as_ref(), arr.slice(i, 1).as_ref());
6073        }
6074    }
6075
6076    #[test]
6077    fn test_scalar_union_sparse() {
6078        let field_a = Arc::new(Field::new("A", DataType::Int32, true));
6079        let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
6080        let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
6081        let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
6082
6083        let mut values_a = vec![None; 6];
6084        values_a[0] = Some(42);
6085        let mut values_b = vec![None; 6];
6086        values_b[1] = Some(true);
6087        let mut values_c = vec![None; 6];
6088        values_c[2] = Some("foo");
6089        let children: Vec<ArrayRef> = vec![
6090            Arc::new(Int32Array::from(values_a)),
6091            Arc::new(BooleanArray::from(values_b)),
6092            Arc::new(StringArray::from(values_c)),
6093        ];
6094
6095        let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
6096        let array: ArrayRef = Arc::new(
6097            UnionArray::try_new(fields.clone(), type_ids, None, children)
6098                .expect("UnionArray"),
6099        );
6100
6101        let expected = [
6102            (0, ScalarValue::from(42)),
6103            (1, ScalarValue::from(true)),
6104            (2, ScalarValue::from("foo")),
6105            (0, ScalarValue::Int32(None)),
6106            (1, ScalarValue::Boolean(None)),
6107            (2, ScalarValue::Utf8(None)),
6108        ];
6109
6110        for (i, (ti, value)) in expected.into_iter().enumerate() {
6111            let is_null = value.is_null();
6112            let value = Some((ti, Box::new(value)));
6113            let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Sparse);
6114            let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
6115
6116            assert_eq!(
6117                actual, expected,
6118                "[{i}] {actual} was not equal to {expected}"
6119            );
6120
6121            assert!(
6122                expected.eq_array(&array, i).expect("eq_array"),
6123                "[{i}] {expected}.eq_array was false"
6124            );
6125
6126            if is_null {
6127                assert!(actual.is_null(), "[{i}] {actual} was not null")
6128            }
6129        }
6130    }
6131
6132    #[test]
6133    fn test_scalar_union_dense() {
6134        let field_a = Arc::new(Field::new("A", DataType::Int32, true));
6135        let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
6136        let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
6137        let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
6138        let children: Vec<ArrayRef> = vec![
6139            Arc::new(Int32Array::from(vec![Some(42), None])),
6140            Arc::new(BooleanArray::from(vec![Some(true), None])),
6141            Arc::new(StringArray::from(vec![Some("foo"), None])),
6142        ];
6143
6144        let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
6145        let offsets = ScalarBuffer::from(vec![0, 0, 0, 1, 1, 1]);
6146        let array: ArrayRef = Arc::new(
6147            UnionArray::try_new(fields.clone(), type_ids, Some(offsets), children)
6148                .expect("UnionArray"),
6149        );
6150
6151        let expected = [
6152            (0, ScalarValue::from(42)),
6153            (1, ScalarValue::from(true)),
6154            (2, ScalarValue::from("foo")),
6155            (0, ScalarValue::Int32(None)),
6156            (1, ScalarValue::Boolean(None)),
6157            (2, ScalarValue::Utf8(None)),
6158        ];
6159
6160        for (i, (ti, value)) in expected.into_iter().enumerate() {
6161            let is_null = value.is_null();
6162            let value = Some((ti, Box::new(value)));
6163            let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Dense);
6164            let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
6165
6166            assert_eq!(
6167                actual, expected,
6168                "[{i}] {actual} was not equal to {expected}"
6169            );
6170
6171            assert!(
6172                expected.eq_array(&array, i).expect("eq_array"),
6173                "[{i}] {expected}.eq_array was false"
6174            );
6175
6176            if is_null {
6177                assert!(actual.is_null(), "[{i}] {actual} was not null")
6178            }
6179        }
6180    }
6181
6182    #[test]
6183    fn test_lists_in_struct() {
6184        let field_a = Arc::new(Field::new("A", DataType::Utf8, false));
6185        let field_primitive_list = Arc::new(Field::new(
6186            "primitive_list",
6187            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6188            false,
6189        ));
6190
6191        // Define primitive list scalars
6192        let l0 =
6193            ScalarValue::List(Arc::new(
6194                ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
6195                    Some(1),
6196                    Some(2),
6197                    Some(3),
6198                ])]),
6199            ));
6200        let l1 =
6201            ScalarValue::List(Arc::new(
6202                ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
6203                    Some(4),
6204                    Some(5),
6205                ])]),
6206            ));
6207        let l2 = ScalarValue::List(Arc::new(ListArray::from_iter_primitive::<
6208            Int32Type,
6209            _,
6210            _,
6211        >(vec![Some(vec![Some(6)])])));
6212
6213        // Define struct scalars
6214        let s0 = ScalarValue::from(vec![
6215            ("A", ScalarValue::from("First")),
6216            ("primitive_list", l0),
6217        ]);
6218
6219        let s1 = ScalarValue::from(vec![
6220            ("A", ScalarValue::from("Second")),
6221            ("primitive_list", l1),
6222        ]);
6223
6224        let s2 = ScalarValue::from(vec![
6225            ("A", ScalarValue::from("Third")),
6226            ("primitive_list", l2),
6227        ]);
6228
6229        // iter_to_array for struct scalars
6230        let array =
6231            ScalarValue::iter_to_array(vec![s0.clone(), s1.clone(), s2.clone()]).unwrap();
6232
6233        let array = as_struct_array(&array).unwrap();
6234        let expected = StructArray::from(vec![
6235            (
6236                Arc::clone(&field_a),
6237                Arc::new(StringArray::from(vec!["First", "Second", "Third"])) as ArrayRef,
6238            ),
6239            (
6240                Arc::clone(&field_primitive_list),
6241                Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
6242                    Some(vec![Some(1), Some(2), Some(3)]),
6243                    Some(vec![Some(4), Some(5)]),
6244                    Some(vec![Some(6)]),
6245                ])),
6246            ),
6247        ]);
6248
6249        assert_eq!(array, &expected);
6250
6251        // Define list-of-structs scalars
6252
6253        let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap();
6254        let nl0 = SingleRowListArrayBuilder::new(nl0_array).build_list_scalar();
6255
6256        let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap();
6257        let nl1 = SingleRowListArrayBuilder::new(nl1_array).build_list_scalar();
6258
6259        let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap();
6260        let nl2 = SingleRowListArrayBuilder::new(nl2_array).build_list_scalar();
6261
6262        // iter_to_array for list-of-struct
6263        let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap();
6264        let array = array.as_list::<i32>();
6265
6266        // Construct expected array with array builders
6267        let field_a_builder = StringBuilder::with_capacity(4, 1024);
6268        let primitive_value_builder = Int32Array::builder(8);
6269        let field_primitive_list_builder = ListBuilder::new(primitive_value_builder);
6270
6271        let element_builder = StructBuilder::new(
6272            vec![field_a, field_primitive_list],
6273            vec![
6274                Box::new(field_a_builder),
6275                Box::new(field_primitive_list_builder),
6276            ],
6277        );
6278
6279        let mut list_builder = ListBuilder::new(element_builder);
6280
6281        list_builder
6282            .values()
6283            .field_builder::<StringBuilder>(0)
6284            .unwrap()
6285            .append_value("First");
6286        list_builder
6287            .values()
6288            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6289            .unwrap()
6290            .values()
6291            .append_value(1);
6292        list_builder
6293            .values()
6294            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6295            .unwrap()
6296            .values()
6297            .append_value(2);
6298        list_builder
6299            .values()
6300            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6301            .unwrap()
6302            .values()
6303            .append_value(3);
6304        list_builder
6305            .values()
6306            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6307            .unwrap()
6308            .append(true);
6309        list_builder.values().append(true);
6310
6311        list_builder
6312            .values()
6313            .field_builder::<StringBuilder>(0)
6314            .unwrap()
6315            .append_value("Second");
6316        list_builder
6317            .values()
6318            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6319            .unwrap()
6320            .values()
6321            .append_value(4);
6322        list_builder
6323            .values()
6324            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6325            .unwrap()
6326            .values()
6327            .append_value(5);
6328        list_builder
6329            .values()
6330            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6331            .unwrap()
6332            .append(true);
6333        list_builder.values().append(true);
6334        list_builder.append(true);
6335
6336        list_builder
6337            .values()
6338            .field_builder::<StringBuilder>(0)
6339            .unwrap()
6340            .append_value("Third");
6341        list_builder
6342            .values()
6343            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6344            .unwrap()
6345            .values()
6346            .append_value(6);
6347        list_builder
6348            .values()
6349            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6350            .unwrap()
6351            .append(true);
6352        list_builder.values().append(true);
6353        list_builder.append(true);
6354
6355        list_builder
6356            .values()
6357            .field_builder::<StringBuilder>(0)
6358            .unwrap()
6359            .append_value("Second");
6360        list_builder
6361            .values()
6362            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6363            .unwrap()
6364            .values()
6365            .append_value(4);
6366        list_builder
6367            .values()
6368            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6369            .unwrap()
6370            .values()
6371            .append_value(5);
6372        list_builder
6373            .values()
6374            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6375            .unwrap()
6376            .append(true);
6377        list_builder.values().append(true);
6378        list_builder.append(true);
6379
6380        let expected = list_builder.finish();
6381
6382        assert_eq!(array, &expected);
6383    }
6384
6385    fn build_2d_list(data: Vec<Option<i32>>) -> ListArray {
6386        let a1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(data)]);
6387        ListArray::new(
6388            Arc::new(Field::new_list_field(
6389                DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6390                true,
6391            )),
6392            OffsetBuffer::<i32>::from_lengths([1]),
6393            Arc::new(a1),
6394            None,
6395        )
6396    }
6397
6398    #[test]
6399    fn test_nested_lists() {
6400        // Define inner list scalars
6401        let arr1 = build_2d_list(vec![Some(1), Some(2), Some(3)]);
6402        let arr2 = build_2d_list(vec![Some(4), Some(5)]);
6403        let arr3 = build_2d_list(vec![Some(6)]);
6404
6405        let array = ScalarValue::iter_to_array(vec![
6406            ScalarValue::List(Arc::new(arr1)),
6407            ScalarValue::List(Arc::new(arr2)),
6408            ScalarValue::List(Arc::new(arr3)),
6409        ])
6410        .unwrap();
6411        let array = array.as_list::<i32>();
6412
6413        // Construct expected array with array builders
6414        let inner_builder = Int32Array::builder(6);
6415        let middle_builder = ListBuilder::new(inner_builder);
6416        let mut outer_builder = ListBuilder::new(middle_builder);
6417
6418        outer_builder.values().values().append_value(1);
6419        outer_builder.values().values().append_value(2);
6420        outer_builder.values().values().append_value(3);
6421        outer_builder.values().append(true);
6422        outer_builder.append(true);
6423
6424        outer_builder.values().values().append_value(4);
6425        outer_builder.values().values().append_value(5);
6426        outer_builder.values().append(true);
6427        outer_builder.append(true);
6428
6429        outer_builder.values().values().append_value(6);
6430        outer_builder.values().append(true);
6431        outer_builder.append(true);
6432
6433        let expected = outer_builder.finish();
6434
6435        assert_eq!(array, &expected);
6436    }
6437
6438    #[test]
6439    fn scalar_timestamp_ns_utc_timezone() {
6440        let scalar = ScalarValue::TimestampNanosecond(
6441            Some(1599566400000000000),
6442            Some("UTC".into()),
6443        );
6444
6445        assert_eq!(
6446            scalar.data_type(),
6447            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6448        );
6449
6450        let array = scalar.to_array().expect("Failed to convert to array");
6451        assert_eq!(array.len(), 1);
6452        assert_eq!(
6453            array.data_type(),
6454            &DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6455        );
6456
6457        let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap();
6458        assert_eq!(
6459            new_scalar.data_type(),
6460            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6461        );
6462    }
6463
6464    #[test]
6465    fn cast_round_trip() {
6466        check_scalar_cast(ScalarValue::Int8(Some(5)), DataType::Int16);
6467        check_scalar_cast(ScalarValue::Int8(None), DataType::Int16);
6468
6469        check_scalar_cast(ScalarValue::Float64(Some(5.5)), DataType::Int16);
6470
6471        check_scalar_cast(ScalarValue::Float64(None), DataType::Int16);
6472
6473        check_scalar_cast(
6474            ScalarValue::from("foo"),
6475            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
6476        );
6477
6478        check_scalar_cast(
6479            ScalarValue::Utf8(None),
6480            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
6481        );
6482
6483        check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View);
6484        check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View);
6485        check_scalar_cast(
6486            ScalarValue::from("larger than 12 bytes string"),
6487            DataType::Utf8View,
6488        );
6489        check_scalar_cast(
6490            {
6491                let element_field =
6492                    Arc::new(Field::new("element", DataType::Int32, true));
6493
6494                let mut builder =
6495                    ListBuilder::new(Int32Builder::new()).with_field(element_field);
6496                builder.append_value([Some(1)]);
6497                builder.append(true);
6498
6499                ScalarValue::List(Arc::new(builder.finish()))
6500            },
6501            DataType::List(Arc::new(Field::new("element", DataType::Int64, true))),
6502        );
6503        check_scalar_cast(
6504            {
6505                let element_field =
6506                    Arc::new(Field::new("element", DataType::Int32, true));
6507
6508                let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 1)
6509                    .with_field(element_field);
6510                builder.values().append_value(1);
6511                builder.append(true);
6512
6513                ScalarValue::FixedSizeList(Arc::new(builder.finish()))
6514            },
6515            DataType::FixedSizeList(
6516                Arc::new(Field::new("element", DataType::Int64, true)),
6517                1,
6518            ),
6519        );
6520        check_scalar_cast(
6521            {
6522                let element_field =
6523                    Arc::new(Field::new("element", DataType::Int32, true));
6524
6525                let mut builder =
6526                    LargeListBuilder::new(Int32Builder::new()).with_field(element_field);
6527                builder.append_value([Some(1)]);
6528                builder.append(true);
6529
6530                ScalarValue::LargeList(Arc::new(builder.finish()))
6531            },
6532            DataType::LargeList(Arc::new(Field::new("element", DataType::Int64, true))),
6533        );
6534    }
6535
6536    // mimics how casting work on scalar values by `casting` `scalar` to `desired_type`
6537    fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) {
6538        // convert from scalar --> Array to call cast
6539        let scalar_array = scalar.to_array().expect("Failed to convert to array");
6540        // cast the actual value
6541        let cast_array = kernels::cast::cast(&scalar_array, &desired_type).unwrap();
6542
6543        // turn it back to a scalar
6544        let cast_scalar = ScalarValue::try_from_array(&cast_array, 0).unwrap();
6545        assert_eq!(cast_scalar.data_type(), desired_type);
6546
6547        // Some time later the "cast" scalar is turned back into an array:
6548        let array = cast_scalar
6549            .to_array_of_size(10)
6550            .expect("Failed to convert to array of size");
6551
6552        // The datatype should be "Dictionary" but is actually Utf8!!!
6553        assert_eq!(array.data_type(), &desired_type)
6554    }
6555
6556    #[test]
6557    fn test_scalar_negative() -> Result<()> {
6558        // positive test
6559        let value = ScalarValue::Int32(Some(12));
6560        assert_eq!(ScalarValue::Int32(Some(-12)), value.arithmetic_negate()?);
6561        let value = ScalarValue::Int32(None);
6562        assert_eq!(ScalarValue::Int32(None), value.arithmetic_negate()?);
6563
6564        // negative test
6565        let value = ScalarValue::UInt8(Some(12));
6566        assert!(value.arithmetic_negate().is_err());
6567        let value = ScalarValue::Boolean(None);
6568        assert!(value.arithmetic_negate().is_err());
6569        Ok(())
6570    }
6571
6572    #[test]
6573    #[allow(arithmetic_overflow)] // we want to test them
6574    fn test_scalar_negative_overflows() -> Result<()> {
6575        macro_rules! test_overflow_on_value {
6576            ($($val:expr),* $(,)?) => {$(
6577                {
6578                    let value: ScalarValue = $val;
6579                    let err = value.arithmetic_negate().expect_err("Should receive overflow error on negating {value:?}");
6580                    let root_err = err.find_root();
6581                    match  root_err{
6582                        DataFusionError::ArrowError(
6583                            ArrowError::ArithmeticOverflow(_),
6584                            _,
6585                        ) => {}
6586                        _ => return Err(err),
6587                    };
6588                }
6589            )*};
6590        }
6591        test_overflow_on_value!(
6592            // the integers
6593            i8::MIN.into(),
6594            i16::MIN.into(),
6595            i32::MIN.into(),
6596            i64::MIN.into(),
6597            // for decimals, only value needs to be tested
6598            ScalarValue::try_new_decimal128(i128::MIN, 10, 5)?,
6599            ScalarValue::Decimal256(Some(i256::MIN), 20, 5),
6600            // interval, check all possible values
6601            ScalarValue::IntervalYearMonth(Some(i32::MIN)),
6602            ScalarValue::new_interval_dt(i32::MIN, 999),
6603            ScalarValue::new_interval_dt(1, i32::MIN),
6604            ScalarValue::new_interval_mdn(i32::MIN, 15, 123_456),
6605            ScalarValue::new_interval_mdn(12, i32::MIN, 123_456),
6606            ScalarValue::new_interval_mdn(12, 15, i64::MIN),
6607            // tz doesn't matter when negating
6608            ScalarValue::TimestampSecond(Some(i64::MIN), None),
6609            ScalarValue::TimestampMillisecond(Some(i64::MIN), None),
6610            ScalarValue::TimestampMicrosecond(Some(i64::MIN), None),
6611            ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
6612        );
6613
6614        let float_cases = [
6615            (
6616                ScalarValue::Float16(Some(f16::MIN)),
6617                ScalarValue::Float16(Some(f16::MAX)),
6618            ),
6619            (
6620                ScalarValue::Float16(Some(f16::MAX)),
6621                ScalarValue::Float16(Some(f16::MIN)),
6622            ),
6623            (f32::MIN.into(), f32::MAX.into()),
6624            (f32::MAX.into(), f32::MIN.into()),
6625            (f64::MIN.into(), f64::MAX.into()),
6626            (f64::MAX.into(), f64::MIN.into()),
6627        ];
6628        // skip float 16 because they aren't supported
6629        for (test, expected) in float_cases.into_iter().skip(2) {
6630            assert_eq!(test.arithmetic_negate()?, expected);
6631        }
6632        Ok(())
6633    }
6634
6635    #[test]
6636    fn f16_test_overflow() {
6637        // TODO: if negate supports f16, add these cases to `test_scalar_negative_overflows` test case
6638        let cases = [
6639            (
6640                ScalarValue::Float16(Some(f16::MIN)),
6641                ScalarValue::Float16(Some(f16::MAX)),
6642            ),
6643            (
6644                ScalarValue::Float16(Some(f16::MAX)),
6645                ScalarValue::Float16(Some(f16::MIN)),
6646            ),
6647        ];
6648
6649        for (test, expected) in cases {
6650            assert_eq!(test.arithmetic_negate().unwrap(), expected);
6651        }
6652    }
6653
6654    macro_rules! expect_operation_error {
6655        ($TEST_NAME:ident, $FUNCTION:ident, $EXPECTED_ERROR:expr) => {
6656            #[test]
6657            fn $TEST_NAME() {
6658                let lhs = ScalarValue::UInt64(Some(12));
6659                let rhs = ScalarValue::Int32(Some(-3));
6660                match lhs.$FUNCTION(&rhs) {
6661                    Ok(_result) => {
6662                        panic!(
6663                            "Expected binary operation error between lhs: '{:?}', rhs: {:?}",
6664                            lhs, rhs
6665                        );
6666                    }
6667                    Err(e) => {
6668                        let error_message = e.to_string();
6669                        assert!(
6670                            error_message.contains($EXPECTED_ERROR),
6671                            "Expected error '{}' not found in actual error '{}'",
6672                            $EXPECTED_ERROR,
6673                            error_message
6674                        );
6675                    }
6676                }
6677            }
6678        };
6679    }
6680
6681    expect_operation_error!(
6682        expect_add_error,
6683        add,
6684        "Invalid arithmetic operation: UInt64 + Int32"
6685    );
6686    expect_operation_error!(
6687        expect_sub_error,
6688        sub,
6689        "Invalid arithmetic operation: UInt64 - Int32"
6690    );
6691
6692    macro_rules! decimal_op_test_cases {
6693    ($OPERATION:ident, [$([$L_VALUE:expr, $L_PRECISION:expr, $L_SCALE:expr, $R_VALUE:expr, $R_PRECISION:expr, $R_SCALE:expr, $O_VALUE:expr, $O_PRECISION:expr, $O_SCALE:expr]),+]) => {
6694            $(
6695
6696                let left = ScalarValue::Decimal128($L_VALUE, $L_PRECISION, $L_SCALE);
6697                let right = ScalarValue::Decimal128($R_VALUE, $R_PRECISION, $R_SCALE);
6698                let result = left.$OPERATION(&right).unwrap();
6699                assert_eq!(ScalarValue::Decimal128($O_VALUE, $O_PRECISION, $O_SCALE), result);
6700
6701            )+
6702        };
6703    }
6704
6705    #[test]
6706    fn decimal_operations() {
6707        decimal_op_test_cases!(
6708            add,
6709            [
6710                [Some(123), 10, 2, Some(124), 10, 2, Some(123 + 124), 11, 2],
6711                // test sum decimal with diff scale
6712                [
6713                    Some(123),
6714                    10,
6715                    3,
6716                    Some(124),
6717                    10,
6718                    2,
6719                    Some(123 + 124 * 10_i128.pow(1)),
6720                    12,
6721                    3
6722                ],
6723                // diff precision and scale for decimal data type
6724                [
6725                    Some(123),
6726                    10,
6727                    2,
6728                    Some(124),
6729                    11,
6730                    3,
6731                    Some(123 * 10_i128.pow(3 - 2) + 124),
6732                    12,
6733                    3
6734                ]
6735            ]
6736        );
6737    }
6738
    /// Adds `Decimal128` scalars where one operand is `None` and checks that the
    /// result is a `None` value that still carries the expected precision and
    /// scale.
    ///
    /// Each row is `[lhs value, lhs precision, lhs scale, rhs value, rhs
    /// precision, rhs scale, expected value, expected precision, expected scale]`.
    #[test]
    fn decimal_operations_with_nulls() {
        decimal_op_test_cases!(
            add,
            [
                // Case: (None, Some, 0)
                // null lhs, both operands are (10, 2)
                [None, 10, 2, Some(123), 10, 2, None, 11, 2],
                // Case: (Some, None, 0)
                // null rhs, both operands are (10, 2)
                [Some(123), 10, 2, None, 10, 2, None, 11, 2],
                // Case: (Some, None, _) + Side=False
                // null rhs; lhs is (8, 2), rhs is (10, 3)
                [Some(123), 8, 2, None, 10, 3, None, 11, 3],
                // Case: (None, Some, _) + Side=False
                // null lhs; lhs is (8, 2), rhs is (10, 3)
                [None, 8, 2, Some(123), 10, 3, None, 11, 3],
                // Case: (Some, None, _) + Side=True
                // null rhs; lhs is (8, 4), rhs is (10, 3)
                [Some(123), 8, 4, None, 10, 3, None, 12, 4],
                // Case: (None, Some, _) + Side=True
                // null lhs; lhs is (10, 3), rhs is (8, 4)
                [None, 10, 3, Some(123), 8, 4, None, 12, 4]
            ]
        );
    }
6759
6760    #[test]
6761    fn test_scalar_distance() {
6762        let cases = [
6763            // scalar (lhs), scalar (rhs), expected distance
6764            // ---------------------------------------------
6765            (ScalarValue::Int8(Some(1)), ScalarValue::Int8(Some(2)), 1),
6766            (ScalarValue::Int8(Some(2)), ScalarValue::Int8(Some(1)), 1),
6767            (
6768                ScalarValue::Int16(Some(-5)),
6769                ScalarValue::Int16(Some(5)),
6770                10,
6771            ),
6772            (
6773                ScalarValue::Int16(Some(5)),
6774                ScalarValue::Int16(Some(-5)),
6775                10,
6776            ),
6777            (ScalarValue::Int32(Some(0)), ScalarValue::Int32(Some(0)), 0),
6778            (
6779                ScalarValue::Int32(Some(-5)),
6780                ScalarValue::Int32(Some(-10)),
6781                5,
6782            ),
6783            (
6784                ScalarValue::Int64(Some(-10)),
6785                ScalarValue::Int64(Some(-5)),
6786                5,
6787            ),
6788            (ScalarValue::UInt8(Some(1)), ScalarValue::UInt8(Some(2)), 1),
6789            (ScalarValue::UInt8(Some(0)), ScalarValue::UInt8(Some(0)), 0),
6790            (
6791                ScalarValue::UInt16(Some(5)),
6792                ScalarValue::UInt16(Some(10)),
6793                5,
6794            ),
6795            (
6796                ScalarValue::UInt32(Some(10)),
6797                ScalarValue::UInt32(Some(5)),
6798                5,
6799            ),
6800            (
6801                ScalarValue::UInt64(Some(5)),
6802                ScalarValue::UInt64(Some(10)),
6803                5,
6804            ),
6805            (
6806                ScalarValue::Float16(Some(f16::from_f32(1.1))),
6807                ScalarValue::Float16(Some(f16::from_f32(1.9))),
6808                1,
6809            ),
6810            (
6811                ScalarValue::Float16(Some(f16::from_f32(-5.3))),
6812                ScalarValue::Float16(Some(f16::from_f32(-9.2))),
6813                4,
6814            ),
6815            (
6816                ScalarValue::Float16(Some(f16::from_f32(-5.3))),
6817                ScalarValue::Float16(Some(f16::from_f32(-9.7))),
6818                4,
6819            ),
6820            (
6821                ScalarValue::Float32(Some(1.0)),
6822                ScalarValue::Float32(Some(2.0)),
6823                1,
6824            ),
6825            (
6826                ScalarValue::Float32(Some(2.0)),
6827                ScalarValue::Float32(Some(1.0)),
6828                1,
6829            ),
6830            (
6831                ScalarValue::Float64(Some(0.0)),
6832                ScalarValue::Float64(Some(0.0)),
6833                0,
6834            ),
6835            (
6836                ScalarValue::Float64(Some(-5.0)),
6837                ScalarValue::Float64(Some(-10.0)),
6838                5,
6839            ),
6840            (
6841                ScalarValue::Float64(Some(-10.0)),
6842                ScalarValue::Float64(Some(-5.0)),
6843                5,
6844            ),
6845            // Floats are currently special cased to f64/f32 and the result is rounded
6846            // rather than ceiled/floored. In the future we might want to take a mode
6847            // which specified the rounding behavior.
6848            (
6849                ScalarValue::Float32(Some(1.2)),
6850                ScalarValue::Float32(Some(1.3)),
6851                0,
6852            ),
6853            (
6854                ScalarValue::Float32(Some(1.1)),
6855                ScalarValue::Float32(Some(1.9)),
6856                1,
6857            ),
6858            (
6859                ScalarValue::Float64(Some(-5.3)),
6860                ScalarValue::Float64(Some(-9.2)),
6861                4,
6862            ),
6863            (
6864                ScalarValue::Float64(Some(-5.3)),
6865                ScalarValue::Float64(Some(-9.7)),
6866                4,
6867            ),
6868            (
6869                ScalarValue::Float64(Some(-5.3)),
6870                ScalarValue::Float64(Some(-9.9)),
6871                5,
6872            ),
6873        ];
6874        for (lhs, rhs, expected) in cases.iter() {
6875            let distance = lhs.distance(rhs).unwrap();
6876            assert_eq!(distance, *expected);
6877        }
6878    }
6879
6880    #[test]
6881    fn test_scalar_distance_invalid() {
6882        let cases = [
6883            // scalar (lhs), scalar (rhs)
6884            // --------------------------
6885            // Same type but with nulls
6886            (ScalarValue::Int8(None), ScalarValue::Int8(None)),
6887            (ScalarValue::Int8(None), ScalarValue::Int8(Some(1))),
6888            (ScalarValue::Int8(Some(1)), ScalarValue::Int8(None)),
6889            // Different type
6890            (ScalarValue::Int8(Some(1)), ScalarValue::Int16(Some(1))),
6891            (ScalarValue::Int8(Some(1)), ScalarValue::Float32(Some(1.0))),
6892            (
6893                ScalarValue::Float16(Some(f16::from_f32(1.0))),
6894                ScalarValue::Float32(Some(1.0)),
6895            ),
6896            (
6897                ScalarValue::Float16(Some(f16::from_f32(1.0))),
6898                ScalarValue::Int32(Some(1)),
6899            ),
6900            (
6901                ScalarValue::Float64(Some(1.1)),
6902                ScalarValue::Float32(Some(2.2)),
6903            ),
6904            (
6905                ScalarValue::UInt64(Some(777)),
6906                ScalarValue::Int32(Some(111)),
6907            ),
6908            // Different types with nulls
6909            (ScalarValue::Int8(None), ScalarValue::Int16(Some(1))),
6910            (ScalarValue::Int8(Some(1)), ScalarValue::Int16(None)),
6911            // Unsupported types
6912            (ScalarValue::from("foo"), ScalarValue::from("bar")),
6913            (
6914                ScalarValue::Boolean(Some(true)),
6915                ScalarValue::Boolean(Some(false)),
6916            ),
6917            (ScalarValue::Date32(Some(0)), ScalarValue::Date32(Some(1))),
6918            (ScalarValue::Date64(Some(0)), ScalarValue::Date64(Some(1))),
6919            (
6920                ScalarValue::Decimal128(Some(123), 5, 5),
6921                ScalarValue::Decimal128(Some(120), 5, 5),
6922            ),
6923        ];
6924        for (lhs, rhs) in cases {
6925            let distance = lhs.distance(&rhs);
6926            assert!(distance.is_none());
6927        }
6928    }
6929
6930    #[test]
6931    fn test_scalar_interval_negate() {
6932        let cases = [
6933            (
6934                ScalarValue::new_interval_ym(1, 12),
6935                ScalarValue::new_interval_ym(-1, -12),
6936            ),
6937            (
6938                ScalarValue::new_interval_dt(1, 999),
6939                ScalarValue::new_interval_dt(-1, -999),
6940            ),
6941            (
6942                ScalarValue::new_interval_mdn(12, 15, 123_456),
6943                ScalarValue::new_interval_mdn(-12, -15, -123_456),
6944            ),
6945        ];
6946        for (expr, expected) in cases.iter() {
6947            let result = expr.arithmetic_negate().unwrap();
6948            assert_eq!(*expected, result, "-expr:{expr:?}");
6949        }
6950    }
6951
6952    #[test]
6953    fn test_scalar_interval_add() {
6954        let cases = [
6955            (
6956                ScalarValue::new_interval_ym(1, 12),
6957                ScalarValue::new_interval_ym(1, 12),
6958                ScalarValue::new_interval_ym(2, 24),
6959            ),
6960            (
6961                ScalarValue::new_interval_dt(1, 999),
6962                ScalarValue::new_interval_dt(1, 999),
6963                ScalarValue::new_interval_dt(2, 1998),
6964            ),
6965            (
6966                ScalarValue::new_interval_mdn(12, 15, 123_456),
6967                ScalarValue::new_interval_mdn(12, 15, 123_456),
6968                ScalarValue::new_interval_mdn(24, 30, 246_912),
6969            ),
6970        ];
6971        for (lhs, rhs, expected) in cases.iter() {
6972            let result = lhs.add(rhs).unwrap();
6973            let result_commute = rhs.add(lhs).unwrap();
6974            assert_eq!(*expected, result, "lhs:{lhs:?} + rhs:{rhs:?}");
6975            assert_eq!(*expected, result_commute, "lhs:{rhs:?} + rhs:{lhs:?}");
6976        }
6977    }
6978
6979    #[test]
6980    fn test_scalar_interval_sub() {
6981        let cases = [
6982            (
6983                ScalarValue::new_interval_ym(1, 12),
6984                ScalarValue::new_interval_ym(1, 12),
6985                ScalarValue::new_interval_ym(0, 0),
6986            ),
6987            (
6988                ScalarValue::new_interval_dt(1, 999),
6989                ScalarValue::new_interval_dt(1, 999),
6990                ScalarValue::new_interval_dt(0, 0),
6991            ),
6992            (
6993                ScalarValue::new_interval_mdn(12, 15, 123_456),
6994                ScalarValue::new_interval_mdn(12, 15, 123_456),
6995                ScalarValue::new_interval_mdn(0, 0, 0),
6996            ),
6997        ];
6998        for (lhs, rhs, expected) in cases.iter() {
6999            let result = lhs.sub(rhs).unwrap();
7000            assert_eq!(*expected, result, "lhs:{lhs:?} - rhs:{rhs:?}");
7001        }
7002    }
7003
7004    #[test]
7005    fn timestamp_op_random_tests() {
7006        // timestamp1 + (or -) interval = timestamp2
7007        // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ?
7008        let sample_size = 1000;
7009        let timestamps1 = get_random_timestamps(sample_size);
7010        let intervals = get_random_intervals(sample_size);
7011        // ts(sec) + interval(ns) = ts(sec); however,
7012        // ts(sec) - ts(sec) cannot be = interval(ns). Therefore,
7013        // timestamps are more precise than intervals in tests.
7014        for (idx, ts1) in timestamps1.iter().enumerate() {
7015            if idx % 2 == 0 {
7016                let timestamp2 = ts1.add(intervals[idx].clone()).unwrap();
7017                let back = timestamp2.sub(intervals[idx].clone()).unwrap();
7018                assert_eq!(ts1, &back);
7019            } else {
7020                let timestamp2 = ts1.sub(intervals[idx].clone()).unwrap();
7021                let back = timestamp2.add(intervals[idx].clone()).unwrap();
7022                assert_eq!(ts1, &back);
7023            };
7024        }
7025    }
7026
    /// Builds three single-row struct scalars (null struct, struct with a null
    /// nested field, fully valid struct) and checks that converting them to an
    /// array keeps the null information, both through
    /// `ScalarValue::iter_to_array` and through `ScalarValue::to_array` followed
    /// by `concat`.
    #[test]
    fn test_struct_nulls() {
        // Schema of the nested struct stored in field "b".
        let fields_b = Fields::from(vec![
            Field::new("ba", DataType::UInt64, true),
            Field::new("bb", DataType::UInt64, true),
        ]);
        // Schema of the outer struct: { a: UInt64, b: { ba: UInt64, bb: UInt64 } }.
        let fields = Fields::from(vec![
            Field::new("a", DataType::UInt64, true),
            Field::new("b", DataType::Struct(fields_b.clone()), true),
        ]);

        // Single-row columns {a: 1, b: {ba: 2, bb: 3}}; no validity buffer is
        // supplied for the nested struct here.
        let struct_value = vec![
            (
                Arc::clone(&fields[0]),
                Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
            ),
            (
                Arc::clone(&fields[1]),
                Arc::new(StructArray::from(vec![
                    (
                        Arc::clone(&fields_b[0]),
                        Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
                    ),
                    (
                        Arc::clone(&fields_b[1]),
                        Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
                    ),
                ])) as ArrayRef,
            ),
        ];

        // Same columns, but the nested struct "b" is built with a validity
        // buffer of `[0]`, which makes its only row null.
        let struct_value_with_nulls = vec![
            (
                Arc::clone(&fields[0]),
                Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
            ),
            (
                Arc::clone(&fields[1]),
                Arc::new(StructArray::from((
                    vec![
                        (
                            Arc::clone(&fields_b[0]),
                            Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
                        ),
                        (
                            Arc::clone(&fields_b[1]),
                            Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
                        ),
                    ],
                    Buffer::from(&[0]),
                ))) as ArrayRef,
            ),
        ];

        // The `Buffer` paired with each column list is the outer struct's
        // validity bitmap: `[0]` marks the row null, `[1]` marks it valid.
        let scalars = vec![
            // all null
            ScalarValue::Struct(Arc::new(StructArray::from((
                struct_value.clone(),
                Buffer::from(&[0]),
            )))),
            // field 1 valid, field 2 null
            ScalarValue::Struct(Arc::new(StructArray::from((
                struct_value_with_nulls.clone(),
                Buffer::from(&[1]),
            )))),
            // all valid
            ScalarValue::Struct(Arc::new(StructArray::from((
                struct_value.clone(),
                Buffer::from(&[1]),
            )))),
        ];

        // Shared verification: only the first row is null at the top level, and
        // the pretty-printed column shows the nested null as an empty `b:`.
        let check_array = |array| {
            let is_null = is_null(&array).unwrap();
            assert_eq!(is_null, BooleanArray::from(vec![true, false, false]));

            let formatted = pretty_format_columns("col", &[array]).unwrap().to_string();
            let formatted = formatted.split('\n').collect::<Vec<_>>();
            let expected = vec![
                "+---------------------------+",
                "| col                       |",
                "+---------------------------+",
                "|                           |",
                "| {a: 1, b: }               |",
                "| {a: 1, b: {ba: 2, bb: 3}} |",
                "+---------------------------+",
            ];
            assert_eq!(
                formatted, expected,
                "Actual:\n{formatted:#?}\n\nExpected:\n{expected:#?}"
            );
        };

        // test `ScalarValue::iter_to_array`
        let array = ScalarValue::iter_to_array(scalars.clone()).unwrap();
        check_array(array);

        // test `ScalarValue::to_array` / `ScalarValue::to_array_of_size`
        // Each scalar becomes its own array; they are concatenated into a
        // single three-row array before running the same checks.
        let arrays = scalars
            .iter()
            .map(ScalarValue::to_array)
            .collect::<Result<Vec<_>>>()
            .expect("Failed to convert to array");
        let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
        let array = arrow::compute::concat(&arrays).unwrap();
        check_array(array);
    }
7134
7135    #[test]
7136    fn test_struct_display() {
7137        let field_a = Field::new("a", DataType::Int32, true);
7138        let field_b = Field::new("b", DataType::Utf8, true);
7139
7140        let s = ScalarStructBuilder::new()
7141            .with_scalar(field_a, ScalarValue::from(1i32))
7142            .with_scalar(field_b, ScalarValue::Utf8(None))
7143            .build()
7144            .unwrap();
7145
7146        assert_eq!(s.to_string(), "{a:1,b:}");
7147        assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:})"#);
7148
7149        let ScalarValue::Struct(arr) = s else {
7150            panic!("Expected struct");
7151        };
7152
7153        //verify compared to arrow display
7154        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
7155        assert_snapshot!(batches_to_string(&[batch]), @r"
7156        +-------------+
7157        | s           |
7158        +-------------+
7159        | {a: 1, b: } |
7160        +-------------+
7161        ");
7162    }
7163
7164    #[test]
7165    fn test_null_bug() {
7166        let field_a = Field::new("a", DataType::Int32, true);
7167        let field_b = Field::new("b", DataType::Int32, true);
7168        let fields = Fields::from(vec![field_a, field_b]);
7169
7170        let array_a = Arc::new(Int32Array::from_iter_values([1]));
7171        let array_b = Arc::new(Int32Array::from_iter_values([2]));
7172        let arrays: Vec<ArrayRef> = vec![array_a, array_b];
7173
7174        let mut not_nulls = NullBufferBuilder::new(1);
7175
7176        not_nulls.append_non_null();
7177
7178        let ar = StructArray::new(fields, arrays, not_nulls.finish());
7179        let s = ScalarValue::Struct(Arc::new(ar));
7180
7181        assert_eq!(s.to_string(), "{a:1,b:2}");
7182        assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:2})"#);
7183
7184        let ScalarValue::Struct(arr) = s else {
7185            panic!("Expected struct");
7186        };
7187
7188        //verify compared to arrow display
7189        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
7190        assert_snapshot!(batches_to_string(&[batch]), @r"
7191        +--------------+
7192        | s            |
7193        +--------------+
7194        | {a: 1, b: 2} |
7195        +--------------+
7196        ");
7197    }
7198
7199    #[test]
7200    fn test_struct_display_null() {
7201        let fields = vec![Field::new("a", DataType::Int32, false)];
7202        let s = ScalarStructBuilder::new_null(fields);
7203        assert_eq!(s.to_string(), "NULL");
7204
7205        let ScalarValue::Struct(arr) = s else {
7206            panic!("Expected struct");
7207        };
7208
7209        //verify compared to arrow display
7210        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
7211
7212        assert_snapshot!(batches_to_string(&[batch]), @r"
7213        +---+
7214        | s |
7215        +---+
7216        |   |
7217        +---+
7218        ");
7219    }
7220
7221    #[test]
7222    fn test_map_display_and_debug() {
7223        let string_builder = StringBuilder::new();
7224        let int_builder = Int32Builder::with_capacity(4);
7225        let mut builder = MapBuilder::new(None, string_builder, int_builder);
7226        builder.keys().append_value("joe");
7227        builder.values().append_value(1);
7228        builder.append(true).unwrap();
7229
7230        builder.keys().append_value("blogs");
7231        builder.values().append_value(2);
7232        builder.keys().append_value("foo");
7233        builder.values().append_value(4);
7234        builder.append(true).unwrap();
7235        builder.append(true).unwrap();
7236        builder.append(false).unwrap();
7237
7238        let map_value = ScalarValue::Map(Arc::new(builder.finish()));
7239
7240        assert_eq!(map_value.to_string(), "[{joe:1},{blogs:2,foo:4},{},NULL]");
7241        assert_eq!(
7242            format!("{map_value:?}"),
7243            r#"Map([{"joe":"1"},{"blogs":"2","foo":"4"},{},NULL])"#
7244        );
7245
7246        let ScalarValue::Map(arr) = map_value else {
7247            panic!("Expected map");
7248        };
7249
7250        //verify compared to arrow display
7251        let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap();
7252        assert_snapshot!(batches_to_string(&[batch]), @r"
7253        +--------------------+
7254        | m                  |
7255        +--------------------+
7256        | {joe: 1}           |
7257        | {blogs: 2, foo: 4} |
7258        | {}                 |
7259        |                    |
7260        +--------------------+
7261        ");
7262    }
7263
7264    #[test]
7265    fn test_binary_display() {
7266        let no_binary_value = ScalarValue::Binary(None);
7267        assert_eq!(format!("{no_binary_value}"), "NULL");
7268        let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
7269        assert_eq!(format!("{single_binary_value}"), "2A");
7270        let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
7271        assert_eq!(format!("{small_binary_value}"), "010203");
7272        let large_binary_value =
7273            ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7274        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7275
7276        let no_binary_value = ScalarValue::BinaryView(None);
7277        assert_eq!(format!("{no_binary_value}"), "NULL");
7278        let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
7279        assert_eq!(format!("{small_binary_value}"), "010203");
7280        let large_binary_value =
7281            ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7282        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7283
7284        let no_binary_value = ScalarValue::LargeBinary(None);
7285        assert_eq!(format!("{no_binary_value}"), "NULL");
7286        let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
7287        assert_eq!(format!("{small_binary_value}"), "010203");
7288        let large_binary_value =
7289            ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7290        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7291
7292        let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
7293        assert_eq!(format!("{no_binary_value}"), "NULL");
7294        let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
7295        assert_eq!(format!("{small_binary_value}"), "010203");
7296        let large_binary_value = ScalarValue::FixedSizeBinary(
7297            11,
7298            Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
7299        );
7300        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7301    }
7302
7303    #[test]
7304    fn test_binary_debug() {
7305        let no_binary_value = ScalarValue::Binary(None);
7306        assert_eq!(format!("{no_binary_value:?}"), "Binary(NULL)");
7307        let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
7308        assert_eq!(format!("{single_binary_value:?}"), "Binary(\"42\")");
7309        let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
7310        assert_eq!(format!("{small_binary_value:?}"), "Binary(\"1,2,3\")");
7311        let large_binary_value =
7312            ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7313        assert_eq!(
7314            format!("{large_binary_value:?}"),
7315            "Binary(\"1,2,3,4,5,6,7,8,9,10,11\")"
7316        );
7317
7318        let no_binary_value = ScalarValue::BinaryView(None);
7319        assert_eq!(format!("{no_binary_value:?}"), "BinaryView(NULL)");
7320        let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
7321        assert_eq!(format!("{small_binary_value:?}"), "BinaryView(\"1,2,3\")");
7322        let large_binary_value =
7323            ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7324        assert_eq!(
7325            format!("{large_binary_value:?}"),
7326            "BinaryView(\"1,2,3,4,5,6,7,8,9,10,11\")"
7327        );
7328
7329        let no_binary_value = ScalarValue::LargeBinary(None);
7330        assert_eq!(format!("{no_binary_value:?}"), "LargeBinary(NULL)");
7331        let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
7332        assert_eq!(format!("{small_binary_value:?}"), "LargeBinary(\"1,2,3\")");
7333        let large_binary_value =
7334            ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7335        assert_eq!(
7336            format!("{large_binary_value:?}"),
7337            "LargeBinary(\"1,2,3,4,5,6,7,8,9,10,11\")"
7338        );
7339
7340        let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
7341        assert_eq!(format!("{no_binary_value:?}"), "FixedSizeBinary(3, NULL)");
7342        let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
7343        assert_eq!(
7344            format!("{small_binary_value:?}"),
7345            "FixedSizeBinary(3, \"1,2,3\")"
7346        );
7347        let large_binary_value = ScalarValue::FixedSizeBinary(
7348            11,
7349            Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
7350        );
7351        assert_eq!(
7352            format!("{large_binary_value:?}"),
7353            "FixedSizeBinary(11, \"1,2,3,4,5,6,7,8,9,10,11\")"
7354        );
7355    }
7356
7357    #[test]
7358    fn test_build_timestamp_millisecond_list() {
7359        let values = vec![ScalarValue::TimestampMillisecond(Some(1), None)];
7360        let arr = ScalarValue::new_list_nullable(
7361            &values,
7362            &DataType::Timestamp(TimeUnit::Millisecond, None),
7363        );
7364        assert_eq!(1, arr.len());
7365    }
7366
7367    #[test]
7368    fn test_newlist_timestamp_zone() {
7369        let s: &'static str = "UTC";
7370        let values = vec![ScalarValue::TimestampMillisecond(Some(1), Some(s.into()))];
7371        let arr = ScalarValue::new_list_nullable(
7372            &values,
7373            &DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
7374        );
7375        assert_eq!(1, arr.len());
7376        assert_eq!(
7377            arr.data_type(),
7378            &DataType::List(Arc::new(Field::new_list_field(
7379                DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
7380                true,
7381            )))
7382        );
7383    }
7384
7385    fn get_random_timestamps(sample_size: u64) -> Vec<ScalarValue> {
7386        let vector_size = sample_size;
7387        let mut timestamp = vec![];
7388        let mut rng = rand::rng();
7389        for i in 0..vector_size {
7390            let year = rng.random_range(1995..=2050);
7391            let month = rng.random_range(1..=12);
7392            let day = rng.random_range(1..=28); // to exclude invalid dates
7393            let hour = rng.random_range(0..=23);
7394            let minute = rng.random_range(0..=59);
7395            let second = rng.random_range(0..=59);
7396            if i % 4 == 0 {
7397                timestamp.push(ScalarValue::TimestampSecond(
7398                    Some(
7399                        NaiveDate::from_ymd_opt(year, month, day)
7400                            .unwrap()
7401                            .and_hms_opt(hour, minute, second)
7402                            .unwrap()
7403                            .and_utc()
7404                            .timestamp(),
7405                    ),
7406                    None,
7407                ))
7408            } else if i % 4 == 1 {
7409                let millisec = rng.random_range(0..=999);
7410                timestamp.push(ScalarValue::TimestampMillisecond(
7411                    Some(
7412                        NaiveDate::from_ymd_opt(year, month, day)
7413                            .unwrap()
7414                            .and_hms_milli_opt(hour, minute, second, millisec)
7415                            .unwrap()
7416                            .and_utc()
7417                            .timestamp_millis(),
7418                    ),
7419                    None,
7420                ))
7421            } else if i % 4 == 2 {
7422                let microsec = rng.random_range(0..=999_999);
7423                timestamp.push(ScalarValue::TimestampMicrosecond(
7424                    Some(
7425                        NaiveDate::from_ymd_opt(year, month, day)
7426                            .unwrap()
7427                            .and_hms_micro_opt(hour, minute, second, microsec)
7428                            .unwrap()
7429                            .and_utc()
7430                            .timestamp_micros(),
7431                    ),
7432                    None,
7433                ))
7434            } else if i % 4 == 3 {
7435                let nanosec = rng.random_range(0..=999_999_999);
7436                timestamp.push(ScalarValue::TimestampNanosecond(
7437                    Some(
7438                        NaiveDate::from_ymd_opt(year, month, day)
7439                            .unwrap()
7440                            .and_hms_nano_opt(hour, minute, second, nanosec)
7441                            .unwrap()
7442                            .and_utc()
7443                            .timestamp_nanos_opt()
7444                            .unwrap(),
7445                    ),
7446                    None,
7447                ))
7448            }
7449        }
7450        timestamp
7451    }
7452
7453    fn get_random_intervals(sample_size: u64) -> Vec<ScalarValue> {
7454        const MILLISECS_IN_ONE_DAY: i64 = 86_400_000;
7455        const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000;
7456
7457        let vector_size = sample_size;
7458        let mut intervals = vec![];
7459        let mut rng = rand::rng();
7460        const SECS_IN_ONE_DAY: i32 = 86_400;
7461        const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000;
7462        for i in 0..vector_size {
7463            if i % 4 == 0 {
7464                let days = rng.random_range(0..5000);
7465                // to not break second precision
7466                let millis = rng.random_range(0..SECS_IN_ONE_DAY) * 1000;
7467                intervals.push(ScalarValue::new_interval_dt(days, millis));
7468            } else if i % 4 == 1 {
7469                let days = rng.random_range(0..5000);
7470                let millisec = rng.random_range(0..(MILLISECS_IN_ONE_DAY as i32));
7471                intervals.push(ScalarValue::new_interval_dt(days, millisec));
7472            } else if i % 4 == 2 {
7473                let days = rng.random_range(0..5000);
7474                // to not break microsec precision
7475                let nanosec = rng.random_range(0..MICROSECS_IN_ONE_DAY) * 1000;
7476                intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
7477            } else {
7478                let days = rng.random_range(0..5000);
7479                let nanosec = rng.random_range(0..NANOSECS_IN_ONE_DAY);
7480                intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
7481            }
7482        }
7483        intervals
7484    }
7485
7486    fn union_fields() -> UnionFields {
7487        [
7488            (0, Arc::new(Field::new("A", DataType::Int32, true))),
7489            (1, Arc::new(Field::new("B", DataType::Float64, true))),
7490        ]
7491        .into_iter()
7492        .collect()
7493    }
7494
7495    #[test]
7496    fn sparse_scalar_union_is_null() {
7497        let sparse_scalar = ScalarValue::Union(
7498            Some((0_i8, Box::new(ScalarValue::Int32(None)))),
7499            union_fields(),
7500            UnionMode::Sparse,
7501        );
7502        assert!(sparse_scalar.is_null());
7503    }
7504
7505    #[test]
7506    fn dense_scalar_union_is_null() {
7507        let dense_scalar = ScalarValue::Union(
7508            Some((0_i8, Box::new(ScalarValue::Int32(None)))),
7509            union_fields(),
7510            UnionMode::Dense,
7511        );
7512        assert!(dense_scalar.is_null());
7513    }
7514
7515    #[test]
7516    fn null_dictionary_scalar_produces_null_dictionary_array() {
7517        let dictionary_scalar = ScalarValue::Dictionary(
7518            Box::new(DataType::Int32),
7519            Box::new(ScalarValue::Null),
7520        );
7521        assert!(dictionary_scalar.is_null());
7522        let dictionary_array = dictionary_scalar.to_array().unwrap();
7523        assert!(dictionary_array.is_null(0));
7524    }
7525
7526    #[test]
7527    fn test_scalar_value_try_new_null() {
7528        let scalars = vec![
7529            ScalarValue::try_new_null(&DataType::Boolean).unwrap(),
7530            ScalarValue::try_new_null(&DataType::Int8).unwrap(),
7531            ScalarValue::try_new_null(&DataType::Int16).unwrap(),
7532            ScalarValue::try_new_null(&DataType::Int32).unwrap(),
7533            ScalarValue::try_new_null(&DataType::Int64).unwrap(),
7534            ScalarValue::try_new_null(&DataType::UInt8).unwrap(),
7535            ScalarValue::try_new_null(&DataType::UInt16).unwrap(),
7536            ScalarValue::try_new_null(&DataType::UInt32).unwrap(),
7537            ScalarValue::try_new_null(&DataType::UInt64).unwrap(),
7538            ScalarValue::try_new_null(&DataType::Float16).unwrap(),
7539            ScalarValue::try_new_null(&DataType::Float32).unwrap(),
7540            ScalarValue::try_new_null(&DataType::Float64).unwrap(),
7541            ScalarValue::try_new_null(&DataType::Decimal128(42, 42)).unwrap(),
7542            ScalarValue::try_new_null(&DataType::Decimal256(42, 42)).unwrap(),
7543            ScalarValue::try_new_null(&DataType::Utf8).unwrap(),
7544            ScalarValue::try_new_null(&DataType::LargeUtf8).unwrap(),
7545            ScalarValue::try_new_null(&DataType::Utf8View).unwrap(),
7546            ScalarValue::try_new_null(&DataType::Binary).unwrap(),
7547            ScalarValue::try_new_null(&DataType::BinaryView).unwrap(),
7548            ScalarValue::try_new_null(&DataType::FixedSizeBinary(42)).unwrap(),
7549            ScalarValue::try_new_null(&DataType::LargeBinary).unwrap(),
7550            ScalarValue::try_new_null(&DataType::Date32).unwrap(),
7551            ScalarValue::try_new_null(&DataType::Date64).unwrap(),
7552            ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Second)).unwrap(),
7553            ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Millisecond)).unwrap(),
7554            ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Microsecond)).unwrap(),
7555            ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Nanosecond)).unwrap(),
7556            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Second, None))
7557                .unwrap(),
7558            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Millisecond, None))
7559                .unwrap(),
7560            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Microsecond, None))
7561                .unwrap(),
7562            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Nanosecond, None))
7563                .unwrap(),
7564            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::YearMonth))
7565                .unwrap(),
7566            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::DayTime))
7567                .unwrap(),
7568            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::MonthDayNano))
7569                .unwrap(),
7570            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Second)).unwrap(),
7571            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Microsecond))
7572                .unwrap(),
7573            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Nanosecond)).unwrap(),
7574            ScalarValue::try_new_null(&DataType::Null).unwrap(),
7575        ];
7576        assert!(scalars.iter().all(|s| s.is_null()));
7577
7578        let field_ref = Arc::new(Field::new("foo", DataType::Int32, true));
7579        let map_field_ref = Arc::new(Field::new(
7580            "foo",
7581            DataType::Struct(Fields::from(vec![
7582                Field::new("bar", DataType::Utf8, true),
7583                Field::new("baz", DataType::Int32, true),
7584            ])),
7585            true,
7586        ));
7587        let scalars = vec![
7588            ScalarValue::try_new_null(&DataType::List(Arc::clone(&field_ref))).unwrap(),
7589            ScalarValue::try_new_null(&DataType::LargeList(Arc::clone(&field_ref)))
7590                .unwrap(),
7591            ScalarValue::try_new_null(&DataType::FixedSizeList(
7592                Arc::clone(&field_ref),
7593                42,
7594            ))
7595            .unwrap(),
7596            ScalarValue::try_new_null(&DataType::Struct(
7597                vec![Arc::clone(&field_ref)].into(),
7598            ))
7599            .unwrap(),
7600            ScalarValue::try_new_null(&DataType::Map(map_field_ref, false)).unwrap(),
7601            ScalarValue::try_new_null(&DataType::Union(
7602                UnionFields::new(vec![42], vec![field_ref]),
7603                UnionMode::Dense,
7604            ))
7605            .unwrap(),
7606        ];
7607        assert!(scalars.iter().all(|s| s.is_null()));
7608    }
7609}