datafusion_common/scalar/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! [`ScalarValue`]: stores single values
19
20mod consts;
21mod struct_builder;
22
23use std::borrow::Borrow;
24use std::cmp::Ordering;
25use std::collections::{HashSet, VecDeque};
26use std::convert::Infallible;
27use std::fmt;
28use std::hash::Hash;
29use std::hash::Hasher;
30use std::iter::repeat_n;
31use std::mem::{size_of, size_of_val};
32use std::str::FromStr;
33use std::sync::Arc;
34
35use crate::cast::{
36    as_binary_array, as_binary_view_array, as_boolean_array, as_date32_array,
37    as_date64_array, as_decimal128_array, as_decimal256_array, as_dictionary_array,
38    as_duration_microsecond_array, as_duration_millisecond_array,
39    as_duration_nanosecond_array, as_duration_second_array, as_fixed_size_binary_array,
40    as_fixed_size_list_array, as_float16_array, as_float32_array, as_float64_array,
41    as_int16_array, as_int32_array, as_int64_array, as_int8_array, as_interval_dt_array,
42    as_interval_mdn_array, as_interval_ym_array, as_large_binary_array,
43    as_large_list_array, as_large_string_array, as_string_array, as_string_view_array,
44    as_time32_millisecond_array, as_time32_second_array, as_time64_microsecond_array,
45    as_time64_nanosecond_array, as_timestamp_microsecond_array,
46    as_timestamp_millisecond_array, as_timestamp_nanosecond_array,
47    as_timestamp_second_array, as_uint16_array, as_uint32_array, as_uint64_array,
48    as_uint8_array, as_union_array,
49};
50use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
51use crate::format::DEFAULT_CAST_OPTIONS;
52use crate::hash_utils::create_hashes;
53use crate::utils::SingleRowListArrayBuilder;
54use crate::{_internal_datafusion_err, arrow_datafusion_err};
55use arrow::array::{
56    new_empty_array, new_null_array, Array, ArrayData, ArrayRef, ArrowNativeTypeOp,
57    ArrowPrimitiveType, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array,
58    Date64Array, Decimal128Array, Decimal256Array, DictionaryArray,
59    DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray,
60    DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray, Float16Array,
61    Float32Array, Float64Array, GenericListArray, Int16Array, Int32Array, Int64Array,
62    Int8Array, IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray,
63    LargeBinaryArray, LargeListArray, LargeStringArray, ListArray, MapArray,
64    MutableArrayData, PrimitiveArray, Scalar, StringArray, StringViewArray, StructArray,
65    Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
66    Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
67    TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array,
68    UInt64Array, UInt8Array, UnionArray,
69};
70use arrow::buffer::ScalarBuffer;
71use arrow::compute::kernels::cast::{cast_with_options, CastOptions};
72use arrow::compute::kernels::numeric::{
73    add, add_wrapping, div, mul, mul_wrapping, rem, sub, sub_wrapping,
74};
75use arrow::datatypes::{
76    i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType,
77    Date32Type, Field, Float32Type, Int16Type, Int32Type, Int64Type, Int8Type,
78    IntervalDayTime, IntervalDayTimeType, IntervalMonthDayNano, IntervalMonthDayNanoType,
79    IntervalUnit, IntervalYearMonthType, TimeUnit, TimestampMicrosecondType,
80    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt16Type,
81    UInt32Type, UInt64Type, UInt8Type, UnionFields, UnionMode, DECIMAL128_MAX_PRECISION,
82};
83use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
84use chrono::{Duration, NaiveDate};
85use half::f16;
86pub use struct_builder::ScalarStructBuilder;
87
/// A dynamically typed, nullable single value.
///
/// While an arrow [`Array`] stores one or more values of the same type, in a
/// single column, a `ScalarValue` stores a single value of a single type, the
/// equivalent of 1 row and one column.
///
/// ```text
///  ┌────────┐
///  │ value1 │
///  │ value2 │                  ┌────────┐
///  │ value3 │                  │ value2 │
///  │  ...   │                  └────────┘
///  │ valueN │
///  └────────┘
///
///    Array                     ScalarValue
///
/// stores multiple,             stores a single,
/// possibly null, values of     possibly null, value
/// the same type
/// ```
///
/// # Performance
///
/// In general, performance will be better using arrow [`Array`]s rather than
/// [`ScalarValue`], as it is far more efficient to process multiple values at
/// once (vectorized processing).
///
/// # Example
/// ```
/// # use datafusion_common::ScalarValue;
/// // Create single scalar value for an Int32 value
/// let s1 = ScalarValue::Int32(Some(10));
///
/// // You can also create values using the From impl:
/// let s2 = ScalarValue::from(10i32);
/// assert_eq!(s1, s2);
/// ```
///
/// # Null Handling
///
/// `ScalarValue` represents null values in the same way as Arrow. Nulls are
/// "typed" in the sense that a null value in an [`Int32Array`] is different
/// from a null value in a [`Float64Array`], and is different from the values in
/// a [`NullArray`].
///
/// ```
/// # fn main() -> datafusion_common::Result<()> {
/// # use std::collections::hash_set::Difference;
/// # use datafusion_common::ScalarValue;
/// # use arrow::datatypes::DataType;
/// // You can create a 'null' Int32 value directly:
/// let s1 = ScalarValue::Int32(None);
///
/// // You can also create a null value for a given datatype:
/// let s2 = ScalarValue::try_from(&DataType::Int32)?;
/// assert_eq!(s1, s2);
///
/// // Note that this is DIFFERENT than a `ScalarValue::Null`
/// let s3 = ScalarValue::Null;
/// assert_ne!(s1, s3);
/// # Ok(())
/// # }
/// ```
///
/// # Nested Types
///
/// `List` / `LargeList` / `FixedSizeList` / `Struct` / `Map` are represented as a
/// single element array of the corresponding type.
///
/// ## Example: Creating [`ScalarValue::Struct`] using [`ScalarStructBuilder`]
/// ```
/// # use std::sync::Arc;
/// # use arrow::datatypes::{DataType, Field};
/// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder};
/// // Build a struct like: {a: 1, b: "foo"}
/// let field_a = Field::new("a", DataType::Int32, false);
/// let field_b = Field::new("b", DataType::Utf8, false);
///
/// let s1 = ScalarStructBuilder::new()
///    .with_scalar(field_a, ScalarValue::from(1i32))
///    .with_scalar(field_b, ScalarValue::from("foo"))
///    .build();
/// ```
///
/// ## Example: Creating a null [`ScalarValue::Struct`] using [`ScalarStructBuilder`]
/// ```
/// # use std::sync::Arc;
/// # use arrow::datatypes::{DataType, Field};
/// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder};
/// // Build a struct representing a NULL value
/// let fields = vec![
///     Field::new("a", DataType::Int32, false),
///     Field::new("b", DataType::Utf8, false),
/// ];
///
/// let s1 = ScalarStructBuilder::new_null(fields);
/// ```
///
/// ## Example: Creating [`ScalarValue::Struct`] directly
/// ```
/// # use std::sync::Arc;
/// # use arrow::datatypes::{DataType, Field, Fields};
/// # use arrow::array::{ArrayRef, Int32Array, StructArray, StringArray};
/// # use datafusion_common::ScalarValue;
/// // Build a struct like: {a: 1, b: "foo"}
/// // Field description
/// let fields = Fields::from(vec![
///   Field::new("a", DataType::Int32, false),
///   Field::new("b", DataType::Utf8, false),
/// ]);
/// // one row arrays for each field
/// let arrays: Vec<ArrayRef> = vec![
///   Arc::new(Int32Array::from(vec![1])),
///   Arc::new(StringArray::from(vec!["foo"])),
/// ];
/// // no nulls for this array
/// let nulls = None;
/// let arr = StructArray::new(fields, arrays, nulls);
///
/// // Create a ScalarValue::Struct directly
/// let s1 = ScalarValue::Struct(Arc::new(arr));
/// ```
///
///
/// # Further Reading
/// See [datatypes](https://arrow.apache.org/docs/python/api/datatypes.html) for
/// details on datatypes and the [format](https://github.com/apache/arrow/blob/master/format/Schema.fbs#L354-L375)
/// for the definitive reference.
///
/// [`NullArray`]: arrow::array::NullArray
#[derive(Clone)]
pub enum ScalarValue {
    /// Represents `DataType::Null` (castable to/from any other type)
    Null,
    /// True or false value
    Boolean(Option<bool>),
    /// 16bit float
    Float16(Option<f16>),
    /// 32bit float
    Float32(Option<f32>),
    /// 64bit float
    Float64(Option<f64>),
    /// 128bit decimal: the value is stored as an `i128`, followed by the
    /// precision and the scale
    Decimal128(Option<i128>, u8, i8),
    /// 256bit decimal: the value is stored as an `i256`, followed by the
    /// precision and the scale
    Decimal256(Option<i256>, u8, i8),
    /// Signed 8bit int
    Int8(Option<i8>),
    /// Signed 16bit int
    Int16(Option<i16>),
    /// Signed 32bit int
    Int32(Option<i32>),
    /// Signed 64bit int
    Int64(Option<i64>),
    /// Unsigned 8bit int
    UInt8(Option<u8>),
    /// Unsigned 16bit int
    UInt16(Option<u16>),
    /// Unsigned 32bit int
    UInt32(Option<u32>),
    /// Unsigned 64bit int
    UInt64(Option<u64>),
    /// UTF-8 encoded string.
    Utf8(Option<String>),
    /// UTF-8 encoded string but from view types.
    Utf8View(Option<String>),
    /// UTF-8 encoded string representing a LargeString's arrow type.
    LargeUtf8(Option<String>),
    /// Binary
    Binary(Option<Vec<u8>>),
    /// Binary but from view types.
    BinaryView(Option<Vec<u8>>),
    /// Fixed size binary
    // NOTE(review): the leading `i32` is presumably the fixed byte width — confirm.
    FixedSizeBinary(i32, Option<Vec<u8>>),
    /// Large binary
    LargeBinary(Option<Vec<u8>>),
    /// Fixed size list scalar.
    ///
    /// The array must be a FixedSizeListArray with length 1.
    FixedSizeList(Arc<FixedSizeListArray>),
    /// Represents a single element of a [`ListArray`] as an [`ArrayRef`]
    ///
    /// The array must be a ListArray with length 1.
    List(Arc<ListArray>),
    /// Large list scalar.
    ///
    /// The array must be a LargeListArray with length 1.
    LargeList(Arc<LargeListArray>),
    /// Represents a single element [`StructArray`] as an [`ArrayRef`]. See
    /// [`ScalarValue`] for examples of how to create instances of this type.
    Struct(Arc<StructArray>),
    /// Represents a single element [`MapArray`] as an [`ArrayRef`].
    Map(Arc<MapArray>),
    /// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01
    Date32(Option<i32>),
    /// Date stored as a signed 64bit int milliseconds since UNIX epoch 1970-01-01
    Date64(Option<i64>),
    /// Time stored as a signed 32bit int as seconds since midnight
    Time32Second(Option<i32>),
    /// Time stored as a signed 32bit int as milliseconds since midnight
    Time32Millisecond(Option<i32>),
    /// Time stored as a signed 64bit int as microseconds since midnight
    Time64Microsecond(Option<i64>),
    /// Time stored as a signed 64bit int as nanoseconds since midnight
    Time64Nanosecond(Option<i64>),
    /// Timestamp in seconds, with an optional second component
    // NOTE(review): the `Option<Arc<str>>` is presumably the time zone — confirm.
    TimestampSecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in milliseconds, with an optional second component
    /// (see the note on `TimestampSecond`)
    TimestampMillisecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in microseconds, with an optional second component
    /// (see the note on `TimestampSecond`)
    TimestampMicrosecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp in nanoseconds, with an optional second component
    /// (see the note on `TimestampSecond`)
    TimestampNanosecond(Option<i64>, Option<Arc<str>>),
    /// Number of elapsed whole months
    IntervalYearMonth(Option<i32>),
    /// Number of elapsed days and milliseconds (no leap seconds)
    /// stored as 2 contiguous 32-bit signed integers
    IntervalDayTime(Option<IntervalDayTime>),
    /// A triple of the number of elapsed months, days, and nanoseconds.
    /// Months and days are encoded as 32-bit signed integers.
    /// Nanoseconds is encoded as a 64-bit signed integer (no leap seconds).
    IntervalMonthDayNano(Option<IntervalMonthDayNano>),
    /// Duration in seconds
    DurationSecond(Option<i64>),
    /// Duration in milliseconds
    DurationMillisecond(Option<i64>),
    /// Duration in microseconds
    DurationMicrosecond(Option<i64>),
    /// Duration in nanoseconds
    DurationNanosecond(Option<i64>),
    /// A nested datatype that can represent slots of differing types. Components:
    /// `.0`: a tuple of union `type_id` and the single value held by this Scalar
    /// `.1`: the list of fields, zero-to-one of which will be set in `.0`
    /// `.2`: the physical storage of the source/destination UnionArray from which this Scalar came
    Union(Option<(i8, Box<ScalarValue>)>, UnionFields, UnionMode),
    /// Dictionary type: index type and value
    Dictionary(Box<DataType>, Box<ScalarValue>),
}
325
326impl Hash for Fl<f16> {
327    fn hash<H: Hasher>(&self, state: &mut H) {
328        self.0.to_bits().hash(state);
329    }
330}
331
332// manual implementation of `PartialEq`
333impl PartialEq for ScalarValue {
334    fn eq(&self, other: &Self) -> bool {
335        use ScalarValue::*;
336        // This purposely doesn't have a catch-all "(_, _)" so that
337        // any newly added enum variant will require editing this list
338        // or else face a compile error
339        match (self, other) {
340            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
341                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
342            }
343            (Decimal128(_, _, _), _) => false,
344            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
345                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
346            }
347            (Decimal256(_, _, _), _) => false,
348            (Boolean(v1), Boolean(v2)) => v1.eq(v2),
349            (Boolean(_), _) => false,
350            (Float32(v1), Float32(v2)) => match (v1, v2) {
351                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
352                _ => v1.eq(v2),
353            },
354            (Float16(v1), Float16(v2)) => match (v1, v2) {
355                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
356                _ => v1.eq(v2),
357            },
358            (Float32(_), _) => false,
359            (Float16(_), _) => false,
360            (Float64(v1), Float64(v2)) => match (v1, v2) {
361                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
362                _ => v1.eq(v2),
363            },
364            (Float64(_), _) => false,
365            (Int8(v1), Int8(v2)) => v1.eq(v2),
366            (Int8(_), _) => false,
367            (Int16(v1), Int16(v2)) => v1.eq(v2),
368            (Int16(_), _) => false,
369            (Int32(v1), Int32(v2)) => v1.eq(v2),
370            (Int32(_), _) => false,
371            (Int64(v1), Int64(v2)) => v1.eq(v2),
372            (Int64(_), _) => false,
373            (UInt8(v1), UInt8(v2)) => v1.eq(v2),
374            (UInt8(_), _) => false,
375            (UInt16(v1), UInt16(v2)) => v1.eq(v2),
376            (UInt16(_), _) => false,
377            (UInt32(v1), UInt32(v2)) => v1.eq(v2),
378            (UInt32(_), _) => false,
379            (UInt64(v1), UInt64(v2)) => v1.eq(v2),
380            (UInt64(_), _) => false,
381            (Utf8(v1), Utf8(v2)) => v1.eq(v2),
382            (Utf8(_), _) => false,
383            (Utf8View(v1), Utf8View(v2)) => v1.eq(v2),
384            (Utf8View(_), _) => false,
385            (LargeUtf8(v1), LargeUtf8(v2)) => v1.eq(v2),
386            (LargeUtf8(_), _) => false,
387            (Binary(v1), Binary(v2)) => v1.eq(v2),
388            (Binary(_), _) => false,
389            (BinaryView(v1), BinaryView(v2)) => v1.eq(v2),
390            (BinaryView(_), _) => false,
391            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.eq(v2),
392            (FixedSizeBinary(_, _), _) => false,
393            (LargeBinary(v1), LargeBinary(v2)) => v1.eq(v2),
394            (LargeBinary(_), _) => false,
395            (FixedSizeList(v1), FixedSizeList(v2)) => v1.eq(v2),
396            (FixedSizeList(_), _) => false,
397            (List(v1), List(v2)) => v1.eq(v2),
398            (List(_), _) => false,
399            (LargeList(v1), LargeList(v2)) => v1.eq(v2),
400            (LargeList(_), _) => false,
401            (Struct(v1), Struct(v2)) => v1.eq(v2),
402            (Struct(_), _) => false,
403            (Map(v1), Map(v2)) => v1.eq(v2),
404            (Map(_), _) => false,
405            (Date32(v1), Date32(v2)) => v1.eq(v2),
406            (Date32(_), _) => false,
407            (Date64(v1), Date64(v2)) => v1.eq(v2),
408            (Date64(_), _) => false,
409            (Time32Second(v1), Time32Second(v2)) => v1.eq(v2),
410            (Time32Second(_), _) => false,
411            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.eq(v2),
412            (Time32Millisecond(_), _) => false,
413            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.eq(v2),
414            (Time64Microsecond(_), _) => false,
415            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.eq(v2),
416            (Time64Nanosecond(_), _) => false,
417            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2),
418            (TimestampSecond(_, _), _) => false,
419            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2),
420            (TimestampMillisecond(_, _), _) => false,
421            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => v1.eq(v2),
422            (TimestampMicrosecond(_, _), _) => false,
423            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2),
424            (TimestampNanosecond(_, _), _) => false,
425            (DurationSecond(v1), DurationSecond(v2)) => v1.eq(v2),
426            (DurationSecond(_), _) => false,
427            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.eq(v2),
428            (DurationMillisecond(_), _) => false,
429            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.eq(v2),
430            (DurationMicrosecond(_), _) => false,
431            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.eq(v2),
432            (DurationNanosecond(_), _) => false,
433            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2),
434            (IntervalYearMonth(_), _) => false,
435            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2),
436            (IntervalDayTime(_), _) => false,
437            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2),
438            (IntervalMonthDayNano(_), _) => false,
439            (Union(val1, fields1, mode1), Union(val2, fields2, mode2)) => {
440                val1.eq(val2) && fields1.eq(fields2) && mode1.eq(mode2)
441            }
442            (Union(_, _, _), _) => false,
443            (Dictionary(k1, v1), Dictionary(k2, v2)) => k1.eq(k2) && v1.eq(v2),
444            (Dictionary(_, _), _) => false,
445            (Null, Null) => true,
446            (Null, _) => false,
447        }
448    }
449}
450
451// manual implementation of `PartialOrd`
452impl PartialOrd for ScalarValue {
453    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
454        use ScalarValue::*;
455        // This purposely doesn't have a catch-all "(_, _)" so that
456        // any newly added enum variant will require editing this list
457        // or else face a compile error
458        match (self, other) {
459            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
460                if p1.eq(p2) && s1.eq(s2) {
461                    v1.partial_cmp(v2)
462                } else {
463                    // Two decimal values can be compared if they have the same precision and scale.
464                    None
465                }
466            }
467            (Decimal128(_, _, _), _) => None,
468            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
469                if p1.eq(p2) && s1.eq(s2) {
470                    v1.partial_cmp(v2)
471                } else {
472                    // Two decimal values can be compared if they have the same precision and scale.
473                    None
474                }
475            }
476            (Decimal256(_, _, _), _) => None,
477            (Boolean(v1), Boolean(v2)) => v1.partial_cmp(v2),
478            (Boolean(_), _) => None,
479            (Float32(v1), Float32(v2)) => match (v1, v2) {
480                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
481                _ => v1.partial_cmp(v2),
482            },
483            (Float16(v1), Float16(v2)) => match (v1, v2) {
484                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
485                _ => v1.partial_cmp(v2),
486            },
487            (Float32(_), _) => None,
488            (Float16(_), _) => None,
489            (Float64(v1), Float64(v2)) => match (v1, v2) {
490                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
491                _ => v1.partial_cmp(v2),
492            },
493            (Float64(_), _) => None,
494            (Int8(v1), Int8(v2)) => v1.partial_cmp(v2),
495            (Int8(_), _) => None,
496            (Int16(v1), Int16(v2)) => v1.partial_cmp(v2),
497            (Int16(_), _) => None,
498            (Int32(v1), Int32(v2)) => v1.partial_cmp(v2),
499            (Int32(_), _) => None,
500            (Int64(v1), Int64(v2)) => v1.partial_cmp(v2),
501            (Int64(_), _) => None,
502            (UInt8(v1), UInt8(v2)) => v1.partial_cmp(v2),
503            (UInt8(_), _) => None,
504            (UInt16(v1), UInt16(v2)) => v1.partial_cmp(v2),
505            (UInt16(_), _) => None,
506            (UInt32(v1), UInt32(v2)) => v1.partial_cmp(v2),
507            (UInt32(_), _) => None,
508            (UInt64(v1), UInt64(v2)) => v1.partial_cmp(v2),
509            (UInt64(_), _) => None,
510            (Utf8(v1), Utf8(v2)) => v1.partial_cmp(v2),
511            (Utf8(_), _) => None,
512            (LargeUtf8(v1), LargeUtf8(v2)) => v1.partial_cmp(v2),
513            (LargeUtf8(_), _) => None,
514            (Utf8View(v1), Utf8View(v2)) => v1.partial_cmp(v2),
515            (Utf8View(_), _) => None,
516            (Binary(v1), Binary(v2)) => v1.partial_cmp(v2),
517            (Binary(_), _) => None,
518            (BinaryView(v1), BinaryView(v2)) => v1.partial_cmp(v2),
519            (BinaryView(_), _) => None,
520            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.partial_cmp(v2),
521            (FixedSizeBinary(_, _), _) => None,
522            (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
523            (LargeBinary(_), _) => None,
524            // ScalarValue::List / ScalarValue::FixedSizeList / ScalarValue::LargeList are ensure to have length 1
525            (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()),
526            (FixedSizeList(arr1), FixedSizeList(arr2)) => {
527                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
528            }
529            (LargeList(arr1), LargeList(arr2)) => {
530                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
531            }
532            (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None,
533            (Struct(struct_arr1), Struct(struct_arr2)) => {
534                partial_cmp_struct(struct_arr1.as_ref(), struct_arr2.as_ref())
535            }
536            (Struct(_), _) => None,
537            (Map(map_arr1), Map(map_arr2)) => partial_cmp_map(map_arr1, map_arr2),
538            (Map(_), _) => None,
539            (Date32(v1), Date32(v2)) => v1.partial_cmp(v2),
540            (Date32(_), _) => None,
541            (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
542            (Date64(_), _) => None,
543            (Time32Second(v1), Time32Second(v2)) => v1.partial_cmp(v2),
544            (Time32Second(_), _) => None,
545            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.partial_cmp(v2),
546            (Time32Millisecond(_), _) => None,
547            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.partial_cmp(v2),
548            (Time64Microsecond(_), _) => None,
549            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.partial_cmp(v2),
550            (Time64Nanosecond(_), _) => None,
551            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2),
552            (TimestampSecond(_, _), _) => None,
553            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => {
554                v1.partial_cmp(v2)
555            }
556            (TimestampMillisecond(_, _), _) => None,
557            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => {
558                v1.partial_cmp(v2)
559            }
560            (TimestampMicrosecond(_, _), _) => None,
561            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => {
562                v1.partial_cmp(v2)
563            }
564            (TimestampNanosecond(_, _), _) => None,
565            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2),
566            (IntervalYearMonth(_), _) => None,
567            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2),
568            (IntervalDayTime(_), _) => None,
569            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2),
570            (IntervalMonthDayNano(_), _) => None,
571            (DurationSecond(v1), DurationSecond(v2)) => v1.partial_cmp(v2),
572            (DurationSecond(_), _) => None,
573            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.partial_cmp(v2),
574            (DurationMillisecond(_), _) => None,
575            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.partial_cmp(v2),
576            (DurationMicrosecond(_), _) => None,
577            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.partial_cmp(v2),
578            (DurationNanosecond(_), _) => None,
579            (Union(v1, t1, m1), Union(v2, t2, m2)) => {
580                if t1.eq(t2) && m1.eq(m2) {
581                    v1.partial_cmp(v2)
582                } else {
583                    None
584                }
585            }
586            (Union(_, _, _), _) => None,
587            (Dictionary(k1, v1), Dictionary(k2, v2)) => {
588                // Don't compare if the key types don't match (it is effectively a different datatype)
589                if k1 == k2 {
590                    v1.partial_cmp(v2)
591                } else {
592                    None
593                }
594            }
595            (Dictionary(_, _), _) => None,
596            (Null, Null) => Some(Ordering::Equal),
597            (Null, _) => None,
598        }
599    }
600}
601
602/// List/LargeList/FixedSizeList scalars always have a single element
603/// array. This function returns that array
604fn first_array_for_list(arr: &dyn Array) -> ArrayRef {
605    assert_eq!(arr.len(), 1);
606    if let Some(arr) = arr.as_list_opt::<i32>() {
607        arr.value(0)
608    } else if let Some(arr) = arr.as_list_opt::<i64>() {
609        arr.value(0)
610    } else if let Some(arr) = arr.as_fixed_size_list_opt() {
611        arr.value(0)
612    } else {
613        unreachable!("Since only List / LargeList / FixedSizeList are supported, this should never happen")
614    }
615}
616
617/// Compares two List/LargeList/FixedSizeList scalars
618fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option<Ordering> {
619    if arr1.data_type() != arr2.data_type() {
620        return None;
621    }
622    let arr1 = first_array_for_list(arr1);
623    let arr2 = first_array_for_list(arr2);
624
625    let min_length = arr1.len().min(arr2.len());
626    let arr1_trimmed = arr1.slice(0, min_length);
627    let arr2_trimmed = arr2.slice(0, min_length);
628
629    let lt_res = arrow::compute::kernels::cmp::lt(&arr1_trimmed, &arr2_trimmed).ok()?;
630    let eq_res = arrow::compute::kernels::cmp::eq(&arr1_trimmed, &arr2_trimmed).ok()?;
631
632    for j in 0..lt_res.len() {
633        // In Postgres, NULL values in lists are always considered to be greater than non-NULL values:
634        //
635        // $ SELECT ARRAY[NULL]::integer[] > ARRAY[1]
636        // true
637        //
638        // These next two if statements are introduced for replicating Postgres behavior, as
639        // arrow::compute does not account for this.
640        if arr1_trimmed.is_null(j) && !arr2_trimmed.is_null(j) {
641            return Some(Ordering::Greater);
642        }
643        if !arr1_trimmed.is_null(j) && arr2_trimmed.is_null(j) {
644            return Some(Ordering::Less);
645        }
646
647        if lt_res.is_valid(j) && lt_res.value(j) {
648            return Some(Ordering::Less);
649        }
650        if eq_res.is_valid(j) && !eq_res.value(j) {
651            return Some(Ordering::Greater);
652        }
653    }
654
655    Some(arr1.len().cmp(&arr2.len()))
656}
657
658fn flatten<'a>(array: &'a StructArray, columns: &mut Vec<&'a ArrayRef>) {
659    for i in 0..array.num_columns() {
660        let column = array.column(i);
661        if let Some(nested_struct) = column.as_any().downcast_ref::<StructArray>() {
662            // If it's a nested struct, recursively expand
663            flatten(nested_struct, columns);
664        } else {
665            // If it's a primitive type, add directly
666            columns.push(column);
667        }
668    }
669}
670
671pub fn partial_cmp_struct(s1: &StructArray, s2: &StructArray) -> Option<Ordering> {
672    if s1.len() != s2.len() {
673        return None;
674    }
675
676    if s1.data_type() != s2.data_type() {
677        return None;
678    }
679
680    let mut expanded_columns1 = Vec::with_capacity(s1.num_columns());
681    let mut expanded_columns2 = Vec::with_capacity(s2.num_columns());
682
683    flatten(s1, &mut expanded_columns1);
684    flatten(s2, &mut expanded_columns2);
685
686    for col_index in 0..expanded_columns1.len() {
687        let arr1 = expanded_columns1[col_index];
688        let arr2 = expanded_columns2[col_index];
689
690        let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
691        let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
692
693        for j in 0..lt_res.len() {
694            if lt_res.is_valid(j) && lt_res.value(j) {
695                return Some(Ordering::Less);
696            }
697            if eq_res.is_valid(j) && !eq_res.value(j) {
698                return Some(Ordering::Greater);
699            }
700        }
701    }
702    Some(Ordering::Equal)
703}
704
705fn partial_cmp_map(m1: &Arc<MapArray>, m2: &Arc<MapArray>) -> Option<Ordering> {
706    if m1.len() != m2.len() {
707        return None;
708    }
709
710    if m1.data_type() != m2.data_type() {
711        return None;
712    }
713
714    for col_index in 0..m1.len() {
715        let arr1 = m1.entries().column(col_index);
716        let arr2 = m2.entries().column(col_index);
717
718        let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
719        let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
720
721        for j in 0..lt_res.len() {
722            if lt_res.is_valid(j) && lt_res.value(j) {
723                return Some(Ordering::Less);
724            }
725            if eq_res.is_valid(j) && !eq_res.value(j) {
726                return Some(Ordering::Greater);
727            }
728        }
729    }
730    Some(Ordering::Equal)
731}
732
// `Eq` has no methods of its own: this marker impl declares that the
// `PartialEq` implementation of `ScalarValue` is a full equivalence relation.
impl Eq for ScalarValue {}
734
// Newtype around f32/f64 used for hashing. `std::hash::Hash` cannot be
// implemented for the float primitives directly, so the implementations are
// attached to this wrapper instead.
struct Fl<T>(T);

// Implements `Hash` for `Fl<$float>`: the float's native-endian bytes are
// reinterpreted as the same-width integer `$bits`, whose bytes are then
// written to the hasher.
macro_rules! hash_float_value {
    ($(($float:ty, $bits:ty)),+) => {
        $(impl std::hash::Hash for Fl<$float> {
            #[inline]
            fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
                let raw = <$bits>::from_ne_bytes(self.0.to_ne_bytes());
                state.write(&raw.to_ne_bytes())
            }
        })+
    };
}

hash_float_value!((f64, u64), (f32, u32));
750
751// manual implementation of `Hash`
752//
753// # Panics
754//
755// Panics if there is an error when creating hash values for rows
756impl Hash for ScalarValue {
757    fn hash<H: Hasher>(&self, state: &mut H) {
758        use ScalarValue::*;
759        match self {
760            Decimal128(v, p, s) => {
761                v.hash(state);
762                p.hash(state);
763                s.hash(state)
764            }
765            Decimal256(v, p, s) => {
766                v.hash(state);
767                p.hash(state);
768                s.hash(state)
769            }
770            Boolean(v) => v.hash(state),
771            Float16(v) => v.map(Fl).hash(state),
772            Float32(v) => v.map(Fl).hash(state),
773            Float64(v) => v.map(Fl).hash(state),
774            Int8(v) => v.hash(state),
775            Int16(v) => v.hash(state),
776            Int32(v) => v.hash(state),
777            Int64(v) => v.hash(state),
778            UInt8(v) => v.hash(state),
779            UInt16(v) => v.hash(state),
780            UInt32(v) => v.hash(state),
781            UInt64(v) => v.hash(state),
782            Utf8(v) | LargeUtf8(v) | Utf8View(v) => v.hash(state),
783            Binary(v) | FixedSizeBinary(_, v) | LargeBinary(v) | BinaryView(v) => {
784                v.hash(state)
785            }
786            List(arr) => {
787                hash_nested_array(arr.to_owned() as ArrayRef, state);
788            }
789            LargeList(arr) => {
790                hash_nested_array(arr.to_owned() as ArrayRef, state);
791            }
792            FixedSizeList(arr) => {
793                hash_nested_array(arr.to_owned() as ArrayRef, state);
794            }
795            Struct(arr) => {
796                hash_nested_array(arr.to_owned() as ArrayRef, state);
797            }
798            Map(arr) => {
799                hash_nested_array(arr.to_owned() as ArrayRef, state);
800            }
801            Date32(v) => v.hash(state),
802            Date64(v) => v.hash(state),
803            Time32Second(v) => v.hash(state),
804            Time32Millisecond(v) => v.hash(state),
805            Time64Microsecond(v) => v.hash(state),
806            Time64Nanosecond(v) => v.hash(state),
807            TimestampSecond(v, _) => v.hash(state),
808            TimestampMillisecond(v, _) => v.hash(state),
809            TimestampMicrosecond(v, _) => v.hash(state),
810            TimestampNanosecond(v, _) => v.hash(state),
811            DurationSecond(v) => v.hash(state),
812            DurationMillisecond(v) => v.hash(state),
813            DurationMicrosecond(v) => v.hash(state),
814            DurationNanosecond(v) => v.hash(state),
815            IntervalYearMonth(v) => v.hash(state),
816            IntervalDayTime(v) => v.hash(state),
817            IntervalMonthDayNano(v) => v.hash(state),
818            Union(v, t, m) => {
819                v.hash(state);
820                t.hash(state);
821                m.hash(state);
822            }
823            Dictionary(k, v) => {
824                k.hash(state);
825                v.hash(state);
826            }
827            // stable hash for Null value
828            Null => 1.hash(state),
829        }
830    }
831}
832
833fn hash_nested_array<H: Hasher>(arr: ArrayRef, state: &mut H) {
834    let arrays = vec![arr.to_owned()];
835    let hashes_buffer = &mut vec![0; arr.len()];
836    let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0);
837    let hashes = create_hashes(&arrays, &random_state, hashes_buffer).unwrap();
838    // Hash back to std::hash::Hasher
839    hashes.hash(state);
840}
841
842/// Return a reference to the values array and the index into it for a
843/// dictionary array
844///
845/// # Errors
846///
847/// Errors if the array cannot be downcasted to DictionaryArray
848#[inline]
849pub fn get_dict_value<K: ArrowDictionaryKeyType>(
850    array: &dyn Array,
851    index: usize,
852) -> Result<(&ArrayRef, Option<usize>)> {
853    let dict_array = as_dictionary_array::<K>(array)?;
854    Ok((dict_array.values(), dict_array.key(index)))
855}
856
857/// Create a dictionary array representing `value` repeated `size`
858/// times
859fn dict_from_scalar<K: ArrowDictionaryKeyType>(
860    value: &ScalarValue,
861    size: usize,
862) -> Result<ArrayRef> {
863    // values array is one element long (the value)
864    let values_array = value.to_array_of_size(1)?;
865
866    // Create a key array with `size` elements, each of 0
867    let key_array: PrimitiveArray<K> = repeat_n(
868        if value.is_null() {
869            None
870        } else {
871            Some(K::default_value())
872        },
873        size,
874    )
875    .collect();
876
877    // create a new DictionaryArray
878    //
879    // Note: this path could be made faster by using the ArrayData
880    // APIs and skipping validation, if it every comes up in
881    // performance traces.
882    Ok(Arc::new(
883        DictionaryArray::<K>::try_new(key_array, values_array)?, // should always be valid by construction above
884    ))
885}
886
887/// Create a dictionary array representing all the values in values
888fn dict_from_values<K: ArrowDictionaryKeyType>(
889    values_array: ArrayRef,
890) -> Result<ArrayRef> {
891    // Create a key array with `size` elements of 0..array_len for all
892    // non-null value elements
893    let key_array: PrimitiveArray<K> = (0..values_array.len())
894        .map(|index| {
895            if values_array.is_valid(index) {
896                let native_index = K::Native::from_usize(index).ok_or_else(|| {
897                    DataFusionError::Internal(format!(
898                        "Can not create index of type {} from value {}",
899                        K::DATA_TYPE,
900                        index
901                    ))
902                })?;
903                Ok(Some(native_index))
904            } else {
905                Ok(None)
906            }
907        })
908        .collect::<Result<Vec<_>>>()?
909        .into_iter()
910        .collect();
911
912    // create a new DictionaryArray
913    //
914    // Note: this path could be made faster by using the ArrayData
915    // APIs and skipping validation, if it every comes up in
916    // performance traces.
917    let dict_array = DictionaryArray::<K>::try_new(key_array, values_array)?;
918    Ok(Arc::new(dict_array))
919}
920
// Reads element `$index` of `$array` (downcast with `$array_cast`) into the
// timezone-carrying variant `ScalarValue::$SCALAR`, using `None` for a null
// element and a clone of `$TZ` as the timezone.
macro_rules! typed_cast_tz {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident, $TZ:expr) => {{
        let array = $array_cast($array)?;
        let value = if array.is_null($index) {
            None
        } else {
            Some(array.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value, $TZ.clone()))
    }};
}
933
// Reads element `$index` of `$array` (downcast with `$array_cast`) into the
// single-field variant `ScalarValue::$SCALAR`, using `None` for a null
// element.
macro_rules! typed_cast {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident) => {{
        let array = $array_cast($array)?;
        let value = if array.is_null($index) {
            None
        } else {
            Some(array.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value))
    }};
}
945
// Builds an array of `$SIZE` elements from an optional value: `$ARRAY_TYPE`
// filled with the value when `$EXPR` is `Some`, otherwise a null array of the
// matching data type.
//
// The first form targets a unit `DataType::$DATA_TYPE`; the second targets a
// parameterized `DataType::$DATA_TYPE($ENUM)`.
macro_rules! build_array_from_option {
    ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        if let Some(value) = $EXPR {
            Arc::new($ARRAY_TYPE::from_value(*value, $SIZE))
        } else {
            new_null_array(&DataType::$DATA_TYPE, $SIZE)
        }
    }};
    ($DATA_TYPE:ident, $ENUM:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        if let Some(value) = $EXPR {
            Arc::new($ARRAY_TYPE::from_value(*value, $SIZE))
        } else {
            new_null_array(&DataType::$DATA_TYPE($ENUM), $SIZE)
        }
    }};
}
960
// Builds a timestamp array of `$SIZE` elements from an optional value:
// `$ARRAY_TYPE` filled with the value and tagged with timezone `$TZ` when
// `$EXPR` is `Some`, otherwise a null array of type
// `DataType::Timestamp($TIME_UNIT, $TZ)`.
macro_rules! build_timestamp_array_from_option {
    ($TIME_UNIT:expr, $TZ:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {
        if let Some(value) = $EXPR {
            Arc::new($ARRAY_TYPE::from_value(*value, $SIZE).with_timezone_opt($TZ))
        } else {
            new_null_array(&DataType::Timestamp($TIME_UNIT, $TZ), $SIZE)
        }
    };
}
971
// Checks whether element `$index` of `$array` (downcast with `$array_cast`)
// equals the optional value `$VALUE`: a `Some` matches a valid element with
// the same value, and a `None` matches a null element.
macro_rules! eq_array_primitive {
    ($array:expr, $index:expr, $array_cast:ident, $VALUE:expr) => {{
        let array = $array_cast($array)?;
        let is_valid = array.is_valid($index);
        let is_equal = match $VALUE {
            None => !is_valid,
            Some(val) => is_valid && &array.value($index) == val,
        };
        Ok::<bool, DataFusionError>(is_equal)
    }};
}
982
983impl ScalarValue {
984    /// Create a [`Result<ScalarValue>`] with the provided value and datatype
985    ///
986    /// # Panics
987    ///
988    /// Panics if d is not compatible with T
989    pub fn new_primitive<T: ArrowPrimitiveType>(
990        a: Option<T::Native>,
991        d: &DataType,
992    ) -> Result<Self> {
993        match a {
994            None => d.try_into(),
995            Some(v) => {
996                let array = PrimitiveArray::<T>::new(vec![v].into(), None)
997                    .with_data_type(d.clone());
998                Self::try_from_array(&array, 0)
999            }
1000        }
1001    }
1002
1003    /// Create a decimal Scalar from value/precision and scale.
1004    pub fn try_new_decimal128(value: i128, precision: u8, scale: i8) -> Result<Self> {
1005        // make sure the precision and scale is valid
1006        if precision <= DECIMAL128_MAX_PRECISION && scale.unsigned_abs() <= precision {
1007            return Ok(ScalarValue::Decimal128(Some(value), precision, scale));
1008        }
1009        _internal_err!(
1010            "Can not new a decimal type ScalarValue for precision {precision} and scale {scale}"
1011        )
1012    }
1013
1014    /// Create a Null instance of ScalarValue for this datatype
1015    ///
1016    /// Example
1017    /// ```
1018    /// use datafusion_common::ScalarValue;
1019    /// use arrow::datatypes::DataType;
1020    ///
1021    /// let scalar = ScalarValue::try_new_null(&DataType::Int32).unwrap();
1022    /// assert_eq!(scalar.is_null(), true);
1023    /// assert_eq!(scalar.data_type(), DataType::Int32);
1024    /// ```
1025    pub fn try_new_null(data_type: &DataType) -> Result<Self> {
1026        Ok(match data_type {
1027            DataType::Boolean => ScalarValue::Boolean(None),
1028            DataType::Float16 => ScalarValue::Float16(None),
1029            DataType::Float64 => ScalarValue::Float64(None),
1030            DataType::Float32 => ScalarValue::Float32(None),
1031            DataType::Int8 => ScalarValue::Int8(None),
1032            DataType::Int16 => ScalarValue::Int16(None),
1033            DataType::Int32 => ScalarValue::Int32(None),
1034            DataType::Int64 => ScalarValue::Int64(None),
1035            DataType::UInt8 => ScalarValue::UInt8(None),
1036            DataType::UInt16 => ScalarValue::UInt16(None),
1037            DataType::UInt32 => ScalarValue::UInt32(None),
1038            DataType::UInt64 => ScalarValue::UInt64(None),
1039            DataType::Decimal128(precision, scale) => {
1040                ScalarValue::Decimal128(None, *precision, *scale)
1041            }
1042            DataType::Decimal256(precision, scale) => {
1043                ScalarValue::Decimal256(None, *precision, *scale)
1044            }
1045            DataType::Utf8 => ScalarValue::Utf8(None),
1046            DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
1047            DataType::Utf8View => ScalarValue::Utf8View(None),
1048            DataType::Binary => ScalarValue::Binary(None),
1049            DataType::BinaryView => ScalarValue::BinaryView(None),
1050            DataType::FixedSizeBinary(len) => ScalarValue::FixedSizeBinary(*len, None),
1051            DataType::LargeBinary => ScalarValue::LargeBinary(None),
1052            DataType::Date32 => ScalarValue::Date32(None),
1053            DataType::Date64 => ScalarValue::Date64(None),
1054            DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(None),
1055            DataType::Time32(TimeUnit::Millisecond) => {
1056                ScalarValue::Time32Millisecond(None)
1057            }
1058            DataType::Time64(TimeUnit::Microsecond) => {
1059                ScalarValue::Time64Microsecond(None)
1060            }
1061            DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64Nanosecond(None),
1062            DataType::Timestamp(TimeUnit::Second, tz_opt) => {
1063                ScalarValue::TimestampSecond(None, tz_opt.clone())
1064            }
1065            DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => {
1066                ScalarValue::TimestampMillisecond(None, tz_opt.clone())
1067            }
1068            DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => {
1069                ScalarValue::TimestampMicrosecond(None, tz_opt.clone())
1070            }
1071            DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => {
1072                ScalarValue::TimestampNanosecond(None, tz_opt.clone())
1073            }
1074            DataType::Interval(IntervalUnit::YearMonth) => {
1075                ScalarValue::IntervalYearMonth(None)
1076            }
1077            DataType::Interval(IntervalUnit::DayTime) => {
1078                ScalarValue::IntervalDayTime(None)
1079            }
1080            DataType::Interval(IntervalUnit::MonthDayNano) => {
1081                ScalarValue::IntervalMonthDayNano(None)
1082            }
1083            DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None),
1084            DataType::Duration(TimeUnit::Millisecond) => {
1085                ScalarValue::DurationMillisecond(None)
1086            }
1087            DataType::Duration(TimeUnit::Microsecond) => {
1088                ScalarValue::DurationMicrosecond(None)
1089            }
1090            DataType::Duration(TimeUnit::Nanosecond) => {
1091                ScalarValue::DurationNanosecond(None)
1092            }
1093            DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary(
1094                index_type.clone(),
1095                Box::new(value_type.as_ref().try_into()?),
1096            ),
1097            // `ScalaValue::List` contains single element `ListArray`.
1098            DataType::List(field_ref) => ScalarValue::List(Arc::new(
1099                GenericListArray::new_null(Arc::clone(field_ref), 1),
1100            )),
1101            // `ScalarValue::LargeList` contains single element `LargeListArray`.
1102            DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new(
1103                GenericListArray::new_null(Arc::clone(field_ref), 1),
1104            )),
1105            // `ScalaValue::FixedSizeList` contains single element `FixedSizeList`.
1106            DataType::FixedSizeList(field_ref, fixed_length) => {
1107                ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null(
1108                    Arc::clone(field_ref),
1109                    *fixed_length,
1110                    1,
1111                )))
1112            }
1113            DataType::Struct(fields) => ScalarValue::Struct(
1114                new_null_array(&DataType::Struct(fields.to_owned()), 1)
1115                    .as_struct()
1116                    .to_owned()
1117                    .into(),
1118            ),
1119            DataType::Map(fields, sorted) => ScalarValue::Map(
1120                new_null_array(&DataType::Map(fields.to_owned(), sorted.to_owned()), 1)
1121                    .as_map()
1122                    .to_owned()
1123                    .into(),
1124            ),
1125            DataType::Union(fields, mode) => {
1126                ScalarValue::Union(None, fields.clone(), *mode)
1127            }
1128            DataType::Null => ScalarValue::Null,
1129            _ => {
1130                return _not_impl_err!(
1131                    "Can't create a null scalar from data_type \"{data_type:?}\""
1132                );
1133            }
1134        })
1135    }
1136
1137    /// Returns a [`ScalarValue::Utf8`] representing `val`
1138    pub fn new_utf8(val: impl Into<String>) -> Self {
1139        ScalarValue::from(val.into())
1140    }
1141
1142    /// Returns a [`ScalarValue::Utf8View`] representing `val`
1143    pub fn new_utf8view(val: impl Into<String>) -> Self {
1144        ScalarValue::Utf8View(Some(val.into()))
1145    }
1146
1147    /// Returns a [`ScalarValue::IntervalYearMonth`] representing
1148    /// `years` years and `months` months
1149    pub fn new_interval_ym(years: i32, months: i32) -> Self {
1150        let val = IntervalYearMonthType::make_value(years, months);
1151        ScalarValue::IntervalYearMonth(Some(val))
1152    }
1153
1154    /// Returns a [`ScalarValue::IntervalDayTime`] representing
1155    /// `days` days and `millis` milliseconds
1156    pub fn new_interval_dt(days: i32, millis: i32) -> Self {
1157        let val = IntervalDayTimeType::make_value(days, millis);
1158        Self::IntervalDayTime(Some(val))
1159    }
1160
1161    /// Returns a [`ScalarValue::IntervalMonthDayNano`] representing
1162    /// `months` months and `days` days, and `nanos` nanoseconds
1163    pub fn new_interval_mdn(months: i32, days: i32, nanos: i64) -> Self {
1164        let val = IntervalMonthDayNanoType::make_value(months, days, nanos);
1165        ScalarValue::IntervalMonthDayNano(Some(val))
1166    }
1167
1168    /// Returns a [`ScalarValue`] representing
1169    /// `value` and `tz_opt` timezone
1170    pub fn new_timestamp<T: ArrowTimestampType>(
1171        value: Option<i64>,
1172        tz_opt: Option<Arc<str>>,
1173    ) -> Self {
1174        match T::UNIT {
1175            TimeUnit::Second => ScalarValue::TimestampSecond(value, tz_opt),
1176            TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(value, tz_opt),
1177            TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(value, tz_opt),
1178            TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(value, tz_opt),
1179        }
1180    }
1181
1182    /// Returns a [`ScalarValue`] representing PI
1183    pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> {
1184        match datatype {
1185            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)),
1186            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)),
1187            _ => _internal_err!("PI is not supported for data type: {:?}", datatype),
1188        }
1189    }
1190
1191    /// Returns a [`ScalarValue`] representing PI's upper bound
1192    pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> {
1193        // TODO: replace the constants with next_up/next_down when
1194        // they are stabilized: https://doc.rust-lang.org/std/primitive.f64.html#method.next_up
1195        match datatype {
1196            DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)),
1197            DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)),
1198            _ => {
1199                _internal_err!("PI_UPPER is not supported for data type: {:?}", datatype)
1200            }
1201        }
1202    }
1203
1204    /// Returns a [`ScalarValue`] representing -PI's lower bound
1205    pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> {
1206        match datatype {
1207            DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)),
1208            DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)),
1209            _ => {
1210                _internal_err!("-PI_LOWER is not supported for data type: {:?}", datatype)
1211            }
1212        }
1213    }
1214
1215    /// Returns a [`ScalarValue`] representing FRAC_PI_2's upper bound
1216    pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> {
1217        match datatype {
1218            DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)),
1219            DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)),
1220            _ => {
1221                _internal_err!(
1222                    "PI_UPPER/2 is not supported for data type: {:?}",
1223                    datatype
1224                )
1225            }
1226        }
1227    }
1228
1229    // Returns a [`ScalarValue`] representing FRAC_PI_2's lower bound
1230    pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> {
1231        match datatype {
1232            DataType::Float32 => {
1233                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32))
1234            }
1235            DataType::Float64 => {
1236                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64))
1237            }
1238            _ => {
1239                _internal_err!(
1240                    "-PI/2_LOWER is not supported for data type: {:?}",
1241                    datatype
1242                )
1243            }
1244        }
1245    }
1246
1247    /// Returns a [`ScalarValue`] representing -PI
1248    pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> {
1249        match datatype {
1250            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)),
1251            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)),
1252            _ => _internal_err!("-PI is not supported for data type: {:?}", datatype),
1253        }
1254    }
1255
1256    /// Returns a [`ScalarValue`] representing PI/2
1257    pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1258        match datatype {
1259            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)),
1260            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)),
1261            _ => _internal_err!("PI/2 is not supported for data type: {:?}", datatype),
1262        }
1263    }
1264
1265    /// Returns a [`ScalarValue`] representing -PI/2
1266    pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1267        match datatype {
1268            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)),
1269            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)),
1270            _ => _internal_err!("-PI/2 is not supported for data type: {:?}", datatype),
1271        }
1272    }
1273
1274    /// Returns a [`ScalarValue`] representing infinity
1275    pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> {
1276        match datatype {
1277            DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)),
1278            DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)),
1279            _ => {
1280                _internal_err!("Infinity is not supported for data type: {:?}", datatype)
1281            }
1282        }
1283    }
1284
1285    /// Returns a [`ScalarValue`] representing negative infinity
1286    pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> {
1287        match datatype {
1288            DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)),
1289            DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)),
1290            _ => {
1291                _internal_err!(
1292                    "Negative Infinity is not supported for data type: {:?}",
1293                    datatype
1294                )
1295            }
1296        }
1297    }
1298
1299    /// Create a zero value in the given type.
1300    pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> {
1301        Ok(match datatype {
1302            DataType::Boolean => ScalarValue::Boolean(Some(false)),
1303            DataType::Int8 => ScalarValue::Int8(Some(0)),
1304            DataType::Int16 => ScalarValue::Int16(Some(0)),
1305            DataType::Int32 => ScalarValue::Int32(Some(0)),
1306            DataType::Int64 => ScalarValue::Int64(Some(0)),
1307            DataType::UInt8 => ScalarValue::UInt8(Some(0)),
1308            DataType::UInt16 => ScalarValue::UInt16(Some(0)),
1309            DataType::UInt32 => ScalarValue::UInt32(Some(0)),
1310            DataType::UInt64 => ScalarValue::UInt64(Some(0)),
1311            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(0.0))),
1312            DataType::Float32 => ScalarValue::Float32(Some(0.0)),
1313            DataType::Float64 => ScalarValue::Float64(Some(0.0)),
1314            DataType::Decimal128(precision, scale) => {
1315                ScalarValue::Decimal128(Some(0), *precision, *scale)
1316            }
1317            DataType::Decimal256(precision, scale) => {
1318                ScalarValue::Decimal256(Some(i256::ZERO), *precision, *scale)
1319            }
1320            DataType::Timestamp(TimeUnit::Second, tz) => {
1321                ScalarValue::TimestampSecond(Some(0), tz.clone())
1322            }
1323            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1324                ScalarValue::TimestampMillisecond(Some(0), tz.clone())
1325            }
1326            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
1327                ScalarValue::TimestampMicrosecond(Some(0), tz.clone())
1328            }
1329            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
1330                ScalarValue::TimestampNanosecond(Some(0), tz.clone())
1331            }
1332            DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(Some(0)),
1333            DataType::Time32(TimeUnit::Millisecond) => {
1334                ScalarValue::Time32Millisecond(Some(0))
1335            }
1336            DataType::Time64(TimeUnit::Microsecond) => {
1337                ScalarValue::Time64Microsecond(Some(0))
1338            }
1339            DataType::Time64(TimeUnit::Nanosecond) => {
1340                ScalarValue::Time64Nanosecond(Some(0))
1341            }
1342            DataType::Interval(IntervalUnit::YearMonth) => {
1343                ScalarValue::IntervalYearMonth(Some(0))
1344            }
1345            DataType::Interval(IntervalUnit::DayTime) => {
1346                ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
1347            }
1348            DataType::Interval(IntervalUnit::MonthDayNano) => {
1349                ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
1350            }
1351            DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(Some(0)),
1352            DataType::Duration(TimeUnit::Millisecond) => {
1353                ScalarValue::DurationMillisecond(Some(0))
1354            }
1355            DataType::Duration(TimeUnit::Microsecond) => {
1356                ScalarValue::DurationMicrosecond(Some(0))
1357            }
1358            DataType::Duration(TimeUnit::Nanosecond) => {
1359                ScalarValue::DurationNanosecond(Some(0))
1360            }
1361            DataType::Date32 => ScalarValue::Date32(Some(0)),
1362            DataType::Date64 => ScalarValue::Date64(Some(0)),
1363            _ => {
1364                return _not_impl_err!(
1365                    "Can't create a zero scalar from data_type \"{datatype:?}\""
1366                );
1367            }
1368        })
1369    }
1370
1371    /// Create an one value in the given type.
1372    pub fn new_one(datatype: &DataType) -> Result<ScalarValue> {
1373        Ok(match datatype {
1374            DataType::Int8 => ScalarValue::Int8(Some(1)),
1375            DataType::Int16 => ScalarValue::Int16(Some(1)),
1376            DataType::Int32 => ScalarValue::Int32(Some(1)),
1377            DataType::Int64 => ScalarValue::Int64(Some(1)),
1378            DataType::UInt8 => ScalarValue::UInt8(Some(1)),
1379            DataType::UInt16 => ScalarValue::UInt16(Some(1)),
1380            DataType::UInt32 => ScalarValue::UInt32(Some(1)),
1381            DataType::UInt64 => ScalarValue::UInt64(Some(1)),
1382            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(1.0))),
1383            DataType::Float32 => ScalarValue::Float32(Some(1.0)),
1384            DataType::Float64 => ScalarValue::Float64(Some(1.0)),
1385            _ => {
1386                return _not_impl_err!(
1387                    "Can't create an one scalar from data_type \"{datatype:?}\""
1388                );
1389            }
1390        })
1391    }
1392
1393    /// Create a negative one value in the given type.
1394    pub fn new_negative_one(datatype: &DataType) -> Result<ScalarValue> {
1395        Ok(match datatype {
1396            DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)),
1397            DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)),
1398            DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)),
1399            DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)),
1400            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(-1.0))),
1401            DataType::Float32 => ScalarValue::Float32(Some(-1.0)),
1402            DataType::Float64 => ScalarValue::Float64(Some(-1.0)),
1403            _ => {
1404                return _not_impl_err!(
1405                    "Can't create a negative one scalar from data_type \"{datatype:?}\""
1406                );
1407            }
1408        })
1409    }
1410
1411    pub fn new_ten(datatype: &DataType) -> Result<ScalarValue> {
1412        Ok(match datatype {
1413            DataType::Int8 => ScalarValue::Int8(Some(10)),
1414            DataType::Int16 => ScalarValue::Int16(Some(10)),
1415            DataType::Int32 => ScalarValue::Int32(Some(10)),
1416            DataType::Int64 => ScalarValue::Int64(Some(10)),
1417            DataType::UInt8 => ScalarValue::UInt8(Some(10)),
1418            DataType::UInt16 => ScalarValue::UInt16(Some(10)),
1419            DataType::UInt32 => ScalarValue::UInt32(Some(10)),
1420            DataType::UInt64 => ScalarValue::UInt64(Some(10)),
1421            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(10.0))),
1422            DataType::Float32 => ScalarValue::Float32(Some(10.0)),
1423            DataType::Float64 => ScalarValue::Float64(Some(10.0)),
1424            _ => {
1425                return _not_impl_err!(
1426                    "Can't create a ten scalar from data_type \"{datatype:?}\""
1427                );
1428            }
1429        })
1430    }
1431
1432    /// return the [`DataType`] of this `ScalarValue`
1433    pub fn data_type(&self) -> DataType {
1434        match self {
1435            ScalarValue::Boolean(_) => DataType::Boolean,
1436            ScalarValue::UInt8(_) => DataType::UInt8,
1437            ScalarValue::UInt16(_) => DataType::UInt16,
1438            ScalarValue::UInt32(_) => DataType::UInt32,
1439            ScalarValue::UInt64(_) => DataType::UInt64,
1440            ScalarValue::Int8(_) => DataType::Int8,
1441            ScalarValue::Int16(_) => DataType::Int16,
1442            ScalarValue::Int32(_) => DataType::Int32,
1443            ScalarValue::Int64(_) => DataType::Int64,
1444            ScalarValue::Decimal128(_, precision, scale) => {
1445                DataType::Decimal128(*precision, *scale)
1446            }
1447            ScalarValue::Decimal256(_, precision, scale) => {
1448                DataType::Decimal256(*precision, *scale)
1449            }
1450            ScalarValue::TimestampSecond(_, tz_opt) => {
1451                DataType::Timestamp(TimeUnit::Second, tz_opt.clone())
1452            }
1453            ScalarValue::TimestampMillisecond(_, tz_opt) => {
1454                DataType::Timestamp(TimeUnit::Millisecond, tz_opt.clone())
1455            }
1456            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
1457                DataType::Timestamp(TimeUnit::Microsecond, tz_opt.clone())
1458            }
1459            ScalarValue::TimestampNanosecond(_, tz_opt) => {
1460                DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1461            }
1462            ScalarValue::Float16(_) => DataType::Float16,
1463            ScalarValue::Float32(_) => DataType::Float32,
1464            ScalarValue::Float64(_) => DataType::Float64,
1465            ScalarValue::Utf8(_) => DataType::Utf8,
1466            ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
1467            ScalarValue::Utf8View(_) => DataType::Utf8View,
1468            ScalarValue::Binary(_) => DataType::Binary,
1469            ScalarValue::BinaryView(_) => DataType::BinaryView,
1470            ScalarValue::FixedSizeBinary(sz, _) => DataType::FixedSizeBinary(*sz),
1471            ScalarValue::LargeBinary(_) => DataType::LargeBinary,
1472            ScalarValue::List(arr) => arr.data_type().to_owned(),
1473            ScalarValue::LargeList(arr) => arr.data_type().to_owned(),
1474            ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(),
1475            ScalarValue::Struct(arr) => arr.data_type().to_owned(),
1476            ScalarValue::Map(arr) => arr.data_type().to_owned(),
1477            ScalarValue::Date32(_) => DataType::Date32,
1478            ScalarValue::Date64(_) => DataType::Date64,
1479            ScalarValue::Time32Second(_) => DataType::Time32(TimeUnit::Second),
1480            ScalarValue::Time32Millisecond(_) => DataType::Time32(TimeUnit::Millisecond),
1481            ScalarValue::Time64Microsecond(_) => DataType::Time64(TimeUnit::Microsecond),
1482            ScalarValue::Time64Nanosecond(_) => DataType::Time64(TimeUnit::Nanosecond),
1483            ScalarValue::IntervalYearMonth(_) => {
1484                DataType::Interval(IntervalUnit::YearMonth)
1485            }
1486            ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime),
1487            ScalarValue::IntervalMonthDayNano(_) => {
1488                DataType::Interval(IntervalUnit::MonthDayNano)
1489            }
1490            ScalarValue::DurationSecond(_) => DataType::Duration(TimeUnit::Second),
1491            ScalarValue::DurationMillisecond(_) => {
1492                DataType::Duration(TimeUnit::Millisecond)
1493            }
1494            ScalarValue::DurationMicrosecond(_) => {
1495                DataType::Duration(TimeUnit::Microsecond)
1496            }
1497            ScalarValue::DurationNanosecond(_) => {
1498                DataType::Duration(TimeUnit::Nanosecond)
1499            }
1500            ScalarValue::Union(_, fields, mode) => DataType::Union(fields.clone(), *mode),
1501            ScalarValue::Dictionary(k, v) => {
1502                DataType::Dictionary(k.clone(), Box::new(v.data_type()))
1503            }
1504            ScalarValue::Null => DataType::Null,
1505        }
1506    }
1507
1508    /// Calculate arithmetic negation for a scalar value
1509    pub fn arithmetic_negate(&self) -> Result<Self> {
1510        fn neg_checked_with_ctx<T: ArrowNativeTypeOp>(
1511            v: T,
1512            ctx: impl Fn() -> String,
1513        ) -> Result<T> {
1514            v.neg_checked()
1515                .map_err(|e| arrow_datafusion_err!(e).context(ctx()))
1516        }
1517        match self {
1518            ScalarValue::Int8(None)
1519            | ScalarValue::Int16(None)
1520            | ScalarValue::Int32(None)
1521            | ScalarValue::Int64(None)
1522            | ScalarValue::Float16(None)
1523            | ScalarValue::Float32(None)
1524            | ScalarValue::Float64(None) => Ok(self.clone()),
1525            ScalarValue::Float16(Some(v)) => {
1526                Ok(ScalarValue::Float16(Some(f16::from_f32(-v.to_f32()))))
1527            }
1528            ScalarValue::Float64(Some(v)) => Ok(ScalarValue::Float64(Some(-v))),
1529            ScalarValue::Float32(Some(v)) => Ok(ScalarValue::Float32(Some(-v))),
1530            ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(v.neg_checked()?))),
1531            ScalarValue::Int16(Some(v)) => Ok(ScalarValue::Int16(Some(v.neg_checked()?))),
1532            ScalarValue::Int32(Some(v)) => Ok(ScalarValue::Int32(Some(v.neg_checked()?))),
1533            ScalarValue::Int64(Some(v)) => Ok(ScalarValue::Int64(Some(v.neg_checked()?))),
1534            ScalarValue::IntervalYearMonth(Some(v)) => Ok(
1535                ScalarValue::IntervalYearMonth(Some(neg_checked_with_ctx(*v, || {
1536                    format!("In negation of IntervalYearMonth({v})")
1537                })?)),
1538            ),
1539            ScalarValue::IntervalDayTime(Some(v)) => {
1540                let (days, ms) = IntervalDayTimeType::to_parts(*v);
1541                let val = IntervalDayTimeType::make_value(
1542                    neg_checked_with_ctx(days, || {
1543                        format!("In negation of days {days} in IntervalDayTime")
1544                    })?,
1545                    neg_checked_with_ctx(ms, || {
1546                        format!("In negation of milliseconds {ms} in IntervalDayTime")
1547                    })?,
1548                );
1549                Ok(ScalarValue::IntervalDayTime(Some(val)))
1550            }
1551            ScalarValue::IntervalMonthDayNano(Some(v)) => {
1552                let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);
1553                let val = IntervalMonthDayNanoType::make_value(
1554                    neg_checked_with_ctx(months, || {
1555                        format!("In negation of months {months} of IntervalMonthDayNano")
1556                    })?,
1557                    neg_checked_with_ctx(days, || {
1558                        format!("In negation of days {days} of IntervalMonthDayNano")
1559                    })?,
1560                    neg_checked_with_ctx(nanos, || {
1561                        format!("In negation of nanos {nanos} of IntervalMonthDayNano")
1562                    })?,
1563                );
1564                Ok(ScalarValue::IntervalMonthDayNano(Some(val)))
1565            }
1566            ScalarValue::Decimal128(Some(v), precision, scale) => {
1567                Ok(ScalarValue::Decimal128(
1568                    Some(neg_checked_with_ctx(*v, || {
1569                        format!("In negation of Decimal128({v}, {precision}, {scale})")
1570                    })?),
1571                    *precision,
1572                    *scale,
1573                ))
1574            }
1575            ScalarValue::Decimal256(Some(v), precision, scale) => {
1576                Ok(ScalarValue::Decimal256(
1577                    Some(neg_checked_with_ctx(*v, || {
1578                        format!("In negation of Decimal256({v}, {precision}, {scale})")
1579                    })?),
1580                    *precision,
1581                    *scale,
1582                ))
1583            }
1584            ScalarValue::TimestampSecond(Some(v), tz) => {
1585                Ok(ScalarValue::TimestampSecond(
1586                    Some(neg_checked_with_ctx(*v, || {
1587                        format!("In negation of TimestampSecond({v})")
1588                    })?),
1589                    tz.clone(),
1590                ))
1591            }
1592            ScalarValue::TimestampNanosecond(Some(v), tz) => {
1593                Ok(ScalarValue::TimestampNanosecond(
1594                    Some(neg_checked_with_ctx(*v, || {
1595                        format!("In negation of TimestampNanoSecond({v})")
1596                    })?),
1597                    tz.clone(),
1598                ))
1599            }
1600            ScalarValue::TimestampMicrosecond(Some(v), tz) => {
1601                Ok(ScalarValue::TimestampMicrosecond(
1602                    Some(neg_checked_with_ctx(*v, || {
1603                        format!("In negation of TimestampMicroSecond({v})")
1604                    })?),
1605                    tz.clone(),
1606                ))
1607            }
1608            ScalarValue::TimestampMillisecond(Some(v), tz) => {
1609                Ok(ScalarValue::TimestampMillisecond(
1610                    Some(neg_checked_with_ctx(*v, || {
1611                        format!("In negation of TimestampMilliSecond({v})")
1612                    })?),
1613                    tz.clone(),
1614                ))
1615            }
1616            value => _internal_err!(
1617                "Can not run arithmetic negative on scalar value {value:?}"
1618            ),
1619        }
1620    }
1621
1622    /// Wrapping addition of `ScalarValue`
1623    ///
1624    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1625    /// should operate on Arrays directly, using vectorized array kernels
1626    pub fn add<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1627        let r = add_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1628        Self::try_from_array(r.as_ref(), 0)
1629    }
1630    /// Checked addition of `ScalarValue`
1631    ///
1632    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1633    /// should operate on Arrays directly, using vectorized array kernels
1634    pub fn add_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1635        let r = add(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1636        Self::try_from_array(r.as_ref(), 0)
1637    }
1638
1639    /// Wrapping subtraction of `ScalarValue`
1640    ///
1641    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1642    /// should operate on Arrays directly, using vectorized array kernels
1643    pub fn sub<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1644        let r = sub_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1645        Self::try_from_array(r.as_ref(), 0)
1646    }
1647
1648    /// Checked subtraction of `ScalarValue`
1649    ///
1650    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1651    /// should operate on Arrays directly, using vectorized array kernels
1652    pub fn sub_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1653        let r = sub(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1654        Self::try_from_array(r.as_ref(), 0)
1655    }
1656
1657    /// Wrapping multiplication of `ScalarValue`
1658    ///
1659    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1660    /// should operate on Arrays directly, using vectorized array kernels.
1661    pub fn mul<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1662        let r = mul_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1663        Self::try_from_array(r.as_ref(), 0)
1664    }
1665
1666    /// Checked multiplication of `ScalarValue`
1667    ///
1668    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1669    /// should operate on Arrays directly, using vectorized array kernels.
1670    pub fn mul_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1671        let r = mul(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1672        Self::try_from_array(r.as_ref(), 0)
1673    }
1674
1675    /// Performs `lhs / rhs`
1676    ///
1677    /// Overflow or division by zero will result in an error, with exception to
1678    /// floating point numbers, which instead follow the IEEE 754 rules.
1679    ///
1680    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1681    /// should operate on Arrays directly, using vectorized array kernels.
1682    pub fn div<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1683        let r = div(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1684        Self::try_from_array(r.as_ref(), 0)
1685    }
1686
1687    /// Performs `lhs % rhs`
1688    ///
1689    /// Overflow or division by zero will result in an error, with exception to
1690    /// floating point numbers, which instead follow the IEEE 754 rules.
1691    ///
1692    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
1693    /// should operate on Arrays directly, using vectorized array kernels.
1694    pub fn rem<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
1695        let r = rem(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
1696        Self::try_from_array(r.as_ref(), 0)
1697    }
1698
1699    pub fn is_unsigned(&self) -> bool {
1700        matches!(
1701            self,
1702            ScalarValue::UInt8(_)
1703                | ScalarValue::UInt16(_)
1704                | ScalarValue::UInt32(_)
1705                | ScalarValue::UInt64(_)
1706        )
1707    }
1708
1709    /// whether this value is null or not.
1710    pub fn is_null(&self) -> bool {
1711        match self {
1712            ScalarValue::Boolean(v) => v.is_none(),
1713            ScalarValue::Null => true,
1714            ScalarValue::Float16(v) => v.is_none(),
1715            ScalarValue::Float32(v) => v.is_none(),
1716            ScalarValue::Float64(v) => v.is_none(),
1717            ScalarValue::Decimal128(v, _, _) => v.is_none(),
1718            ScalarValue::Decimal256(v, _, _) => v.is_none(),
1719            ScalarValue::Int8(v) => v.is_none(),
1720            ScalarValue::Int16(v) => v.is_none(),
1721            ScalarValue::Int32(v) => v.is_none(),
1722            ScalarValue::Int64(v) => v.is_none(),
1723            ScalarValue::UInt8(v) => v.is_none(),
1724            ScalarValue::UInt16(v) => v.is_none(),
1725            ScalarValue::UInt32(v) => v.is_none(),
1726            ScalarValue::UInt64(v) => v.is_none(),
1727            ScalarValue::Utf8(v)
1728            | ScalarValue::Utf8View(v)
1729            | ScalarValue::LargeUtf8(v) => v.is_none(),
1730            ScalarValue::Binary(v)
1731            | ScalarValue::BinaryView(v)
1732            | ScalarValue::FixedSizeBinary(_, v)
1733            | ScalarValue::LargeBinary(v) => v.is_none(),
1734            // arr.len() should be 1 for a list scalar, but we don't seem to
1735            // enforce that anywhere, so we still check against array length.
1736            ScalarValue::List(arr) => arr.len() == arr.null_count(),
1737            ScalarValue::LargeList(arr) => arr.len() == arr.null_count(),
1738            ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(),
1739            ScalarValue::Struct(arr) => arr.len() == arr.null_count(),
1740            ScalarValue::Map(arr) => arr.len() == arr.null_count(),
1741            ScalarValue::Date32(v) => v.is_none(),
1742            ScalarValue::Date64(v) => v.is_none(),
1743            ScalarValue::Time32Second(v) => v.is_none(),
1744            ScalarValue::Time32Millisecond(v) => v.is_none(),
1745            ScalarValue::Time64Microsecond(v) => v.is_none(),
1746            ScalarValue::Time64Nanosecond(v) => v.is_none(),
1747            ScalarValue::TimestampSecond(v, _) => v.is_none(),
1748            ScalarValue::TimestampMillisecond(v, _) => v.is_none(),
1749            ScalarValue::TimestampMicrosecond(v, _) => v.is_none(),
1750            ScalarValue::TimestampNanosecond(v, _) => v.is_none(),
1751            ScalarValue::IntervalYearMonth(v) => v.is_none(),
1752            ScalarValue::IntervalDayTime(v) => v.is_none(),
1753            ScalarValue::IntervalMonthDayNano(v) => v.is_none(),
1754            ScalarValue::DurationSecond(v) => v.is_none(),
1755            ScalarValue::DurationMillisecond(v) => v.is_none(),
1756            ScalarValue::DurationMicrosecond(v) => v.is_none(),
1757            ScalarValue::DurationNanosecond(v) => v.is_none(),
1758            ScalarValue::Union(v, _, _) => match v {
1759                Some((_, s)) => s.is_null(),
1760                None => true,
1761            },
1762            ScalarValue::Dictionary(_, v) => v.is_null(),
1763        }
1764    }
1765
1766    /// Absolute distance between two numeric values (of the same type). This method will return
1767    /// None if either one of the arguments are null. It might also return None if the resulting
1768    /// distance is greater than [`usize::MAX`]. If the type is a float, then the distance will be
1769    /// rounded to the nearest integer.
1770    ///
1771    ///
1772    /// Note: the datatype itself must support subtraction.
1773    pub fn distance(&self, other: &ScalarValue) -> Option<usize> {
1774        match (self, other) {
1775            (Self::Int8(Some(l)), Self::Int8(Some(r))) => Some(l.abs_diff(*r) as _),
1776            (Self::Int16(Some(l)), Self::Int16(Some(r))) => Some(l.abs_diff(*r) as _),
1777            (Self::Int32(Some(l)), Self::Int32(Some(r))) => Some(l.abs_diff(*r) as _),
1778            (Self::Int64(Some(l)), Self::Int64(Some(r))) => Some(l.abs_diff(*r) as _),
1779            (Self::UInt8(Some(l)), Self::UInt8(Some(r))) => Some(l.abs_diff(*r) as _),
1780            (Self::UInt16(Some(l)), Self::UInt16(Some(r))) => Some(l.abs_diff(*r) as _),
1781            (Self::UInt32(Some(l)), Self::UInt32(Some(r))) => Some(l.abs_diff(*r) as _),
1782            (Self::UInt64(Some(l)), Self::UInt64(Some(r))) => Some(l.abs_diff(*r) as _),
1783            // TODO: we might want to look into supporting ceil/floor here for floats.
1784            (Self::Float16(Some(l)), Self::Float16(Some(r))) => {
1785                Some((f16::to_f32(*l) - f16::to_f32(*r)).abs().round() as _)
1786            }
1787            (Self::Float32(Some(l)), Self::Float32(Some(r))) => {
1788                Some((l - r).abs().round() as _)
1789            }
1790            (Self::Float64(Some(l)), Self::Float64(Some(r))) => {
1791                Some((l - r).abs().round() as _)
1792            }
1793            _ => None,
1794        }
1795    }
1796
    /// Converts this scalar value into a 1-row array.
    ///
    /// This is a shorthand for calling `to_array_of_size` with a size of 1.
    ///
    /// # Errors
    ///
    /// Errors if the ScalarValue cannot be converted into a 1-row array
    pub fn to_array(&self) -> Result<ArrayRef> {
        self.to_array_of_size(1)
    }
1805
1806    /// Converts a scalar into an arrow [`Scalar`] (which implements
1807    /// the [`Datum`] interface).
1808    ///
1809    /// This can be used to call arrow compute kernels such as `lt`
1810    ///
1811    /// # Errors
1812    ///
1813    /// Errors if the ScalarValue cannot be converted into a 1-row array
1814    ///
1815    /// # Example
1816    /// ```
1817    /// use datafusion_common::ScalarValue;
1818    /// use arrow::array::{BooleanArray, Int32Array};
1819    ///
1820    /// let arr = Int32Array::from(vec![Some(1), None, Some(10)]);
1821    /// let five = ScalarValue::Int32(Some(5));
1822    ///
1823    /// let result = arrow::compute::kernels::cmp::lt(
1824    ///   &arr,
1825    ///   &five.to_scalar().unwrap(),
1826    /// ).unwrap();
1827    ///
1828    /// let expected = BooleanArray::from(vec![
1829    ///     Some(true),
1830    ///     None,
1831    ///     Some(false)
1832    ///   ]
1833    /// );
1834    ///
1835    /// assert_eq!(&result, &expected);
1836    /// ```
1837    /// [`Datum`]: arrow::array::Datum
1838    pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
1839        Ok(Scalar::new(self.to_array_of_size(1)?))
1840    }
1841
1842    /// Converts an iterator of references [`ScalarValue`] into an [`ArrayRef`]
1843    /// corresponding to those values. For example, an iterator of
1844    /// [`ScalarValue::Int32`] would be converted to an [`Int32Array`].
1845    ///
1846    /// Returns an error if the iterator is empty or if the
1847    /// [`ScalarValue`]s are not all the same type
1848    ///
1849    /// # Example
1850    /// ```
1851    /// use datafusion_common::ScalarValue;
1852    /// use arrow::array::{ArrayRef, BooleanArray};
1853    ///
1854    /// let scalars = vec![
1855    ///   ScalarValue::Boolean(Some(true)),
1856    ///   ScalarValue::Boolean(None),
1857    ///   ScalarValue::Boolean(Some(false)),
1858    /// ];
1859    ///
1860    /// // Build an Array from the list of ScalarValues
1861    /// let array = ScalarValue::iter_to_array(scalars.into_iter())
1862    ///   .unwrap();
1863    ///
1864    /// let expected: ArrayRef = std::sync::Arc::new(
1865    ///   BooleanArray::from(vec![
1866    ///     Some(true),
1867    ///     None,
1868    ///     Some(false)
1869    ///   ]
1870    /// ));
1871    ///
1872    /// assert_eq!(&array, &expected);
1873    /// ```
1874    pub fn iter_to_array(
1875        scalars: impl IntoIterator<Item = ScalarValue>,
1876    ) -> Result<ArrayRef> {
1877        let mut scalars = scalars.into_iter().peekable();
1878
1879        // figure out the type based on the first element
1880        let data_type = match scalars.peek() {
1881            None => {
1882                return _exec_err!("Empty iterator passed to ScalarValue::iter_to_array");
1883            }
1884            Some(sv) => sv.data_type(),
1885        };
1886
1887        /// Creates an array of $ARRAY_TY by unpacking values of
1888        /// SCALAR_TY for primitive types
1889        macro_rules! build_array_primitive {
1890            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
1891                {
1892                    let array = scalars.map(|sv| {
1893                        if let ScalarValue::$SCALAR_TY(v) = sv {
1894                            Ok(v)
1895                        } else {
1896                            _exec_err!(
1897                                "Inconsistent types in ScalarValue::iter_to_array. \
1898                                    Expected {:?}, got {:?}",
1899                                data_type, sv
1900                            )
1901                        }
1902                    })
1903                    .collect::<Result<$ARRAY_TY>>()?;
1904                    Arc::new(array)
1905                }
1906            }};
1907        }
1908
1909        macro_rules! build_array_primitive_tz {
1910            ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
1911                {
1912                    let array = scalars.map(|sv| {
1913                        if let ScalarValue::$SCALAR_TY(v, _) = sv {
1914                            Ok(v)
1915                        } else {
1916                            _exec_err!(
1917                                "Inconsistent types in ScalarValue::iter_to_array. \
1918                                    Expected {:?}, got {:?}",
1919                                data_type, sv
1920                            )
1921                        }
1922                    })
1923                    .collect::<Result<$ARRAY_TY>>()?;
1924                    Arc::new(array.with_timezone_opt($TZ.clone()))
1925                }
1926            }};
1927        }
1928
1929        /// Creates an array of $ARRAY_TY by unpacking values of
1930        /// SCALAR_TY for "string-like" types.
1931        macro_rules! build_array_string {
1932            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
1933                {
1934                    let array = scalars.map(|sv| {
1935                        if let ScalarValue::$SCALAR_TY(v) = sv {
1936                            Ok(v)
1937                        } else {
1938                            _exec_err!(
1939                                "Inconsistent types in ScalarValue::iter_to_array. \
1940                                    Expected {:?}, got {:?}",
1941                                data_type, sv
1942                            )
1943                        }
1944                    })
1945                    .collect::<Result<$ARRAY_TY>>()?;
1946                    Arc::new(array)
1947                }
1948            }};
1949        }
1950
1951        let array: ArrayRef = match &data_type {
1952            DataType::Decimal128(precision, scale) => {
1953                let decimal_array =
1954                    ScalarValue::iter_to_decimal_array(scalars, *precision, *scale)?;
1955                Arc::new(decimal_array)
1956            }
1957            DataType::Decimal256(precision, scale) => {
1958                let decimal_array =
1959                    ScalarValue::iter_to_decimal256_array(scalars, *precision, *scale)?;
1960                Arc::new(decimal_array)
1961            }
1962            DataType::Null => ScalarValue::iter_to_null_array(scalars)?,
1963            DataType::Boolean => build_array_primitive!(BooleanArray, Boolean),
1964            DataType::Float16 => build_array_primitive!(Float16Array, Float16),
1965            DataType::Float32 => build_array_primitive!(Float32Array, Float32),
1966            DataType::Float64 => build_array_primitive!(Float64Array, Float64),
1967            DataType::Int8 => build_array_primitive!(Int8Array, Int8),
1968            DataType::Int16 => build_array_primitive!(Int16Array, Int16),
1969            DataType::Int32 => build_array_primitive!(Int32Array, Int32),
1970            DataType::Int64 => build_array_primitive!(Int64Array, Int64),
1971            DataType::UInt8 => build_array_primitive!(UInt8Array, UInt8),
1972            DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
1973            DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
1974            DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
1975            DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
1976            DataType::Utf8 => build_array_string!(StringArray, Utf8),
1977            DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
1978            DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView),
1979            DataType::Binary => build_array_string!(BinaryArray, Binary),
1980            DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
1981            DataType::Date32 => build_array_primitive!(Date32Array, Date32),
1982            DataType::Date64 => build_array_primitive!(Date64Array, Date64),
1983            DataType::Time32(TimeUnit::Second) => {
1984                build_array_primitive!(Time32SecondArray, Time32Second)
1985            }
1986            DataType::Time32(TimeUnit::Millisecond) => {
1987                build_array_primitive!(Time32MillisecondArray, Time32Millisecond)
1988            }
1989            DataType::Time64(TimeUnit::Microsecond) => {
1990                build_array_primitive!(Time64MicrosecondArray, Time64Microsecond)
1991            }
1992            DataType::Time64(TimeUnit::Nanosecond) => {
1993                build_array_primitive!(Time64NanosecondArray, Time64Nanosecond)
1994            }
1995            DataType::Timestamp(TimeUnit::Second, tz) => {
1996                build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz)
1997            }
1998            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1999                build_array_primitive_tz!(
2000                    TimestampMillisecondArray,
2001                    TimestampMillisecond,
2002                    tz
2003                )
2004            }
2005            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
2006                build_array_primitive_tz!(
2007                    TimestampMicrosecondArray,
2008                    TimestampMicrosecond,
2009                    tz
2010                )
2011            }
2012            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
2013                build_array_primitive_tz!(
2014                    TimestampNanosecondArray,
2015                    TimestampNanosecond,
2016                    tz
2017                )
2018            }
2019            DataType::Duration(TimeUnit::Second) => {
2020                build_array_primitive!(DurationSecondArray, DurationSecond)
2021            }
2022            DataType::Duration(TimeUnit::Millisecond) => {
2023                build_array_primitive!(DurationMillisecondArray, DurationMillisecond)
2024            }
2025            DataType::Duration(TimeUnit::Microsecond) => {
2026                build_array_primitive!(DurationMicrosecondArray, DurationMicrosecond)
2027            }
2028            DataType::Duration(TimeUnit::Nanosecond) => {
2029                build_array_primitive!(DurationNanosecondArray, DurationNanosecond)
2030            }
2031            DataType::Interval(IntervalUnit::DayTime) => {
2032                build_array_primitive!(IntervalDayTimeArray, IntervalDayTime)
2033            }
2034            DataType::Interval(IntervalUnit::YearMonth) => {
2035                build_array_primitive!(IntervalYearMonthArray, IntervalYearMonth)
2036            }
2037            DataType::Interval(IntervalUnit::MonthDayNano) => {
2038                build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano)
2039            }
2040            DataType::FixedSizeList(_, _) => {
2041                // arrow::compute::concat does not allow inconsistent types including the size of FixedSizeList.
2042                // The length of nulls here we got is 1, so we need to resize the length of nulls to
2043                // the length of non-nulls.
2044                let mut arrays =
2045                    scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2046                let first_non_null_data_type = arrays
2047                    .iter()
2048                    .find(|sv| !sv.is_null(0))
2049                    .map(|sv| sv.data_type().to_owned());
2050                if let Some(DataType::FixedSizeList(f, l)) = first_non_null_data_type {
2051                    for array in arrays.iter_mut() {
2052                        if array.is_null(0) {
2053                            *array = Arc::new(FixedSizeListArray::new_null(
2054                                Arc::clone(&f),
2055                                l,
2056                                1,
2057                            ));
2058                        }
2059                    }
2060                }
2061                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2062                arrow::compute::concat(arrays.as_slice())?
2063            }
2064            DataType::List(_)
2065            | DataType::LargeList(_)
2066            | DataType::Map(_, _)
2067            | DataType::Struct(_)
2068            | DataType::Union(_, _) => {
2069                let arrays = scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2070                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2071                arrow::compute::concat(arrays.as_slice())?
2072            }
2073            DataType::Dictionary(key_type, value_type) => {
2074                // create the values array
2075                let value_scalars = scalars
2076                    .map(|scalar| match scalar {
2077                        ScalarValue::Dictionary(inner_key_type, scalar) => {
2078                            if &inner_key_type == key_type {
2079                                Ok(*scalar)
2080                            } else {
2081                                _exec_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})")
2082                            }
2083                        }
2084                        _ => {
2085                            _exec_err!(
2086                                "Expected scalar of type {value_type} but found: {scalar} {scalar:?}"
2087                            )
2088                        }
2089                    })
2090                    .collect::<Result<Vec<_>>>()?;
2091
2092                let values = Self::iter_to_array(value_scalars)?;
2093                assert_eq!(values.data_type(), value_type.as_ref());
2094
2095                match key_type.as_ref() {
2096                    DataType::Int8 => dict_from_values::<Int8Type>(values)?,
2097                    DataType::Int16 => dict_from_values::<Int16Type>(values)?,
2098                    DataType::Int32 => dict_from_values::<Int32Type>(values)?,
2099                    DataType::Int64 => dict_from_values::<Int64Type>(values)?,
2100                    DataType::UInt8 => dict_from_values::<UInt8Type>(values)?,
2101                    DataType::UInt16 => dict_from_values::<UInt16Type>(values)?,
2102                    DataType::UInt32 => dict_from_values::<UInt32Type>(values)?,
2103                    DataType::UInt64 => dict_from_values::<UInt64Type>(values)?,
2104                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2105                }
2106            }
2107            DataType::FixedSizeBinary(size) => {
2108                let array = scalars
2109                    .map(|sv| {
2110                        if let ScalarValue::FixedSizeBinary(_, v) = sv {
2111                            Ok(v)
2112                        } else {
2113                            _exec_err!(
2114                                "Inconsistent types in ScalarValue::iter_to_array. \
2115                                Expected {data_type:?}, got {sv:?}"
2116                            )
2117                        }
2118                    })
2119                    .collect::<Result<Vec<_>>>()?;
2120                let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2121                    array.into_iter(),
2122                    *size,
2123                )?;
2124                Arc::new(array)
2125            }
2126            // explicitly enumerate unsupported types so newly added
2127            // types must be acknowledged, Time32 and Time64 types are
2128            // not supported if the TimeUnit is not valid (Time32 can
2129            // only be used with Second and Millisecond, Time64 only
2130            // with Microsecond and Nanosecond)
2131            DataType::Time32(TimeUnit::Microsecond)
2132            | DataType::Time32(TimeUnit::Nanosecond)
2133            | DataType::Time64(TimeUnit::Second)
2134            | DataType::Time64(TimeUnit::Millisecond)
2135            | DataType::RunEndEncoded(_, _)
2136            | DataType::ListView(_)
2137            | DataType::LargeListView(_) => {
2138                return _not_impl_err!(
2139                    "Unsupported creation of {:?} array from ScalarValue {:?}",
2140                    data_type,
2141                    scalars.peek()
2142                );
2143            }
2144        };
2145        Ok(array)
2146    }
2147
2148    fn iter_to_null_array(
2149        scalars: impl IntoIterator<Item = ScalarValue>,
2150    ) -> Result<ArrayRef> {
2151        let length = scalars.into_iter().try_fold(
2152            0usize,
2153            |r, element: ScalarValue| match element {
2154                ScalarValue::Null => Ok::<usize, DataFusionError>(r + 1),
2155                s => {
2156                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2157                }
2158            },
2159        )?;
2160        Ok(new_null_array(&DataType::Null, length))
2161    }
2162
2163    fn iter_to_decimal_array(
2164        scalars: impl IntoIterator<Item = ScalarValue>,
2165        precision: u8,
2166        scale: i8,
2167    ) -> Result<Decimal128Array> {
2168        let array = scalars
2169            .into_iter()
2170            .map(|element: ScalarValue| match element {
2171                ScalarValue::Decimal128(v1, _, _) => Ok(v1),
2172                s => {
2173                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2174                }
2175            })
2176            .collect::<Result<Decimal128Array>>()?
2177            .with_precision_and_scale(precision, scale)?;
2178        Ok(array)
2179    }
2180
2181    fn iter_to_decimal256_array(
2182        scalars: impl IntoIterator<Item = ScalarValue>,
2183        precision: u8,
2184        scale: i8,
2185    ) -> Result<Decimal256Array> {
2186        let array = scalars
2187            .into_iter()
2188            .map(|element: ScalarValue| match element {
2189                ScalarValue::Decimal256(v1, _, _) => Ok(v1),
2190                s => {
2191                    _internal_err!(
2192                        "Expected ScalarValue::Decimal256 element. Received {s:?}"
2193                    )
2194                }
2195            })
2196            .collect::<Result<Decimal256Array>>()?
2197            .with_precision_and_scale(precision, scale)?;
2198        Ok(array)
2199    }
2200
2201    fn build_decimal_array(
2202        value: Option<i128>,
2203        precision: u8,
2204        scale: i8,
2205        size: usize,
2206    ) -> Result<Decimal128Array> {
2207        Ok(match value {
2208            Some(val) => Decimal128Array::from(vec![val; size])
2209                .with_precision_and_scale(precision, scale)?,
2210            None => {
2211                let mut builder = Decimal128Array::builder(size)
2212                    .with_precision_and_scale(precision, scale)?;
2213                builder.append_nulls(size);
2214                builder.finish()
2215            }
2216        })
2217    }
2218
2219    fn build_decimal256_array(
2220        value: Option<i256>,
2221        precision: u8,
2222        scale: i8,
2223        size: usize,
2224    ) -> Result<Decimal256Array> {
2225        Ok(repeat_n(value, size)
2226            .collect::<Decimal256Array>()
2227            .with_precision_and_scale(precision, scale)?)
2228    }
2229
2230    /// Converts `Vec<ScalarValue>` where each element has type corresponding to
2231    /// `data_type`, to a single element [`ListArray`].
2232    ///
2233    /// Example
2234    /// ```
2235    /// use datafusion_common::ScalarValue;
2236    /// use arrow::array::{ListArray, Int32Array};
2237    /// use arrow::datatypes::{DataType, Int32Type};
2238    /// use datafusion_common::cast::as_list_array;
2239    ///
2240    /// let scalars = vec![
2241    ///    ScalarValue::Int32(Some(1)),
2242    ///    ScalarValue::Int32(None),
2243    ///    ScalarValue::Int32(Some(2))
2244    /// ];
2245    ///
2246    /// let result = ScalarValue::new_list(&scalars, &DataType::Int32, true);
2247    ///
2248    /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(
2249    ///     vec![
2250    ///        Some(vec![Some(1), None, Some(2)])
2251    ///     ]);
2252    ///
2253    /// assert_eq!(*result, expected);
2254    /// ```
2255    pub fn new_list(
2256        values: &[ScalarValue],
2257        data_type: &DataType,
2258        nullable: bool,
2259    ) -> Arc<ListArray> {
2260        let values = if values.is_empty() {
2261            new_empty_array(data_type)
2262        } else {
2263            Self::iter_to_array(values.iter().cloned()).unwrap()
2264        };
2265        Arc::new(
2266            SingleRowListArrayBuilder::new(values)
2267                .with_nullable(nullable)
2268                .build_list_array(),
2269        )
2270    }
2271
2272    /// Same as [`ScalarValue::new_list`] but with nullable set to true.
2273    pub fn new_list_nullable(
2274        values: &[ScalarValue],
2275        data_type: &DataType,
2276    ) -> Arc<ListArray> {
2277        Self::new_list(values, data_type, true)
2278    }
2279
2280    /// Create ListArray with Null with specific data type
2281    ///
2282    /// - new_null_list(i32, nullable, 1): `ListArray[NULL]`
2283    pub fn new_null_list(data_type: DataType, nullable: bool, null_len: usize) -> Self {
2284        let data_type = DataType::List(Field::new_list_field(data_type, nullable).into());
2285        Self::List(Arc::new(ListArray::from(ArrayData::new_null(
2286            &data_type, null_len,
2287        ))))
2288    }
2289
2290    /// Converts `IntoIterator<Item = ScalarValue>` where each element has type corresponding to
2291    /// `data_type`, to a [`ListArray`].
2292    ///
2293    /// Example
2294    /// ```
2295    /// use datafusion_common::ScalarValue;
2296    /// use arrow::array::{ListArray, Int32Array};
2297    /// use arrow::datatypes::{DataType, Int32Type};
2298    /// use datafusion_common::cast::as_list_array;
2299    ///
2300    /// let scalars = vec![
2301    ///    ScalarValue::Int32(Some(1)),
2302    ///    ScalarValue::Int32(None),
2303    ///    ScalarValue::Int32(Some(2))
2304    /// ];
2305    ///
2306    /// let result = ScalarValue::new_list_from_iter(scalars.into_iter(), &DataType::Int32, true);
2307    ///
2308    /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(
2309    ///     vec![
2310    ///        Some(vec![Some(1), None, Some(2)])
2311    ///     ]);
2312    ///
2313    /// assert_eq!(*result, expected);
2314    /// ```
2315    pub fn new_list_from_iter(
2316        values: impl IntoIterator<Item = ScalarValue> + ExactSizeIterator,
2317        data_type: &DataType,
2318        nullable: bool,
2319    ) -> Arc<ListArray> {
2320        let values = if values.len() == 0 {
2321            new_empty_array(data_type)
2322        } else {
2323            Self::iter_to_array(values).unwrap()
2324        };
2325        Arc::new(
2326            SingleRowListArrayBuilder::new(values)
2327                .with_nullable(nullable)
2328                .build_list_array(),
2329        )
2330    }
2331
2332    /// Converts `Vec<ScalarValue>` where each element has type corresponding to
2333    /// `data_type`, to a [`LargeListArray`].
2334    ///
2335    /// Example
2336    /// ```
2337    /// use datafusion_common::ScalarValue;
2338    /// use arrow::array::{LargeListArray, Int32Array};
2339    /// use arrow::datatypes::{DataType, Int32Type};
2340    /// use datafusion_common::cast::as_large_list_array;
2341    ///
2342    /// let scalars = vec![
2343    ///    ScalarValue::Int32(Some(1)),
2344    ///    ScalarValue::Int32(None),
2345    ///    ScalarValue::Int32(Some(2))
2346    /// ];
2347    ///
2348    /// let result = ScalarValue::new_large_list(&scalars, &DataType::Int32);
2349    ///
2350    /// let expected = LargeListArray::from_iter_primitive::<Int32Type, _, _>(
2351    ///     vec![
2352    ///        Some(vec![Some(1), None, Some(2)])
2353    ///     ]);
2354    ///
2355    /// assert_eq!(*result, expected);
2356    /// ```
2357    pub fn new_large_list(
2358        values: &[ScalarValue],
2359        data_type: &DataType,
2360    ) -> Arc<LargeListArray> {
2361        let values = if values.is_empty() {
2362            new_empty_array(data_type)
2363        } else {
2364            Self::iter_to_array(values.iter().cloned()).unwrap()
2365        };
2366        Arc::new(SingleRowListArrayBuilder::new(values).build_large_list_array())
2367    }
2368
2369    /// Converts a scalar value into an array of `size` rows.
2370    ///
2371    /// # Errors
2372    ///
2373    /// Errors if `self` is
2374    /// - a decimal that fails be converted to a decimal array of size
2375    /// - a `FixedsizeList` that fails to be concatenated into an array of size
2376    /// - a `List` that fails to be concatenated into an array of size
2377    /// - a `Dictionary` that fails be converted to a dictionary array of size
2378    pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
2379        Ok(match self {
2380            ScalarValue::Decimal128(e, precision, scale) => Arc::new(
2381                ScalarValue::build_decimal_array(*e, *precision, *scale, size)?,
2382            ),
2383            ScalarValue::Decimal256(e, precision, scale) => Arc::new(
2384                ScalarValue::build_decimal256_array(*e, *precision, *scale, size)?,
2385            ),
2386            ScalarValue::Boolean(e) => {
2387                Arc::new(BooleanArray::from(vec![*e; size])) as ArrayRef
2388            }
2389            ScalarValue::Float64(e) => {
2390                build_array_from_option!(Float64, Float64Array, e, size)
2391            }
2392            ScalarValue::Float32(e) => {
2393                build_array_from_option!(Float32, Float32Array, e, size)
2394            }
2395            ScalarValue::Float16(e) => {
2396                build_array_from_option!(Float16, Float16Array, e, size)
2397            }
2398            ScalarValue::Int8(e) => build_array_from_option!(Int8, Int8Array, e, size),
2399            ScalarValue::Int16(e) => build_array_from_option!(Int16, Int16Array, e, size),
2400            ScalarValue::Int32(e) => build_array_from_option!(Int32, Int32Array, e, size),
2401            ScalarValue::Int64(e) => build_array_from_option!(Int64, Int64Array, e, size),
2402            ScalarValue::UInt8(e) => build_array_from_option!(UInt8, UInt8Array, e, size),
2403            ScalarValue::UInt16(e) => {
2404                build_array_from_option!(UInt16, UInt16Array, e, size)
2405            }
2406            ScalarValue::UInt32(e) => {
2407                build_array_from_option!(UInt32, UInt32Array, e, size)
2408            }
2409            ScalarValue::UInt64(e) => {
2410                build_array_from_option!(UInt64, UInt64Array, e, size)
2411            }
2412            ScalarValue::TimestampSecond(e, tz_opt) => {
2413                build_timestamp_array_from_option!(
2414                    TimeUnit::Second,
2415                    tz_opt.clone(),
2416                    TimestampSecondArray,
2417                    e,
2418                    size
2419                )
2420            }
2421            ScalarValue::TimestampMillisecond(e, tz_opt) => {
2422                build_timestamp_array_from_option!(
2423                    TimeUnit::Millisecond,
2424                    tz_opt.clone(),
2425                    TimestampMillisecondArray,
2426                    e,
2427                    size
2428                )
2429            }
2430
2431            ScalarValue::TimestampMicrosecond(e, tz_opt) => {
2432                build_timestamp_array_from_option!(
2433                    TimeUnit::Microsecond,
2434                    tz_opt.clone(),
2435                    TimestampMicrosecondArray,
2436                    e,
2437                    size
2438                )
2439            }
2440            ScalarValue::TimestampNanosecond(e, tz_opt) => {
2441                build_timestamp_array_from_option!(
2442                    TimeUnit::Nanosecond,
2443                    tz_opt.clone(),
2444                    TimestampNanosecondArray,
2445                    e,
2446                    size
2447                )
2448            }
2449            ScalarValue::Utf8(e) => match e {
2450                Some(value) => {
2451                    Arc::new(StringArray::from_iter_values(repeat_n(value, size)))
2452                }
2453                None => new_null_array(&DataType::Utf8, size),
2454            },
2455            ScalarValue::Utf8View(e) => match e {
2456                Some(value) => {
2457                    Arc::new(StringViewArray::from_iter_values(repeat_n(value, size)))
2458                }
2459                None => new_null_array(&DataType::Utf8View, size),
2460            },
2461            ScalarValue::LargeUtf8(e) => match e {
2462                Some(value) => {
2463                    Arc::new(LargeStringArray::from_iter_values(repeat_n(value, size)))
2464                }
2465                None => new_null_array(&DataType::LargeUtf8, size),
2466            },
2467            ScalarValue::Binary(e) => match e {
2468                Some(value) => Arc::new(
2469                    repeat_n(Some(value.as_slice()), size).collect::<BinaryArray>(),
2470                ),
2471                None => Arc::new(repeat_n(None::<&str>, size).collect::<BinaryArray>()),
2472            },
2473            ScalarValue::BinaryView(e) => match e {
2474                Some(value) => Arc::new(
2475                    repeat_n(Some(value.as_slice()), size).collect::<BinaryViewArray>(),
2476                ),
2477                None => {
2478                    Arc::new(repeat_n(None::<&str>, size).collect::<BinaryViewArray>())
2479                }
2480            },
2481            ScalarValue::FixedSizeBinary(s, e) => match e {
2482                Some(value) => Arc::new(
2483                    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2484                        repeat_n(Some(value.as_slice()), size),
2485                        *s,
2486                    )
2487                    .unwrap(),
2488                ),
2489                None => Arc::new(
2490                    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2491                        repeat_n(None::<&[u8]>, size),
2492                        *s,
2493                    )
2494                    .unwrap(),
2495                ),
2496            },
2497            ScalarValue::LargeBinary(e) => match e {
2498                Some(value) => Arc::new(
2499                    repeat_n(Some(value.as_slice()), size).collect::<LargeBinaryArray>(),
2500                ),
2501                None => {
2502                    Arc::new(repeat_n(None::<&str>, size).collect::<LargeBinaryArray>())
2503                }
2504            },
2505            ScalarValue::List(arr) => {
2506                if size == 1 {
2507                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
2508                }
2509                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2510            }
2511            ScalarValue::LargeList(arr) => {
2512                if size == 1 {
2513                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
2514                }
2515                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2516            }
2517            ScalarValue::FixedSizeList(arr) => {
2518                if size == 1 {
2519                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
2520                }
2521                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2522            }
2523            ScalarValue::Struct(arr) => {
2524                if size == 1 {
2525                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
2526                }
2527                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2528            }
2529            ScalarValue::Map(arr) => {
2530                if size == 1 {
2531                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
2532                }
2533                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
2534            }
2535            ScalarValue::Date32(e) => {
2536                build_array_from_option!(Date32, Date32Array, e, size)
2537            }
2538            ScalarValue::Date64(e) => {
2539                build_array_from_option!(Date64, Date64Array, e, size)
2540            }
2541            ScalarValue::Time32Second(e) => {
2542                build_array_from_option!(
2543                    Time32,
2544                    TimeUnit::Second,
2545                    Time32SecondArray,
2546                    e,
2547                    size
2548                )
2549            }
2550            ScalarValue::Time32Millisecond(e) => {
2551                build_array_from_option!(
2552                    Time32,
2553                    TimeUnit::Millisecond,
2554                    Time32MillisecondArray,
2555                    e,
2556                    size
2557                )
2558            }
2559            ScalarValue::Time64Microsecond(e) => {
2560                build_array_from_option!(
2561                    Time64,
2562                    TimeUnit::Microsecond,
2563                    Time64MicrosecondArray,
2564                    e,
2565                    size
2566                )
2567            }
2568            ScalarValue::Time64Nanosecond(e) => {
2569                build_array_from_option!(
2570                    Time64,
2571                    TimeUnit::Nanosecond,
2572                    Time64NanosecondArray,
2573                    e,
2574                    size
2575                )
2576            }
2577            ScalarValue::IntervalDayTime(e) => build_array_from_option!(
2578                Interval,
2579                IntervalUnit::DayTime,
2580                IntervalDayTimeArray,
2581                e,
2582                size
2583            ),
2584            ScalarValue::IntervalYearMonth(e) => build_array_from_option!(
2585                Interval,
2586                IntervalUnit::YearMonth,
2587                IntervalYearMonthArray,
2588                e,
2589                size
2590            ),
2591            ScalarValue::IntervalMonthDayNano(e) => build_array_from_option!(
2592                Interval,
2593                IntervalUnit::MonthDayNano,
2594                IntervalMonthDayNanoArray,
2595                e,
2596                size
2597            ),
2598            ScalarValue::DurationSecond(e) => build_array_from_option!(
2599                Duration,
2600                TimeUnit::Second,
2601                DurationSecondArray,
2602                e,
2603                size
2604            ),
2605            ScalarValue::DurationMillisecond(e) => build_array_from_option!(
2606                Duration,
2607                TimeUnit::Millisecond,
2608                DurationMillisecondArray,
2609                e,
2610                size
2611            ),
2612            ScalarValue::DurationMicrosecond(e) => build_array_from_option!(
2613                Duration,
2614                TimeUnit::Microsecond,
2615                DurationMicrosecondArray,
2616                e,
2617                size
2618            ),
2619            ScalarValue::DurationNanosecond(e) => build_array_from_option!(
2620                Duration,
2621                TimeUnit::Nanosecond,
2622                DurationNanosecondArray,
2623                e,
2624                size
2625            ),
2626            ScalarValue::Union(value, fields, mode) => match value {
2627                Some((v_id, value)) => {
2628                    let mut new_fields = Vec::with_capacity(fields.len());
2629                    let mut child_arrays = Vec::<ArrayRef>::with_capacity(fields.len());
2630                    for (f_id, field) in fields.iter() {
2631                        let ar = if f_id == *v_id {
2632                            value.to_array_of_size(size)?
2633                        } else {
2634                            let dt = field.data_type();
2635                            match mode {
2636                                UnionMode::Sparse => new_null_array(dt, size),
2637                                // In a dense union, only the child with values needs to be
2638                                // allocated
2639                                UnionMode::Dense => new_null_array(dt, 0),
2640                            }
2641                        };
2642                        let field = (**field).clone();
2643                        child_arrays.push(ar);
2644                        new_fields.push(field.clone());
2645                    }
2646                    let type_ids = repeat_n(*v_id, size);
2647                    let type_ids = ScalarBuffer::<i8>::from_iter(type_ids);
2648                    let value_offsets = match mode {
2649                        UnionMode::Sparse => None,
2650                        UnionMode::Dense => Some(ScalarBuffer::from_iter(0..size as i32)),
2651                    };
2652                    let ar = UnionArray::try_new(
2653                        fields.clone(),
2654                        type_ids,
2655                        value_offsets,
2656                        child_arrays,
2657                    )
2658                    .map_err(|e| DataFusionError::ArrowError(Box::new(e), None))?;
2659                    Arc::new(ar)
2660                }
2661                None => {
2662                    let dt = self.data_type();
2663                    new_null_array(&dt, size)
2664                }
2665            },
2666            ScalarValue::Dictionary(key_type, v) => {
2667                // values array is one element long (the value)
2668                match key_type.as_ref() {
2669                    DataType::Int8 => dict_from_scalar::<Int8Type>(v, size)?,
2670                    DataType::Int16 => dict_from_scalar::<Int16Type>(v, size)?,
2671                    DataType::Int32 => dict_from_scalar::<Int32Type>(v, size)?,
2672                    DataType::Int64 => dict_from_scalar::<Int64Type>(v, size)?,
2673                    DataType::UInt8 => dict_from_scalar::<UInt8Type>(v, size)?,
2674                    DataType::UInt16 => dict_from_scalar::<UInt16Type>(v, size)?,
2675                    DataType::UInt32 => dict_from_scalar::<UInt32Type>(v, size)?,
2676                    DataType::UInt64 => dict_from_scalar::<UInt64Type>(v, size)?,
2677                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2678                }
2679            }
2680            ScalarValue::Null => new_null_array(&DataType::Null, size),
2681        })
2682    }
2683
2684    fn get_decimal_value_from_array(
2685        array: &dyn Array,
2686        index: usize,
2687        precision: u8,
2688        scale: i8,
2689    ) -> Result<ScalarValue> {
2690        match array.data_type() {
2691            DataType::Decimal128(_, _) => {
2692                let array = as_decimal128_array(array)?;
2693                if array.is_null(index) {
2694                    Ok(ScalarValue::Decimal128(None, precision, scale))
2695                } else {
2696                    let value = array.value(index);
2697                    Ok(ScalarValue::Decimal128(Some(value), precision, scale))
2698                }
2699            }
2700            DataType::Decimal256(_, _) => {
2701                let array = as_decimal256_array(array)?;
2702                if array.is_null(index) {
2703                    Ok(ScalarValue::Decimal256(None, precision, scale))
2704                } else {
2705                    let value = array.value(index);
2706                    Ok(ScalarValue::Decimal256(Some(value), precision, scale))
2707                }
2708            }
2709            _ => _internal_err!("Unsupported decimal type"),
2710        }
2711    }
2712
2713    fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> {
2714        let arrays = repeat_n(arr, size).collect::<Vec<_>>();
2715        let ret = match !arrays.is_empty() {
2716            true => arrow::compute::concat(arrays.as_slice())?,
2717            false => arr.slice(0, 0),
2718        };
2719        Ok(ret)
2720    }
2721
2722    /// Retrieve ScalarValue for each row in `array`
2723    ///
2724    /// Example 1: Array (ScalarValue::Int32)
2725    /// ```
2726    /// use datafusion_common::ScalarValue;
2727    /// use arrow::array::ListArray;
2728    /// use arrow::datatypes::{DataType, Int32Type};
2729    ///
2730    /// // Equivalent to [[1,2,3], [4,5]]
2731    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2732    ///    Some(vec![Some(1), Some(2), Some(3)]),
2733    ///    Some(vec![Some(4), Some(5)])
2734    /// ]);
2735    ///
2736    /// // Convert the array into Scalar Values for each row
2737    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
2738    ///
2739    /// let expected = vec![
2740    /// vec![
2741    ///     ScalarValue::Int32(Some(1)),
2742    ///     ScalarValue::Int32(Some(2)),
2743    ///     ScalarValue::Int32(Some(3)),
2744    /// ],
2745    /// vec![
2746    ///    ScalarValue::Int32(Some(4)),
2747    ///    ScalarValue::Int32(Some(5)),
2748    /// ],
2749    /// ];
2750    ///
2751    /// assert_eq!(scalar_vec, expected);
2752    /// ```
2753    ///
2754    /// Example 2: Nested array (ScalarValue::List)
2755    /// ```
2756    /// use datafusion_common::ScalarValue;
2757    /// use arrow::array::ListArray;
2758    /// use arrow::datatypes::{DataType, Int32Type};
2759    /// use datafusion_common::utils::SingleRowListArrayBuilder;
2760    /// use std::sync::Arc;
2761    ///
2762    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2763    ///    Some(vec![Some(1), Some(2), Some(3)]),
2764    ///    Some(vec![Some(4), Some(5)])
2765    /// ]);
2766    ///
2767    /// // Wrap into another layer of list, we got nested array as [ [[1,2,3], [4,5]] ]
2768    /// let list_arr = SingleRowListArrayBuilder::new(Arc::new(list_arr)).build_list_array();
2769    ///
2770    /// // Convert the array into Scalar Values for each row, we got 1D arrays in this example
2771    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
2772    ///
2773    /// let l1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2774    ///     Some(vec![Some(1), Some(2), Some(3)]),
2775    /// ]);
2776    /// let l2 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
2777    ///     Some(vec![Some(4), Some(5)]),
2778    /// ]);
2779    ///
2780    /// let expected = vec![
2781    ///   vec![
2782    ///     ScalarValue::List(Arc::new(l1)),
2783    ///     ScalarValue::List(Arc::new(l2)),
2784    ///   ],
2785    /// ];
2786    ///
2787    /// assert_eq!(scalar_vec, expected);
2788    /// ```
2789    pub fn convert_array_to_scalar_vec(array: &dyn Array) -> Result<Vec<Vec<Self>>> {
2790        let mut scalars = Vec::with_capacity(array.len());
2791
2792        for index in 0..array.len() {
2793            let nested_array = array.as_list::<i32>().value(index);
2794            let scalar_values = (0..nested_array.len())
2795                .map(|i| ScalarValue::try_from_array(&nested_array, i))
2796                .collect::<Result<Vec<_>>>()?;
2797            scalars.push(scalar_values);
2798        }
2799
2800        Ok(scalars)
2801    }
2802
2803    #[deprecated(
2804        since = "46.0.0",
2805        note = "This function is obsolete. Use `to_array` instead"
2806    )]
2807    pub fn raw_data(&self) -> Result<ArrayRef> {
2808        match self {
2809            ScalarValue::List(arr) => Ok(arr.to_owned()),
2810            _ => _internal_err!("ScalarValue is not a list"),
2811        }
2812    }
2813
2814    /// Converts a value in `array` at `index` into a ScalarValue
2815    pub fn try_from_array(array: &dyn Array, index: usize) -> Result<Self> {
2816        // handle NULL value
2817        if !array.is_valid(index) {
2818            return array.data_type().try_into();
2819        }
2820
2821        Ok(match array.data_type() {
2822            DataType::Null => ScalarValue::Null,
2823            DataType::Decimal128(precision, scale) => {
2824                ScalarValue::get_decimal_value_from_array(
2825                    array, index, *precision, *scale,
2826                )?
2827            }
2828            DataType::Decimal256(precision, scale) => {
2829                ScalarValue::get_decimal_value_from_array(
2830                    array, index, *precision, *scale,
2831                )?
2832            }
2833            DataType::Boolean => typed_cast!(array, index, as_boolean_array, Boolean)?,
2834            DataType::Float64 => typed_cast!(array, index, as_float64_array, Float64)?,
2835            DataType::Float32 => typed_cast!(array, index, as_float32_array, Float32)?,
2836            DataType::Float16 => typed_cast!(array, index, as_float16_array, Float16)?,
2837            DataType::UInt64 => typed_cast!(array, index, as_uint64_array, UInt64)?,
2838            DataType::UInt32 => typed_cast!(array, index, as_uint32_array, UInt32)?,
2839            DataType::UInt16 => typed_cast!(array, index, as_uint16_array, UInt16)?,
2840            DataType::UInt8 => typed_cast!(array, index, as_uint8_array, UInt8)?,
2841            DataType::Int64 => typed_cast!(array, index, as_int64_array, Int64)?,
2842            DataType::Int32 => typed_cast!(array, index, as_int32_array, Int32)?,
2843            DataType::Int16 => typed_cast!(array, index, as_int16_array, Int16)?,
2844            DataType::Int8 => typed_cast!(array, index, as_int8_array, Int8)?,
2845            DataType::Binary => typed_cast!(array, index, as_binary_array, Binary)?,
2846            DataType::LargeBinary => {
2847                typed_cast!(array, index, as_large_binary_array, LargeBinary)?
2848            }
2849            DataType::BinaryView => {
2850                typed_cast!(array, index, as_binary_view_array, BinaryView)?
2851            }
2852            DataType::Utf8 => typed_cast!(array, index, as_string_array, Utf8)?,
2853            DataType::LargeUtf8 => {
2854                typed_cast!(array, index, as_large_string_array, LargeUtf8)?
2855            }
2856            DataType::Utf8View => {
2857                typed_cast!(array, index, as_string_view_array, Utf8View)?
2858            }
2859            DataType::List(field) => {
2860                let list_array = array.as_list::<i32>();
2861                let nested_array = list_array.value(index);
2862                // Produces a single element `ListArray` with the value at `index`.
2863                SingleRowListArrayBuilder::new(nested_array)
2864                    .with_field(field)
2865                    .build_list_scalar()
2866            }
2867            DataType::LargeList(field) => {
2868                let list_array = as_large_list_array(array)?;
2869                let nested_array = list_array.value(index);
2870                // Produces a single element `LargeListArray` with the value at `index`.
2871                SingleRowListArrayBuilder::new(nested_array)
2872                    .with_field(field)
2873                    .build_large_list_scalar()
2874            }
2875            // TODO: There is no test for FixedSizeList now, add it later
2876            DataType::FixedSizeList(field, _) => {
2877                let list_array = as_fixed_size_list_array(array)?;
2878                let nested_array = list_array.value(index);
2879                // Produces a single element `FixedSizeListArray` with the value at `index`.
2880                let list_size = nested_array.len();
2881                SingleRowListArrayBuilder::new(nested_array)
2882                    .with_field(field)
2883                    .build_fixed_size_list_scalar(list_size)
2884            }
2885            DataType::Date32 => typed_cast!(array, index, as_date32_array, Date32)?,
2886            DataType::Date64 => typed_cast!(array, index, as_date64_array, Date64)?,
2887            DataType::Time32(TimeUnit::Second) => {
2888                typed_cast!(array, index, as_time32_second_array, Time32Second)?
2889            }
2890            DataType::Time32(TimeUnit::Millisecond) => {
2891                typed_cast!(array, index, as_time32_millisecond_array, Time32Millisecond)?
2892            }
2893            DataType::Time64(TimeUnit::Microsecond) => {
2894                typed_cast!(array, index, as_time64_microsecond_array, Time64Microsecond)?
2895            }
2896            DataType::Time64(TimeUnit::Nanosecond) => {
2897                typed_cast!(array, index, as_time64_nanosecond_array, Time64Nanosecond)?
2898            }
2899            DataType::Timestamp(TimeUnit::Second, tz_opt) => typed_cast_tz!(
2900                array,
2901                index,
2902                as_timestamp_second_array,
2903                TimestampSecond,
2904                tz_opt
2905            )?,
2906            DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => typed_cast_tz!(
2907                array,
2908                index,
2909                as_timestamp_millisecond_array,
2910                TimestampMillisecond,
2911                tz_opt
2912            )?,
2913            DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => typed_cast_tz!(
2914                array,
2915                index,
2916                as_timestamp_microsecond_array,
2917                TimestampMicrosecond,
2918                tz_opt
2919            )?,
2920            DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => typed_cast_tz!(
2921                array,
2922                index,
2923                as_timestamp_nanosecond_array,
2924                TimestampNanosecond,
2925                tz_opt
2926            )?,
2927            DataType::Dictionary(key_type, _) => {
2928                let (values_array, values_index) = match key_type.as_ref() {
2929                    DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
2930                    DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
2931                    DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
2932                    DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
2933                    DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
2934                    DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
2935                    DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
2936                    DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
2937                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
2938                };
2939                // look up the index in the values dictionary
2940                let value = match values_index {
2941                    Some(values_index) => {
2942                        ScalarValue::try_from_array(values_array, values_index)
2943                    }
2944                    // else entry was null, so return null
2945                    None => values_array.data_type().try_into(),
2946                }?;
2947
2948                Self::Dictionary(key_type.clone(), Box::new(value))
2949            }
2950            DataType::Struct(_) => {
2951                let a = array.slice(index, 1);
2952                Self::Struct(Arc::new(a.as_struct().to_owned()))
2953            }
2954            DataType::FixedSizeBinary(_) => {
2955                let array = as_fixed_size_binary_array(array)?;
2956                let size = match array.data_type() {
2957                    DataType::FixedSizeBinary(size) => *size,
2958                    _ => unreachable!(),
2959                };
2960                ScalarValue::FixedSizeBinary(
2961                    size,
2962                    match array.is_null(index) {
2963                        true => None,
2964                        false => Some(array.value(index).into()),
2965                    },
2966                )
2967            }
2968            DataType::Interval(IntervalUnit::DayTime) => {
2969                typed_cast!(array, index, as_interval_dt_array, IntervalDayTime)?
2970            }
2971            DataType::Interval(IntervalUnit::YearMonth) => {
2972                typed_cast!(array, index, as_interval_ym_array, IntervalYearMonth)?
2973            }
2974            DataType::Interval(IntervalUnit::MonthDayNano) => {
2975                typed_cast!(array, index, as_interval_mdn_array, IntervalMonthDayNano)?
2976            }
2977
2978            DataType::Duration(TimeUnit::Second) => {
2979                typed_cast!(array, index, as_duration_second_array, DurationSecond)?
2980            }
2981            DataType::Duration(TimeUnit::Millisecond) => typed_cast!(
2982                array,
2983                index,
2984                as_duration_millisecond_array,
2985                DurationMillisecond
2986            )?,
2987            DataType::Duration(TimeUnit::Microsecond) => typed_cast!(
2988                array,
2989                index,
2990                as_duration_microsecond_array,
2991                DurationMicrosecond
2992            )?,
2993            DataType::Duration(TimeUnit::Nanosecond) => typed_cast!(
2994                array,
2995                index,
2996                as_duration_nanosecond_array,
2997                DurationNanosecond
2998            )?,
2999            DataType::Map(_, _) => {
3000                let a = array.slice(index, 1);
3001                Self::Map(Arc::new(a.as_map().to_owned()))
3002            }
3003            DataType::Union(fields, mode) => {
3004                let array = as_union_array(array)?;
3005                let ti = array.type_id(index);
3006                let index = array.value_offset(index);
3007                let value = ScalarValue::try_from_array(array.child(ti), index)?;
3008                ScalarValue::Union(Some((ti, Box::new(value))), fields.clone(), *mode)
3009            }
3010            other => {
3011                return _not_impl_err!(
3012                    "Can't create a scalar from array of type \"{other:?}\""
3013                );
3014            }
3015        })
3016    }
3017
3018    /// Try to parse `value` into a ScalarValue of type `target_type`
3019    pub fn try_from_string(value: String, target_type: &DataType) -> Result<Self> {
3020        ScalarValue::from(value).cast_to(target_type)
3021    }
3022
3023    /// Returns the Some(`&str`) representation of `ScalarValue` of logical string type
3024    ///
3025    /// Returns `None` if this `ScalarValue` is not a logical string type or the
3026    /// `ScalarValue` represents the `NULL` value.
3027    ///
3028    /// Note you can use [`Option::flatten`] to check for non null logical
3029    /// strings.
3030    ///
3031    /// For example, [`ScalarValue::Utf8`], [`ScalarValue::LargeUtf8`], and
3032    /// [`ScalarValue::Dictionary`] with a logical string value and store
3033    /// strings and can be accessed as `&str` using this method.
3034    ///
3035    /// # Example: logical strings
3036    /// ```
3037    /// # use datafusion_common::ScalarValue;
3038    /// /// non strings return None
3039    /// let scalar = ScalarValue::from(42);
3040    /// assert_eq!(scalar.try_as_str(), None);
3041    /// // Non null logical string returns Some(Some(&str))
3042    /// let scalar = ScalarValue::from("hello");
3043    /// assert_eq!(scalar.try_as_str(), Some(Some("hello")));
3044    /// // Null logical string returns Some(None)
3045    /// let scalar = ScalarValue::Utf8(None);
3046    /// assert_eq!(scalar.try_as_str(), Some(None));
3047    /// ```
3048    ///
3049    /// # Example: use [`Option::flatten`] to check for non-null logical strings
3050    /// ```
3051    /// # use datafusion_common::ScalarValue;
3052    /// // Non null logical string returns Some(Some(&str))
3053    /// let scalar = ScalarValue::from("hello");
3054    /// assert_eq!(scalar.try_as_str().flatten(), Some("hello"));
3055    /// ```
3056    pub fn try_as_str(&self) -> Option<Option<&str>> {
3057        let v = match self {
3058            ScalarValue::Utf8(v) => v,
3059            ScalarValue::LargeUtf8(v) => v,
3060            ScalarValue::Utf8View(v) => v,
3061            ScalarValue::Dictionary(_, v) => return v.try_as_str(),
3062            _ => return None,
3063        };
3064        Some(v.as_ref().map(|v| v.as_str()))
3065    }
3066
3067    /// Try to cast this value to a ScalarValue of type `data_type`
3068    pub fn cast_to(&self, target_type: &DataType) -> Result<Self> {
3069        self.cast_to_with_options(target_type, &DEFAULT_CAST_OPTIONS)
3070    }
3071
3072    /// Try to cast this value to a ScalarValue of type `data_type` with [`CastOptions`]
3073    pub fn cast_to_with_options(
3074        &self,
3075        target_type: &DataType,
3076        cast_options: &CastOptions<'static>,
3077    ) -> Result<Self> {
3078        let scalar_array = match (self, target_type) {
3079            (
3080                ScalarValue::Decimal128(Some(decimal_value), _, scale),
3081                DataType::Timestamp(time_unit, None),
3082            ) => {
3083                let scale_factor = 10_i128.pow(*scale as u32);
3084                let seconds = decimal_value / scale_factor;
3085                let fraction = decimal_value % scale_factor;
3086
3087                let timestamp_value = match time_unit {
3088                    TimeUnit::Second => ScalarValue::Int64(Some(seconds as i64)),
3089                    TimeUnit::Millisecond => {
3090                        let millis = seconds * 1_000 + (fraction * 1_000) / scale_factor;
3091                        ScalarValue::Int64(Some(millis as i64))
3092                    }
3093                    TimeUnit::Microsecond => {
3094                        let micros =
3095                            seconds * 1_000_000 + (fraction * 1_000_000) / scale_factor;
3096                        ScalarValue::Int64(Some(micros as i64))
3097                    }
3098                    TimeUnit::Nanosecond => {
3099                        let nanos = seconds * 1_000_000_000
3100                            + (fraction * 1_000_000_000) / scale_factor;
3101                        ScalarValue::Int64(Some(nanos as i64))
3102                    }
3103                };
3104
3105                timestamp_value.to_array()?
3106            }
3107            _ => self.to_array()?,
3108        };
3109
3110        let cast_arr = cast_with_options(&scalar_array, target_type, cast_options)?;
3111        ScalarValue::try_from_array(&cast_arr, 0)
3112    }
3113
3114    fn eq_array_decimal(
3115        array: &ArrayRef,
3116        index: usize,
3117        value: Option<&i128>,
3118        precision: u8,
3119        scale: i8,
3120    ) -> Result<bool> {
3121        let array = as_decimal128_array(array)?;
3122        if array.precision() != precision || array.scale() != scale {
3123            return Ok(false);
3124        }
3125        let is_null = array.is_null(index);
3126        if let Some(v) = value {
3127            Ok(!array.is_null(index) && array.value(index) == *v)
3128        } else {
3129            Ok(is_null)
3130        }
3131    }
3132
3133    fn eq_array_decimal256(
3134        array: &ArrayRef,
3135        index: usize,
3136        value: Option<&i256>,
3137        precision: u8,
3138        scale: i8,
3139    ) -> Result<bool> {
3140        let array = as_decimal256_array(array)?;
3141        if array.precision() != precision || array.scale() != scale {
3142            return Ok(false);
3143        }
3144        let is_null = array.is_null(index);
3145        if let Some(v) = value {
3146            Ok(!array.is_null(index) && array.value(index) == *v)
3147        } else {
3148            Ok(is_null)
3149        }
3150    }
3151
3152    /// Compares a single row of array @ index for equality with self,
3153    /// in an optimized fashion.
3154    ///
3155    /// This method implements an optimized version of:
3156    ///
3157    /// ```text
3158    ///     let arr_scalar = Self::try_from_array(array, index).unwrap();
3159    ///     arr_scalar.eq(self)
3160    /// ```
3161    ///
3162    /// *Performance note*: the arrow compute kernels should be
3163    /// preferred over this function if at all possible as they can be
3164    /// vectorized and are generally much faster.
3165    ///
3166    /// This function has a few narrow use cases such as hash table key
3167    /// comparisons where comparing a single row at a time is necessary.
3168    ///
3169    /// # Errors
3170    ///
3171    /// Errors if
3172    /// - it fails to downcast `array` to the data type of `self`
3173    /// - `self` is a `Struct`
3174    ///
3175    /// # Panics
3176    ///
3177    /// Panics if `self` is a dictionary with invalid key type
3178    #[inline]
3179    pub fn eq_array(&self, array: &ArrayRef, index: usize) -> Result<bool> {
3180        Ok(match self {
3181            ScalarValue::Decimal128(v, precision, scale) => {
3182                ScalarValue::eq_array_decimal(
3183                    array,
3184                    index,
3185                    v.as_ref(),
3186                    *precision,
3187                    *scale,
3188                )?
3189            }
3190            ScalarValue::Decimal256(v, precision, scale) => {
3191                ScalarValue::eq_array_decimal256(
3192                    array,
3193                    index,
3194                    v.as_ref(),
3195                    *precision,
3196                    *scale,
3197                )?
3198            }
3199            ScalarValue::Boolean(val) => {
3200                eq_array_primitive!(array, index, as_boolean_array, val)?
3201            }
3202            ScalarValue::Float16(val) => {
3203                eq_array_primitive!(array, index, as_float16_array, val)?
3204            }
3205            ScalarValue::Float32(val) => {
3206                eq_array_primitive!(array, index, as_float32_array, val)?
3207            }
3208            ScalarValue::Float64(val) => {
3209                eq_array_primitive!(array, index, as_float64_array, val)?
3210            }
3211            ScalarValue::Int8(val) => {
3212                eq_array_primitive!(array, index, as_int8_array, val)?
3213            }
3214            ScalarValue::Int16(val) => {
3215                eq_array_primitive!(array, index, as_int16_array, val)?
3216            }
3217            ScalarValue::Int32(val) => {
3218                eq_array_primitive!(array, index, as_int32_array, val)?
3219            }
3220            ScalarValue::Int64(val) => {
3221                eq_array_primitive!(array, index, as_int64_array, val)?
3222            }
3223            ScalarValue::UInt8(val) => {
3224                eq_array_primitive!(array, index, as_uint8_array, val)?
3225            }
3226            ScalarValue::UInt16(val) => {
3227                eq_array_primitive!(array, index, as_uint16_array, val)?
3228            }
3229            ScalarValue::UInt32(val) => {
3230                eq_array_primitive!(array, index, as_uint32_array, val)?
3231            }
3232            ScalarValue::UInt64(val) => {
3233                eq_array_primitive!(array, index, as_uint64_array, val)?
3234            }
3235            ScalarValue::Utf8(val) => {
3236                eq_array_primitive!(array, index, as_string_array, val)?
3237            }
3238            ScalarValue::Utf8View(val) => {
3239                eq_array_primitive!(array, index, as_string_view_array, val)?
3240            }
3241            ScalarValue::LargeUtf8(val) => {
3242                eq_array_primitive!(array, index, as_large_string_array, val)?
3243            }
3244            ScalarValue::Binary(val) => {
3245                eq_array_primitive!(array, index, as_binary_array, val)?
3246            }
3247            ScalarValue::BinaryView(val) => {
3248                eq_array_primitive!(array, index, as_binary_view_array, val)?
3249            }
3250            ScalarValue::FixedSizeBinary(_, val) => {
3251                eq_array_primitive!(array, index, as_fixed_size_binary_array, val)?
3252            }
3253            ScalarValue::LargeBinary(val) => {
3254                eq_array_primitive!(array, index, as_large_binary_array, val)?
3255            }
3256            ScalarValue::List(arr) => {
3257                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3258            }
3259            ScalarValue::LargeList(arr) => {
3260                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3261            }
3262            ScalarValue::FixedSizeList(arr) => {
3263                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3264            }
3265            ScalarValue::Struct(arr) => {
3266                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3267            }
3268            ScalarValue::Map(arr) => {
3269                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
3270            }
3271            ScalarValue::Date32(val) => {
3272                eq_array_primitive!(array, index, as_date32_array, val)?
3273            }
3274            ScalarValue::Date64(val) => {
3275                eq_array_primitive!(array, index, as_date64_array, val)?
3276            }
3277            ScalarValue::Time32Second(val) => {
3278                eq_array_primitive!(array, index, as_time32_second_array, val)?
3279            }
3280            ScalarValue::Time32Millisecond(val) => {
3281                eq_array_primitive!(array, index, as_time32_millisecond_array, val)?
3282            }
3283            ScalarValue::Time64Microsecond(val) => {
3284                eq_array_primitive!(array, index, as_time64_microsecond_array, val)?
3285            }
3286            ScalarValue::Time64Nanosecond(val) => {
3287                eq_array_primitive!(array, index, as_time64_nanosecond_array, val)?
3288            }
3289            ScalarValue::TimestampSecond(val, _) => {
3290                eq_array_primitive!(array, index, as_timestamp_second_array, val)?
3291            }
3292            ScalarValue::TimestampMillisecond(val, _) => {
3293                eq_array_primitive!(array, index, as_timestamp_millisecond_array, val)?
3294            }
3295            ScalarValue::TimestampMicrosecond(val, _) => {
3296                eq_array_primitive!(array, index, as_timestamp_microsecond_array, val)?
3297            }
3298            ScalarValue::TimestampNanosecond(val, _) => {
3299                eq_array_primitive!(array, index, as_timestamp_nanosecond_array, val)?
3300            }
3301            ScalarValue::IntervalYearMonth(val) => {
3302                eq_array_primitive!(array, index, as_interval_ym_array, val)?
3303            }
3304            ScalarValue::IntervalDayTime(val) => {
3305                eq_array_primitive!(array, index, as_interval_dt_array, val)?
3306            }
3307            ScalarValue::IntervalMonthDayNano(val) => {
3308                eq_array_primitive!(array, index, as_interval_mdn_array, val)?
3309            }
3310            ScalarValue::DurationSecond(val) => {
3311                eq_array_primitive!(array, index, as_duration_second_array, val)?
3312            }
3313            ScalarValue::DurationMillisecond(val) => {
3314                eq_array_primitive!(array, index, as_duration_millisecond_array, val)?
3315            }
3316            ScalarValue::DurationMicrosecond(val) => {
3317                eq_array_primitive!(array, index, as_duration_microsecond_array, val)?
3318            }
3319            ScalarValue::DurationNanosecond(val) => {
3320                eq_array_primitive!(array, index, as_duration_nanosecond_array, val)?
3321            }
3322            ScalarValue::Union(value, _, _) => {
3323                let array = as_union_array(array)?;
3324                let ti = array.type_id(index);
3325                let index = array.value_offset(index);
3326                if let Some((ti_v, value)) = value {
3327                    ti_v == &ti && value.eq_array(array.child(ti), index)?
3328                } else {
3329                    array.child(ti).is_null(index)
3330                }
3331            }
3332            ScalarValue::Dictionary(key_type, v) => {
3333                let (values_array, values_index) = match key_type.as_ref() {
3334                    DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
3335                    DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
3336                    DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
3337                    DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
3338                    DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
3339                    DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
3340                    DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
3341                    DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
3342                    _ => unreachable!("Invalid dictionary keys type: {:?}", key_type),
3343                };
3344                // was the value in the array non null?
3345                match values_index {
3346                    Some(values_index) => v.eq_array(values_array, values_index)?,
3347                    None => v.is_null(),
3348                }
3349            }
3350            ScalarValue::Null => array.is_null(index),
3351        })
3352    }
3353
3354    fn eq_array_list(arr1: &ArrayRef, arr2: &ArrayRef, index: usize) -> bool {
3355        let right = arr2.slice(index, 1);
3356        arr1 == &right
3357    }
3358
3359    /// Compare `self` with `other` and return an `Ordering`.
3360    ///
3361    /// This is the same as [`PartialOrd`] except that it returns
3362    /// `Err` if the values cannot be compared, e.g., they have incompatible data types.
3363    pub fn try_cmp(&self, other: &Self) -> Result<Ordering> {
3364        self.partial_cmp(other).ok_or_else(|| {
3365            _internal_datafusion_err!("Uncomparable values: {self:?}, {other:?}")
3366        })
3367    }
3368
3369    /// Estimate size if bytes including `Self`. For values with internal containers such as `String`
3370    /// includes the allocated size (`capacity`) rather than the current length (`len`)
3371    pub fn size(&self) -> usize {
3372        size_of_val(self)
3373            + match self {
3374                ScalarValue::Null
3375                | ScalarValue::Boolean(_)
3376                | ScalarValue::Float16(_)
3377                | ScalarValue::Float32(_)
3378                | ScalarValue::Float64(_)
3379                | ScalarValue::Decimal128(_, _, _)
3380                | ScalarValue::Decimal256(_, _, _)
3381                | ScalarValue::Int8(_)
3382                | ScalarValue::Int16(_)
3383                | ScalarValue::Int32(_)
3384                | ScalarValue::Int64(_)
3385                | ScalarValue::UInt8(_)
3386                | ScalarValue::UInt16(_)
3387                | ScalarValue::UInt32(_)
3388                | ScalarValue::UInt64(_)
3389                | ScalarValue::Date32(_)
3390                | ScalarValue::Date64(_)
3391                | ScalarValue::Time32Second(_)
3392                | ScalarValue::Time32Millisecond(_)
3393                | ScalarValue::Time64Microsecond(_)
3394                | ScalarValue::Time64Nanosecond(_)
3395                | ScalarValue::IntervalYearMonth(_)
3396                | ScalarValue::IntervalDayTime(_)
3397                | ScalarValue::IntervalMonthDayNano(_)
3398                | ScalarValue::DurationSecond(_)
3399                | ScalarValue::DurationMillisecond(_)
3400                | ScalarValue::DurationMicrosecond(_)
3401                | ScalarValue::DurationNanosecond(_) => 0,
3402                ScalarValue::Utf8(s)
3403                | ScalarValue::LargeUtf8(s)
3404                | ScalarValue::Utf8View(s) => {
3405                    s.as_ref().map(|s| s.capacity()).unwrap_or_default()
3406                }
3407                ScalarValue::TimestampSecond(_, s)
3408                | ScalarValue::TimestampMillisecond(_, s)
3409                | ScalarValue::TimestampMicrosecond(_, s)
3410                | ScalarValue::TimestampNanosecond(_, s) => {
3411                    s.as_ref().map(|s| s.len()).unwrap_or_default()
3412                }
3413                ScalarValue::Binary(b)
3414                | ScalarValue::FixedSizeBinary(_, b)
3415                | ScalarValue::LargeBinary(b)
3416                | ScalarValue::BinaryView(b) => {
3417                    b.as_ref().map(|b| b.capacity()).unwrap_or_default()
3418                }
3419                ScalarValue::List(arr) => arr.get_array_memory_size(),
3420                ScalarValue::LargeList(arr) => arr.get_array_memory_size(),
3421                ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(),
3422                ScalarValue::Struct(arr) => arr.get_array_memory_size(),
3423                ScalarValue::Map(arr) => arr.get_array_memory_size(),
3424                ScalarValue::Union(vals, fields, _mode) => {
3425                    vals.as_ref()
3426                        .map(|(_id, sv)| sv.size() - size_of_val(sv))
3427                        .unwrap_or_default()
3428                        // `fields` is boxed, so it is NOT already included in `self`
3429                        + size_of_val(fields)
3430                        + (size_of::<Field>() * fields.len())
3431                        + fields.iter().map(|(_idx, field)| field.size() - size_of_val(field)).sum::<usize>()
3432                }
3433                ScalarValue::Dictionary(dt, sv) => {
3434                    // `dt` and `sv` are boxed, so they are NOT already included in `self`
3435                    dt.size() + sv.size()
3436                }
3437            }
3438    }
3439
3440    /// Estimates [size](Self::size) of [`Vec`] in bytes.
3441    ///
3442    /// Includes the size of the [`Vec`] container itself.
3443    pub fn size_of_vec(vec: &Vec<Self>) -> usize {
3444        size_of_val(vec)
3445            + (size_of::<ScalarValue>() * vec.capacity())
3446            + vec
3447                .iter()
3448                .map(|sv| sv.size() - size_of_val(sv))
3449                .sum::<usize>()
3450    }
3451
3452    /// Estimates [size](Self::size) of [`VecDeque`] in bytes.
3453    ///
3454    /// Includes the size of the [`VecDeque`] container itself.
3455    pub fn size_of_vec_deque(vec_deque: &VecDeque<Self>) -> usize {
3456        size_of_val(vec_deque)
3457            + (size_of::<ScalarValue>() * vec_deque.capacity())
3458            + vec_deque
3459                .iter()
3460                .map(|sv| sv.size() - size_of_val(sv))
3461                .sum::<usize>()
3462    }
3463
3464    /// Estimates [size](Self::size) of [`HashSet`] in bytes.
3465    ///
3466    /// Includes the size of the [`HashSet`] container itself.
3467    pub fn size_of_hashset<S>(set: &HashSet<Self, S>) -> usize {
3468        size_of_val(set)
3469            + (size_of::<ScalarValue>() * set.capacity())
3470            + set
3471                .iter()
3472                .map(|sv| sv.size() - size_of_val(sv))
3473                .sum::<usize>()
3474    }
3475
3476    /// Compacts the allocation referenced by `self` to the minimum, copying the data if
3477    /// necessary.
3478    ///
3479    /// This can be relevant when `self` is a list or contains a list as a nested value, as
3480    /// a single list holds an Arc to its entire original array buffer.
3481    pub fn compact(&mut self) {
3482        match self {
3483            ScalarValue::Null
3484            | ScalarValue::Boolean(_)
3485            | ScalarValue::Float16(_)
3486            | ScalarValue::Float32(_)
3487            | ScalarValue::Float64(_)
3488            | ScalarValue::Decimal128(_, _, _)
3489            | ScalarValue::Decimal256(_, _, _)
3490            | ScalarValue::Int8(_)
3491            | ScalarValue::Int16(_)
3492            | ScalarValue::Int32(_)
3493            | ScalarValue::Int64(_)
3494            | ScalarValue::UInt8(_)
3495            | ScalarValue::UInt16(_)
3496            | ScalarValue::UInt32(_)
3497            | ScalarValue::UInt64(_)
3498            | ScalarValue::Date32(_)
3499            | ScalarValue::Date64(_)
3500            | ScalarValue::Time32Second(_)
3501            | ScalarValue::Time32Millisecond(_)
3502            | ScalarValue::Time64Microsecond(_)
3503            | ScalarValue::Time64Nanosecond(_)
3504            | ScalarValue::IntervalYearMonth(_)
3505            | ScalarValue::IntervalDayTime(_)
3506            | ScalarValue::IntervalMonthDayNano(_)
3507            | ScalarValue::DurationSecond(_)
3508            | ScalarValue::DurationMillisecond(_)
3509            | ScalarValue::DurationMicrosecond(_)
3510            | ScalarValue::DurationNanosecond(_)
3511            | ScalarValue::Utf8(_)
3512            | ScalarValue::LargeUtf8(_)
3513            | ScalarValue::Utf8View(_)
3514            | ScalarValue::TimestampSecond(_, _)
3515            | ScalarValue::TimestampMillisecond(_, _)
3516            | ScalarValue::TimestampMicrosecond(_, _)
3517            | ScalarValue::TimestampNanosecond(_, _)
3518            | ScalarValue::Binary(_)
3519            | ScalarValue::FixedSizeBinary(_, _)
3520            | ScalarValue::LargeBinary(_)
3521            | ScalarValue::BinaryView(_) => (),
3522            ScalarValue::FixedSizeList(arr) => {
3523                let array = copy_array_data(&arr.to_data());
3524                *Arc::make_mut(arr) = FixedSizeListArray::from(array);
3525            }
3526            ScalarValue::List(arr) => {
3527                let array = copy_array_data(&arr.to_data());
3528                *Arc::make_mut(arr) = ListArray::from(array);
3529            }
3530            ScalarValue::LargeList(arr) => {
3531                let array = copy_array_data(&arr.to_data());
3532                *Arc::make_mut(arr) = LargeListArray::from(array)
3533            }
3534            ScalarValue::Struct(arr) => {
3535                let array = copy_array_data(&arr.to_data());
3536                *Arc::make_mut(arr) = StructArray::from(array);
3537            }
3538            ScalarValue::Map(arr) => {
3539                let array = copy_array_data(&arr.to_data());
3540                *Arc::make_mut(arr) = MapArray::from(array);
3541            }
3542            ScalarValue::Union(val, _, _) => {
3543                if let Some((_, value)) = val.as_mut() {
3544                    value.compact();
3545                }
3546            }
3547            ScalarValue::Dictionary(_, value) => {
3548                value.compact();
3549            }
3550        }
3551    }
3552
    /// Compacts ([ScalarValue::compact]) the current [ScalarValue] and returns it.
    ///
    /// By-value counterpart of [ScalarValue::compact]: takes ownership of `self`,
    /// compacts it in place and hands the same value back.
    pub fn compacted(mut self) -> Self {
        self.compact();
        self
    }
3558}
3559
3560/// Compacts the data of an `ArrayData` into a new `ArrayData`.
3561///
3562/// This is useful when you want to minimize the memory footprint of an
3563/// `ArrayData`. For example, the value returned by [`Array::slice`] still
3564/// points at the same underlying data buffers as the original array, which may
3565/// hold many more values. Calling `copy_array_data` on the sliced array will
3566/// create a new, smaller, `ArrayData` that only contains the data for the
3567/// sliced array.
3568///
3569/// # Example
3570/// ```
3571/// # use arrow::array::{make_array, Array, Int32Array};
3572/// use datafusion_common::scalar::copy_array_data;
3573/// let array = Int32Array::from_iter_values(0..8192);
3574/// // Take only the first 2 elements
3575/// let sliced_array = array.slice(0, 2);
3576/// // The memory footprint of `sliced_array` is close to 8192 * 4 bytes
3577/// assert_eq!(32864, sliced_array.get_array_memory_size());
3578/// // however, we can copy the data to a new `ArrayData`
3579/// let new_array = make_array(copy_array_data(&sliced_array.into_data()));
3580/// // The memory footprint of `new_array` is now only 2 * 4 bytes
3581/// // and overhead:
3582/// assert_eq!(160, new_array.get_array_memory_size());
3583/// ```
3584///
3585/// See also [`ScalarValue::compact`] which applies to `ScalarValue` instances
3586/// as necessary.
3587pub fn copy_array_data(src_data: &ArrayData) -> ArrayData {
3588    let mut copy = MutableArrayData::new(vec![&src_data], true, src_data.len());
3589    copy.extend(0, 0, src_data.len());
3590    copy.freeze()
3591}
3592
// Generates `From<$ty>` and `From<Option<$ty>>` for `ScalarValue`, both targeting
// the `$scalar` variant.
macro_rules! impl_scalar {
    ($ty:ty, $scalar:tt) => {
        // Optional form: the `Option` is stored in the variant unchanged.
        impl From<Option<$ty>> for ScalarValue {
            fn from(value: Option<$ty>) -> Self {
                Self::$scalar(value)
            }
        }

        // Plain form: the value is wrapped in `Some`.
        impl From<$ty> for ScalarValue {
            fn from(value: $ty) -> Self {
                Self::$scalar(Some(value))
            }
        }
    };
}
3608
// `From<T>` / `From<Option<T>>` conversions for the native Rust primitives.
impl_scalar!(f64, Float64);
impl_scalar!(f32, Float32);
impl_scalar!(i8, Int8);
impl_scalar!(i16, Int16);
impl_scalar!(i32, Int32);
impl_scalar!(i64, Int64);
impl_scalar!(bool, Boolean);
impl_scalar!(u8, UInt8);
impl_scalar!(u16, UInt16);
impl_scalar!(u32, UInt32);
impl_scalar!(u64, UInt64);
3620
3621impl From<&str> for ScalarValue {
3622    fn from(value: &str) -> Self {
3623        Some(value).into()
3624    }
3625}
3626
3627impl From<Option<&str>> for ScalarValue {
3628    fn from(value: Option<&str>) -> Self {
3629        let value = value.map(|s| s.to_string());
3630        ScalarValue::Utf8(value)
3631    }
3632}
3633
3634/// Wrapper to create ScalarValue::Struct for convenience
3635impl From<Vec<(&str, ScalarValue)>> for ScalarValue {
3636    fn from(value: Vec<(&str, ScalarValue)>) -> Self {
3637        value
3638            .into_iter()
3639            .fold(ScalarStructBuilder::new(), |builder, (name, value)| {
3640                builder.with_name_and_scalar(name, value)
3641            })
3642            .build()
3643            .unwrap()
3644    }
3645}
3646
3647impl FromStr for ScalarValue {
3648    type Err = Infallible;
3649
3650    fn from_str(s: &str) -> Result<Self, Self::Err> {
3651        Ok(s.into())
3652    }
3653}
3654
3655impl From<String> for ScalarValue {
3656    fn from(value: String) -> Self {
3657        ScalarValue::Utf8(Some(value))
3658    }
3659}
3660
// Generates `TryFrom<ScalarValue> for $NATIVE`, succeeding only for a non-null
// `$SCALAR` variant and returning an internal error for anything else.
macro_rules! impl_try_from {
    ($SCALAR:ident, $NATIVE:ident) => {
        impl TryFrom<ScalarValue> for $NATIVE {
            type Error = DataFusionError;

            fn try_from(value: ScalarValue) -> Result<Self> {
                if let ScalarValue::$SCALAR(Some(inner_value)) = value {
                    return Ok(inner_value);
                }
                // A null of the right variant, or any other variant.
                _internal_err!(
                    "Cannot convert {:?} to {}",
                    value,
                    std::any::type_name::<Self>()
                )
            }
        }
    };
}
3679
// `i8` and `i16` are each extracted from exactly one `ScalarValue` variant.
impl_try_from!(Int8, i8);
impl_try_from!(Int16, i16);
3682
3683// special implementation for i32 because of Date32 and Time32
3684impl TryFrom<ScalarValue> for i32 {
3685    type Error = DataFusionError;
3686
3687    fn try_from(value: ScalarValue) -> Result<Self> {
3688        match value {
3689            ScalarValue::Int32(Some(inner_value))
3690            | ScalarValue::Date32(Some(inner_value))
3691            | ScalarValue::Time32Second(Some(inner_value))
3692            | ScalarValue::Time32Millisecond(Some(inner_value)) => Ok(inner_value),
3693            _ => _internal_err!(
3694                "Cannot convert {:?} to {}",
3695                value,
3696                std::any::type_name::<Self>()
3697            ),
3698        }
3699    }
3700}
3701
3702// special implementation for i64 because of Date64, Time64 and Timestamp
3703impl TryFrom<ScalarValue> for i64 {
3704    type Error = DataFusionError;
3705
3706    fn try_from(value: ScalarValue) -> Result<Self> {
3707        match value {
3708            ScalarValue::Int64(Some(inner_value))
3709            | ScalarValue::Date64(Some(inner_value))
3710            | ScalarValue::Time64Microsecond(Some(inner_value))
3711            | ScalarValue::Time64Nanosecond(Some(inner_value))
3712            | ScalarValue::TimestampNanosecond(Some(inner_value), _)
3713            | ScalarValue::TimestampMicrosecond(Some(inner_value), _)
3714            | ScalarValue::TimestampMillisecond(Some(inner_value), _)
3715            | ScalarValue::TimestampSecond(Some(inner_value), _) => Ok(inner_value),
3716            _ => _internal_err!(
3717                "Cannot convert {:?} to {}",
3718                value,
3719                std::any::type_name::<Self>()
3720            ),
3721        }
3722    }
3723}
3724
3725// special implementation for i128 because of Decimal128
3726impl TryFrom<ScalarValue> for i128 {
3727    type Error = DataFusionError;
3728
3729    fn try_from(value: ScalarValue) -> Result<Self> {
3730        match value {
3731            ScalarValue::Decimal128(Some(inner_value), _, _) => Ok(inner_value),
3732            _ => _internal_err!(
3733                "Cannot convert {:?} to {}",
3734                value,
3735                std::any::type_name::<Self>()
3736            ),
3737        }
3738    }
3739}
3740
3741// special implementation for i256 because of Decimal128
3742impl TryFrom<ScalarValue> for i256 {
3743    type Error = DataFusionError;
3744
3745    fn try_from(value: ScalarValue) -> Result<Self> {
3746        match value {
3747            ScalarValue::Decimal256(Some(inner_value), _, _) => Ok(inner_value),
3748            _ => _internal_err!(
3749                "Cannot convert {:?} to {}",
3750                value,
3751                std::any::type_name::<Self>()
3752            ),
3753        }
3754    }
3755}
3756
// The remaining primitives are each extracted from exactly one `ScalarValue`
// variant.
impl_try_from!(UInt8, u8);
impl_try_from!(UInt16, u16);
impl_try_from!(UInt32, u32);
impl_try_from!(UInt64, u64);
impl_try_from!(Float32, f32);
impl_try_from!(Float64, f64);
impl_try_from!(Boolean, bool);
3764
3765impl TryFrom<DataType> for ScalarValue {
3766    type Error = DataFusionError;
3767
3768    /// Create a Null instance of ScalarValue for this datatype
3769    fn try_from(datatype: DataType) -> Result<Self> {
3770        (&datatype).try_into()
3771    }
3772}
3773
impl TryFrom<&DataType> for ScalarValue {
    type Error = DataFusionError;

    /// Create a Null instance of ScalarValue for this datatype
    ///
    /// Forwards to `ScalarValue::try_new_null`, so any error comes from there.
    fn try_from(data_type: &DataType) -> Result<Self> {
        Self::try_new_null(data_type)
    }
}
3782
// Writes an optional value to the formatter `$F`: the value's `Display` output
// when present, the literal `NULL` otherwise.
macro_rules! format_option {
    ($F:expr, $EXPR:expr) => {{
        if let Some(inner) = $EXPR {
            write!($F, "{inner}")
        } else {
            write!($F, "NULL")
        }
    }};
}
3791
3792// Implement Display trait for ScalarValue
3793//
3794// # Panics
3795//
3796// Panics if there is an error when creating a visual representation of columns via `arrow::util::pretty`
3797impl fmt::Display for ScalarValue {
3798    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
3799        match self {
3800            ScalarValue::Decimal128(v, p, s) => {
3801                write!(f, "{v:?},{p:?},{s:?}")?;
3802            }
3803            ScalarValue::Decimal256(v, p, s) => {
3804                write!(f, "{v:?},{p:?},{s:?}")?;
3805            }
3806            ScalarValue::Boolean(e) => format_option!(f, e)?,
3807            ScalarValue::Float16(e) => format_option!(f, e)?,
3808            ScalarValue::Float32(e) => format_option!(f, e)?,
3809            ScalarValue::Float64(e) => format_option!(f, e)?,
3810            ScalarValue::Int8(e) => format_option!(f, e)?,
3811            ScalarValue::Int16(e) => format_option!(f, e)?,
3812            ScalarValue::Int32(e) => format_option!(f, e)?,
3813            ScalarValue::Int64(e) => format_option!(f, e)?,
3814            ScalarValue::UInt8(e) => format_option!(f, e)?,
3815            ScalarValue::UInt16(e) => format_option!(f, e)?,
3816            ScalarValue::UInt32(e) => format_option!(f, e)?,
3817            ScalarValue::UInt64(e) => format_option!(f, e)?,
3818            ScalarValue::TimestampSecond(e, _) => format_option!(f, e)?,
3819            ScalarValue::TimestampMillisecond(e, _) => format_option!(f, e)?,
3820            ScalarValue::TimestampMicrosecond(e, _) => format_option!(f, e)?,
3821            ScalarValue::TimestampNanosecond(e, _) => format_option!(f, e)?,
3822            ScalarValue::Utf8(e)
3823            | ScalarValue::LargeUtf8(e)
3824            | ScalarValue::Utf8View(e) => format_option!(f, e)?,
3825            ScalarValue::Binary(e)
3826            | ScalarValue::FixedSizeBinary(_, e)
3827            | ScalarValue::LargeBinary(e)
3828            | ScalarValue::BinaryView(e) => match e {
3829                Some(bytes) => {
3830                    // print up to first 10 bytes, with trailing ... if needed
3831                    for b in bytes.iter().take(10) {
3832                        write!(f, "{b:02X}")?;
3833                    }
3834                    if bytes.len() > 10 {
3835                        write!(f, "...")?;
3836                    }
3837                }
3838                None => write!(f, "NULL")?,
3839            },
3840            ScalarValue::List(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3841            ScalarValue::LargeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3842            ScalarValue::FixedSizeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?,
3843            ScalarValue::Date32(e) => format_option!(
3844                f,
3845                e.map(|v| {
3846                    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
3847                    match epoch.checked_add_signed(Duration::try_days(v as i64).unwrap())
3848                    {
3849                        Some(date) => date.to_string(),
3850                        None => "".to_string(),
3851                    }
3852                })
3853            )?,
3854            ScalarValue::Date64(e) => format_option!(
3855                f,
3856                e.map(|v| {
3857                    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
3858                    match epoch.checked_add_signed(Duration::try_milliseconds(v).unwrap())
3859                    {
3860                        Some(date) => date.to_string(),
3861                        None => "".to_string(),
3862                    }
3863                })
3864            )?,
3865            ScalarValue::Time32Second(e) => format_option!(f, e)?,
3866            ScalarValue::Time32Millisecond(e) => format_option!(f, e)?,
3867            ScalarValue::Time64Microsecond(e) => format_option!(f, e)?,
3868            ScalarValue::Time64Nanosecond(e) => format_option!(f, e)?,
3869            ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
3870            ScalarValue::IntervalMonthDayNano(e) => {
3871                format_option!(f, e.map(|v| format!("{v:?}")))?
3872            }
3873            ScalarValue::IntervalDayTime(e) => {
3874                format_option!(f, e.map(|v| format!("{v:?}")))?;
3875            }
3876            ScalarValue::DurationSecond(e) => format_option!(f, e)?,
3877            ScalarValue::DurationMillisecond(e) => format_option!(f, e)?,
3878            ScalarValue::DurationMicrosecond(e) => format_option!(f, e)?,
3879            ScalarValue::DurationNanosecond(e) => format_option!(f, e)?,
3880            ScalarValue::Struct(struct_arr) => {
3881                // ScalarValue Struct should always have a single element
3882                assert_eq!(struct_arr.len(), 1);
3883
3884                if struct_arr.null_count() == struct_arr.len() {
3885                    write!(f, "NULL")?;
3886                    return Ok(());
3887                }
3888
3889                let columns = struct_arr.columns();
3890                let fields = struct_arr.fields();
3891                let nulls = struct_arr.nulls();
3892
3893                write!(
3894                    f,
3895                    "{{{}}}",
3896                    columns
3897                        .iter()
3898                        .zip(fields.iter())
3899                        .map(|(column, field)| {
3900                            if nulls.is_some_and(|b| b.is_null(0)) {
3901                                format!("{}:NULL", field.name())
3902                            } else if let DataType::Struct(_) = field.data_type() {
3903                                let sv = ScalarValue::Struct(Arc::new(
3904                                    column.as_struct().to_owned(),
3905                                ));
3906                                format!("{}:{sv}", field.name())
3907                            } else {
3908                                let sv = array_value_to_string(column, 0).unwrap();
3909                                format!("{}:{sv}", field.name())
3910                            }
3911                        })
3912                        .collect::<Vec<_>>()
3913                        .join(",")
3914                )?
3915            }
3916            ScalarValue::Map(map_arr) => {
3917                if map_arr.null_count() == map_arr.len() {
3918                    write!(f, "NULL")?;
3919                    return Ok(());
3920                }
3921
3922                write!(
3923                    f,
3924                    "[{}]",
3925                    map_arr
3926                        .iter()
3927                        .map(|struct_array| {
3928                            if let Some(arr) = struct_array {
3929                                let mut buffer = VecDeque::new();
3930                                for i in 0..arr.len() {
3931                                    let key =
3932                                        array_value_to_string(arr.column(0), i).unwrap();
3933                                    let value =
3934                                        array_value_to_string(arr.column(1), i).unwrap();
3935                                    buffer.push_back(format!("{key}:{value}"));
3936                                }
3937                                format!(
3938                                    "{{{}}}",
3939                                    buffer
3940                                        .into_iter()
3941                                        .collect::<Vec<_>>()
3942                                        .join(",")
3943                                        .as_str()
3944                                )
3945                            } else {
3946                                "NULL".to_string()
3947                            }
3948                        })
3949                        .collect::<Vec<_>>()
3950                        .join(",")
3951                )?
3952            }
3953            ScalarValue::Union(val, _fields, _mode) => match val {
3954                Some((id, val)) => write!(f, "{id}:{val}")?,
3955                None => write!(f, "NULL")?,
3956            },
3957            ScalarValue::Dictionary(_k, v) => write!(f, "{v}")?,
3958            ScalarValue::Null => write!(f, "NULL")?,
3959        };
3960        Ok(())
3961    }
3962}
3963
3964fn fmt_list(arr: ArrayRef, f: &mut fmt::Formatter) -> fmt::Result {
3965    // ScalarValue List, LargeList, FixedSizeList should always have a single element
3966    assert_eq!(arr.len(), 1);
3967    let options = FormatOptions::default().with_display_error(true);
3968    let formatter =
3969        ArrayFormatter::try_new(arr.as_ref() as &dyn Array, &options).unwrap();
3970    let value_formatter = formatter.value(0);
3971    write!(f, "{value_formatter}")
3972}
3973
/// Writes `data` to the formatter as comma-separated decimal byte values,
/// e.g. `[1, 2, 3]` ==> `"1,2,3"`. An empty slice writes nothing.
fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result {
    for (idx, byte) in data.iter().enumerate() {
        // Separator goes before every byte except the first.
        if idx > 0 {
            write!(f, ",")?;
        }
        write!(f, "{byte}")?;
    }
    Ok(())
}
3985
3986impl fmt::Debug for ScalarValue {
3987    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
3988        match self {
3989            ScalarValue::Decimal128(_, _, _) => write!(f, "Decimal128({self})"),
3990            ScalarValue::Decimal256(_, _, _) => write!(f, "Decimal256({self})"),
3991            ScalarValue::Boolean(_) => write!(f, "Boolean({self})"),
3992            ScalarValue::Float16(_) => write!(f, "Float16({self})"),
3993            ScalarValue::Float32(_) => write!(f, "Float32({self})"),
3994            ScalarValue::Float64(_) => write!(f, "Float64({self})"),
3995            ScalarValue::Int8(_) => write!(f, "Int8({self})"),
3996            ScalarValue::Int16(_) => write!(f, "Int16({self})"),
3997            ScalarValue::Int32(_) => write!(f, "Int32({self})"),
3998            ScalarValue::Int64(_) => write!(f, "Int64({self})"),
3999            ScalarValue::UInt8(_) => write!(f, "UInt8({self})"),
4000            ScalarValue::UInt16(_) => write!(f, "UInt16({self})"),
4001            ScalarValue::UInt32(_) => write!(f, "UInt32({self})"),
4002            ScalarValue::UInt64(_) => write!(f, "UInt64({self})"),
4003            ScalarValue::TimestampSecond(_, tz_opt) => {
4004                write!(f, "TimestampSecond({self}, {tz_opt:?})")
4005            }
4006            ScalarValue::TimestampMillisecond(_, tz_opt) => {
4007                write!(f, "TimestampMillisecond({self}, {tz_opt:?})")
4008            }
4009            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
4010                write!(f, "TimestampMicrosecond({self}, {tz_opt:?})")
4011            }
4012            ScalarValue::TimestampNanosecond(_, tz_opt) => {
4013                write!(f, "TimestampNanosecond({self}, {tz_opt:?})")
4014            }
4015            ScalarValue::Utf8(None) => write!(f, "Utf8({self})"),
4016            ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{self}\")"),
4017            ScalarValue::Utf8View(None) => write!(f, "Utf8View({self})"),
4018            ScalarValue::Utf8View(Some(_)) => write!(f, "Utf8View(\"{self}\")"),
4019            ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({self})"),
4020            ScalarValue::LargeUtf8(Some(_)) => write!(f, "LargeUtf8(\"{self}\")"),
4021            ScalarValue::Binary(None) => write!(f, "Binary({self})"),
4022            ScalarValue::Binary(Some(b)) => {
4023                write!(f, "Binary(\"")?;
4024                fmt_binary(b.as_slice(), f)?;
4025                write!(f, "\")")
4026            }
4027            ScalarValue::BinaryView(None) => write!(f, "BinaryView({self})"),
4028            ScalarValue::BinaryView(Some(b)) => {
4029                write!(f, "BinaryView(\"")?;
4030                fmt_binary(b.as_slice(), f)?;
4031                write!(f, "\")")
4032            }
4033            ScalarValue::FixedSizeBinary(size, None) => {
4034                write!(f, "FixedSizeBinary({size}, {self})")
4035            }
4036            ScalarValue::FixedSizeBinary(size, Some(b)) => {
4037                write!(f, "FixedSizeBinary({size}, \"")?;
4038                fmt_binary(b.as_slice(), f)?;
4039                write!(f, "\")")
4040            }
4041            ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({self})"),
4042            ScalarValue::LargeBinary(Some(b)) => {
4043                write!(f, "LargeBinary(\"")?;
4044                fmt_binary(b.as_slice(), f)?;
4045                write!(f, "\")")
4046            }
4047            ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"),
4048            ScalarValue::List(_) => write!(f, "List({self})"),
4049            ScalarValue::LargeList(_) => write!(f, "LargeList({self})"),
4050            ScalarValue::Struct(struct_arr) => {
4051                // ScalarValue Struct should always have a single element
4052                assert_eq!(struct_arr.len(), 1);
4053
4054                let columns = struct_arr.columns();
4055                let fields = struct_arr.fields();
4056
4057                write!(
4058                    f,
4059                    "Struct({{{}}})",
4060                    columns
4061                        .iter()
4062                        .zip(fields.iter())
4063                        .map(|(column, field)| {
4064                            let sv = array_value_to_string(column, 0).unwrap();
4065                            let name = field.name();
4066                            format!("{name}:{sv}")
4067                        })
4068                        .collect::<Vec<_>>()
4069                        .join(",")
4070                )
4071            }
4072            ScalarValue::Map(map_arr) => {
4073                write!(
4074                    f,
4075                    "Map([{}])",
4076                    map_arr
4077                        .iter()
4078                        .map(|struct_array| {
4079                            if let Some(arr) = struct_array {
4080                                let buffer: Vec<String> = (0..arr.len())
4081                                    .map(|i| {
4082                                        let key = array_value_to_string(arr.column(0), i)
4083                                            .unwrap();
4084                                        let value =
4085                                            array_value_to_string(arr.column(1), i)
4086                                                .unwrap();
4087                                        format!("{key:?}:{value:?}")
4088                                    })
4089                                    .collect();
4090                                format!("{{{}}}", buffer.join(","))
4091                            } else {
4092                                "NULL".to_string()
4093                            }
4094                        })
4095                        .collect::<Vec<_>>()
4096                        .join(",")
4097                )
4098            }
4099            ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"),
4100            ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"),
4101            ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{self}\")"),
4102            ScalarValue::Time32Millisecond(_) => {
4103                write!(f, "Time32Millisecond(\"{self}\")")
4104            }
4105            ScalarValue::Time64Microsecond(_) => {
4106                write!(f, "Time64Microsecond(\"{self}\")")
4107            }
4108            ScalarValue::Time64Nanosecond(_) => {
4109                write!(f, "Time64Nanosecond(\"{self}\")")
4110            }
4111            ScalarValue::IntervalDayTime(_) => {
4112                write!(f, "IntervalDayTime(\"{self}\")")
4113            }
4114            ScalarValue::IntervalYearMonth(_) => {
4115                write!(f, "IntervalYearMonth(\"{self}\")")
4116            }
4117            ScalarValue::IntervalMonthDayNano(_) => {
4118                write!(f, "IntervalMonthDayNano(\"{self}\")")
4119            }
4120            ScalarValue::DurationSecond(_) => write!(f, "DurationSecond(\"{self}\")"),
4121            ScalarValue::DurationMillisecond(_) => {
4122                write!(f, "DurationMillisecond(\"{self}\")")
4123            }
4124            ScalarValue::DurationMicrosecond(_) => {
4125                write!(f, "DurationMicrosecond(\"{self}\")")
4126            }
4127            ScalarValue::DurationNanosecond(_) => {
4128                write!(f, "DurationNanosecond(\"{self}\")")
4129            }
4130            ScalarValue::Union(val, _fields, _mode) => match val {
4131                Some((id, val)) => write!(f, "Union {id}:{val}"),
4132                None => write!(f, "Union(NULL)"),
4133            },
4134            ScalarValue::Dictionary(k, v) => write!(f, "Dictionary({k:?}, {v:?})"),
4135            ScalarValue::Null => write!(f, "NULL"),
4136        }
4137    }
4138}
4139
/// Trait used to map a NativeType to a ScalarValue
///
/// It is implemented on Arrow type markers (for example `Float32Type`), which
/// lets generic code wrap an optional native value `T` in the matching
/// [`ScalarValue`] variant.
pub trait ScalarType<T: ArrowNativeType> {
    /// returns a scalar from an optional T
    fn scalar(r: Option<T>) -> ScalarValue;
}
4145
/// Maps an optional `f32` to [`ScalarValue::Float32`], passing the `Option` through.
impl ScalarType<f32> for Float32Type {
    fn scalar(r: Option<f32>) -> ScalarValue {
        ScalarValue::Float32(r)
    }
}
4151
/// Maps an optional `i64` to [`ScalarValue::TimestampSecond`] with no timezone.
impl ScalarType<i64> for TimestampSecondType {
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampSecond(r, None)
    }
}
4157
/// Maps an optional `i64` to [`ScalarValue::TimestampMillisecond`] with no timezone.
impl ScalarType<i64> for TimestampMillisecondType {
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampMillisecond(r, None)
    }
}
4163
/// Maps an optional `i64` to [`ScalarValue::TimestampMicrosecond`] with no timezone.
impl ScalarType<i64> for TimestampMicrosecondType {
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampMicrosecond(r, None)
    }
}
4169
/// Maps an optional `i64` to [`ScalarValue::TimestampNanosecond`] with no timezone.
impl ScalarType<i64> for TimestampNanosecondType {
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampNanosecond(r, None)
    }
}
4175
/// Maps an optional `i32` to [`ScalarValue::Date32`], passing the `Option` through.
impl ScalarType<i32> for Date32Type {
    fn scalar(r: Option<i32>) -> ScalarValue {
        ScalarValue::Date32(r)
    }
}
4181
4182#[cfg(test)]
4183mod tests {
4184    use super::*;
4185    use crate::cast::{as_list_array, as_map_array, as_struct_array};
4186    use crate::test_util::batches_to_string;
4187    use arrow::array::{
4188        FixedSizeListBuilder, Int32Builder, LargeListBuilder, ListBuilder, MapBuilder,
4189        NullArray, NullBufferBuilder, OffsetSizeTrait, PrimitiveBuilder, RecordBatch,
4190        StringBuilder, StringDictionaryBuilder, StructBuilder, UnionBuilder,
4191    };
4192    use arrow::buffer::{Buffer, OffsetBuffer};
4193    use arrow::compute::{is_null, kernels};
4194    use arrow::datatypes::{ArrowNumericType, Fields, Float64Type};
4195    use arrow::error::ArrowError;
4196    use arrow::util::pretty::pretty_format_columns;
4197    use chrono::NaiveDate;
4198    use insta::assert_snapshot;
4199    use rand::Rng;
4200
4201    #[test]
4202    fn test_scalar_value_from_for_map() {
4203        let string_builder = StringBuilder::new();
4204        let int_builder = Int32Builder::with_capacity(4);
4205        let mut builder = MapBuilder::new(None, string_builder, int_builder);
4206        builder.keys().append_value("joe");
4207        builder.values().append_value(1);
4208        builder.append(true).unwrap();
4209
4210        builder.keys().append_value("blogs");
4211        builder.values().append_value(2);
4212        builder.keys().append_value("foo");
4213        builder.values().append_value(4);
4214        builder.append(true).unwrap();
4215        builder.append(true).unwrap();
4216        builder.append(false).unwrap();
4217
4218        let expected = builder.finish();
4219
4220        let sv = ScalarValue::Map(Arc::new(expected.clone()));
4221        let map_arr = sv.to_array().unwrap();
4222        let actual = as_map_array(&map_arr).unwrap();
4223        assert_eq!(actual, &expected);
4224    }
4225
4226    #[test]
4227    fn test_scalar_value_from_for_struct() {
4228        let boolean = Arc::new(BooleanArray::from(vec![false]));
4229        let int = Arc::new(Int32Array::from(vec![42]));
4230
4231        let expected = StructArray::from(vec![
4232            (
4233                Arc::new(Field::new("b", DataType::Boolean, false)),
4234                Arc::clone(&boolean) as ArrayRef,
4235            ),
4236            (
4237                Arc::new(Field::new("c", DataType::Int32, false)),
4238                Arc::clone(&int) as ArrayRef,
4239            ),
4240        ]);
4241
4242        let sv = ScalarStructBuilder::new()
4243            .with_array(Field::new("b", DataType::Boolean, false), boolean)
4244            .with_array(Field::new("c", DataType::Int32, false), int)
4245            .build()
4246            .unwrap();
4247
4248        let struct_arr = sv.to_array().unwrap();
4249        let actual = as_struct_array(&struct_arr).unwrap();
4250        assert_eq!(actual, &expected);
4251    }
4252
4253    #[test]
4254    #[should_panic(
4255        expected = "InvalidArgumentError(\"Incorrect array length for StructArray field \\\"bool\\\", expected 1 got 4\")"
4256    )]
4257    fn test_scalar_value_from_for_struct_should_panic() {
4258        let _ = ScalarStructBuilder::new()
4259            .with_array(
4260                Field::new("bool", DataType::Boolean, false),
4261                Arc::new(BooleanArray::from(vec![false, true, false, false])),
4262            )
4263            .with_array(
4264                Field::new("i32", DataType::Int32, false),
4265                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
4266            )
4267            .build()
4268            .unwrap();
4269    }
4270
4271    #[test]
4272    fn test_to_array_of_size_for_nested() {
4273        // Struct
4274        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
4275        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4276
4277        let struct_array = StructArray::from(vec![
4278            (
4279                Arc::new(Field::new("b", DataType::Boolean, false)),
4280                Arc::clone(&boolean) as ArrayRef,
4281            ),
4282            (
4283                Arc::new(Field::new("c", DataType::Int32, false)),
4284                Arc::clone(&int) as ArrayRef,
4285            ),
4286        ]);
4287        let sv = ScalarValue::Struct(Arc::new(struct_array));
4288        let actual_arr = sv.to_array_of_size(2).unwrap();
4289
4290        let boolean = Arc::new(BooleanArray::from(vec![
4291            false, false, true, true, false, false, true, true,
4292        ]));
4293        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31, 42, 28, 19, 31]));
4294
4295        let struct_array = StructArray::from(vec![
4296            (
4297                Arc::new(Field::new("b", DataType::Boolean, false)),
4298                Arc::clone(&boolean) as ArrayRef,
4299            ),
4300            (
4301                Arc::new(Field::new("c", DataType::Int32, false)),
4302                Arc::clone(&int) as ArrayRef,
4303            ),
4304        ]);
4305
4306        let actual = as_struct_array(&actual_arr).unwrap();
4307        assert_eq!(actual, &struct_array);
4308
4309        // List
4310        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
4311            Some(1),
4312            None,
4313            Some(2),
4314        ])]);
4315
4316        let sv = ScalarValue::List(Arc::new(arr));
4317        let actual_arr = sv
4318            .to_array_of_size(2)
4319            .expect("Failed to convert to array of size");
4320        let actual_list_arr = actual_arr.as_list::<i32>();
4321
4322        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
4323            Some(vec![Some(1), None, Some(2)]),
4324            Some(vec![Some(1), None, Some(2)]),
4325        ]);
4326
4327        assert_eq!(&arr, actual_list_arr);
4328    }
4329
4330    #[test]
4331    fn test_to_array_of_size_for_fsl() {
4332        let values = Int32Array::from_iter([Some(1), None, Some(2)]);
4333        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
4334        let arr = FixedSizeListArray::new(Arc::clone(&field), 3, Arc::new(values), None);
4335        let sv = ScalarValue::FixedSizeList(Arc::new(arr));
4336        let actual_arr = sv
4337            .to_array_of_size(2)
4338            .expect("Failed to convert to array of size");
4339
4340        let expected_values =
4341            Int32Array::from_iter([Some(1), None, Some(2), Some(1), None, Some(2)]);
4342        let expected_arr =
4343            FixedSizeListArray::new(field, 3, Arc::new(expected_values), None);
4344
4345        assert_eq!(
4346            &expected_arr,
4347            as_fixed_size_list_array(actual_arr.as_ref()).unwrap()
4348        );
4349
4350        let empty_array = sv
4351            .to_array_of_size(0)
4352            .expect("Failed to convert to empty array");
4353
4354        assert_eq!(empty_array.len(), 0);
4355    }
4356
4357    #[test]
4358    fn test_list_to_array_string() {
4359        let scalars = vec![
4360            ScalarValue::from("rust"),
4361            ScalarValue::from("arrow"),
4362            ScalarValue::from("data-fusion"),
4363        ];
4364
4365        let result = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8);
4366
4367        let expected = single_row_list_array(vec!["rust", "arrow", "data-fusion"]);
4368        assert_eq!(*result, expected);
4369    }
4370
4371    fn single_row_list_array(items: Vec<&str>) -> ListArray {
4372        SingleRowListArrayBuilder::new(Arc::new(StringArray::from(items)))
4373            .build_list_array()
4374    }
4375
4376    fn build_list<O: OffsetSizeTrait>(
4377        values: Vec<Option<Vec<Option<i64>>>>,
4378    ) -> Vec<ScalarValue> {
4379        values
4380            .into_iter()
4381            .map(|v| {
4382                let arr = if v.is_some() {
4383                    Arc::new(
4384                        GenericListArray::<O>::from_iter_primitive::<Int64Type, _, _>(
4385                            vec![v],
4386                        ),
4387                    )
4388                } else if O::IS_LARGE {
4389                    new_null_array(
4390                        &DataType::LargeList(Arc::new(Field::new_list_field(
4391                            DataType::Int64,
4392                            true,
4393                        ))),
4394                        1,
4395                    )
4396                } else {
4397                    new_null_array(
4398                        &DataType::List(Arc::new(Field::new_list_field(
4399                            DataType::Int64,
4400                            true,
4401                        ))),
4402                        1,
4403                    )
4404                };
4405
4406                if O::IS_LARGE {
4407                    ScalarValue::LargeList(arr.as_list::<i64>().to_owned().into())
4408                } else {
4409                    ScalarValue::List(arr.as_list::<i32>().to_owned().into())
4410                }
4411            })
4412            .collect()
4413    }
4414
4415    #[test]
4416    fn test_iter_to_array_fixed_size_list() {
4417        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
4418        let f1 = Arc::new(FixedSizeListArray::new(
4419            Arc::clone(&field),
4420            3,
4421            Arc::new(Int32Array::from(vec![1, 2, 3])),
4422            None,
4423        ));
4424        let f2 = Arc::new(FixedSizeListArray::new(
4425            Arc::clone(&field),
4426            3,
4427            Arc::new(Int32Array::from(vec![4, 5, 6])),
4428            None,
4429        ));
4430        let f_nulls = Arc::new(FixedSizeListArray::new_null(field, 1, 1));
4431
4432        let scalars = vec![
4433            ScalarValue::FixedSizeList(Arc::clone(&f_nulls)),
4434            ScalarValue::FixedSizeList(f1),
4435            ScalarValue::FixedSizeList(f2),
4436            ScalarValue::FixedSizeList(f_nulls),
4437        ];
4438
4439        let array = ScalarValue::iter_to_array(scalars).unwrap();
4440
4441        let expected = FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
4442            vec![
4443                None,
4444                Some(vec![Some(1), Some(2), Some(3)]),
4445                Some(vec![Some(4), Some(5), Some(6)]),
4446                None,
4447            ],
4448            3,
4449        );
4450        assert_eq!(array.as_ref(), &expected);
4451    }
4452
4453    #[test]
4454    fn test_iter_to_array_struct() {
4455        let s1 = StructArray::from(vec![
4456            (
4457                Arc::new(Field::new("A", DataType::Boolean, false)),
4458                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
4459            ),
4460            (
4461                Arc::new(Field::new("B", DataType::Int32, false)),
4462                Arc::new(Int32Array::from(vec![42])) as ArrayRef,
4463            ),
4464        ]);
4465
4466        let s2 = StructArray::from(vec![
4467            (
4468                Arc::new(Field::new("A", DataType::Boolean, false)),
4469                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
4470            ),
4471            (
4472                Arc::new(Field::new("B", DataType::Int32, false)),
4473                Arc::new(Int32Array::from(vec![42])) as ArrayRef,
4474            ),
4475        ]);
4476
4477        let scalars = vec![
4478            ScalarValue::Struct(Arc::new(s1)),
4479            ScalarValue::Struct(Arc::new(s2)),
4480        ];
4481
4482        let array = ScalarValue::iter_to_array(scalars).unwrap();
4483
4484        let expected = StructArray::from(vec![
4485            (
4486                Arc::new(Field::new("A", DataType::Boolean, false)),
4487                Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
4488            ),
4489            (
4490                Arc::new(Field::new("B", DataType::Int32, false)),
4491                Arc::new(Int32Array::from(vec![42, 42])) as ArrayRef,
4492            ),
4493        ]);
4494        assert_eq!(array.as_ref(), &expected);
4495    }
4496
4497    #[test]
4498    fn test_iter_to_array_struct_with_nulls() {
4499        // non-null
4500        let s1 = StructArray::from((
4501            vec![
4502                (
4503                    Arc::new(Field::new("A", DataType::Int32, false)),
4504                    Arc::new(Int32Array::from(vec![1])) as ArrayRef,
4505                ),
4506                (
4507                    Arc::new(Field::new("B", DataType::Int64, false)),
4508                    Arc::new(Int64Array::from(vec![2])) as ArrayRef,
4509                ),
4510            ],
4511            // Present the null mask, 1 is non-null, 0 is null
4512            Buffer::from(&[1]),
4513        ));
4514
4515        // null
4516        let s2 = StructArray::from((
4517            vec![
4518                (
4519                    Arc::new(Field::new("A", DataType::Int32, false)),
4520                    Arc::new(Int32Array::from(vec![3])) as ArrayRef,
4521                ),
4522                (
4523                    Arc::new(Field::new("B", DataType::Int64, false)),
4524                    Arc::new(Int64Array::from(vec![4])) as ArrayRef,
4525                ),
4526            ],
4527            Buffer::from(&[0]),
4528        ));
4529
4530        let scalars = vec![
4531            ScalarValue::Struct(Arc::new(s1)),
4532            ScalarValue::Struct(Arc::new(s2)),
4533        ];
4534
4535        let array = ScalarValue::iter_to_array(scalars).unwrap();
4536        let struct_array = array.as_struct();
4537        assert!(struct_array.is_valid(0));
4538        assert!(struct_array.is_null(1));
4539    }
4540
4541    #[test]
4542    fn iter_to_array_primitive_test() {
4543        // List[[1,2,3]], List[null], List[[4,5]]
4544        let scalars = build_list::<i32>(vec![
4545            Some(vec![Some(1), Some(2), Some(3)]),
4546            None,
4547            Some(vec![Some(4), Some(5)]),
4548        ]);
4549
4550        let array = ScalarValue::iter_to_array(scalars).unwrap();
4551        let list_array = as_list_array(&array).unwrap();
4552        // List[[1,2,3], null, [4,5]]
4553        let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
4554            Some(vec![Some(1), Some(2), Some(3)]),
4555            None,
4556            Some(vec![Some(4), Some(5)]),
4557        ]);
4558        assert_eq!(list_array, &expected);
4559
4560        let scalars = build_list::<i64>(vec![
4561            Some(vec![Some(1), Some(2), Some(3)]),
4562            None,
4563            Some(vec![Some(4), Some(5)]),
4564        ]);
4565
4566        let array = ScalarValue::iter_to_array(scalars).unwrap();
4567        let list_array = as_large_list_array(&array).unwrap();
4568        let expected = LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![
4569            Some(vec![Some(1), Some(2), Some(3)]),
4570            None,
4571            Some(vec![Some(4), Some(5)]),
4572        ]);
4573        assert_eq!(list_array, &expected);
4574    }
4575
4576    #[test]
4577    fn iter_to_array_string_test() {
4578        let arr1 = single_row_list_array(vec!["foo", "bar", "baz"]);
4579        let arr2 = single_row_list_array(vec!["rust", "world"]);
4580
4581        let scalars = vec![
4582            ScalarValue::List(Arc::new(arr1)),
4583            ScalarValue::List(Arc::new(arr2)),
4584        ];
4585
4586        let array = ScalarValue::iter_to_array(scalars).unwrap();
4587        let result = array.as_list::<i32>();
4588
4589        // build expected array
4590        let string_builder = StringBuilder::with_capacity(5, 25);
4591        let mut list_of_string_builder = ListBuilder::new(string_builder);
4592
4593        list_of_string_builder.values().append_value("foo");
4594        list_of_string_builder.values().append_value("bar");
4595        list_of_string_builder.values().append_value("baz");
4596        list_of_string_builder.append(true);
4597
4598        list_of_string_builder.values().append_value("rust");
4599        list_of_string_builder.values().append_value("world");
4600        list_of_string_builder.append(true);
4601        let expected = list_of_string_builder.finish();
4602
4603        assert_eq!(result, &expected);
4604    }
4605
4606    #[test]
4607    fn test_list_scalar_eq_to_array() {
4608        let list_array: ArrayRef =
4609            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
4610                Some(vec![Some(0), Some(1), Some(2)]),
4611                None,
4612                Some(vec![None, Some(5)]),
4613            ]));
4614
4615        let fsl_array: ArrayRef =
4616            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
4617                Some(vec![Some(0), Some(1), Some(2)]),
4618                None,
4619                Some(vec![Some(3), None, Some(5)]),
4620            ]));
4621
4622        for arr in [list_array, fsl_array] {
4623            for i in 0..arr.len() {
4624                let scalar =
4625                    ScalarValue::List(arr.slice(i, 1).as_list::<i32>().to_owned().into());
4626                assert!(scalar.eq_array(&arr, i).unwrap());
4627            }
4628        }
4629    }
4630
4631    #[test]
4632    fn test_eq_array_err_message() {
4633        assert_starts_with(
4634            ScalarValue::Utf8(Some("123".to_string()))
4635                .eq_array(&(Arc::new(Int32Array::from(vec![123])) as ArrayRef), 0)
4636                .unwrap_err()
4637                .message(),
4638            "could not cast array of type Int32 to arrow_array::array::byte_array::GenericByteArray<arrow_array::types::GenericStringType<i32>>",
4639        );
4640    }
4641
4642    #[test]
4643    fn scalar_add_trait_test() -> Result<()> {
4644        let float_value = ScalarValue::Float64(Some(123.));
4645        let float_value_2 = ScalarValue::Float64(Some(123.));
4646        assert_eq!(
4647            (float_value.add(&float_value_2))?,
4648            ScalarValue::Float64(Some(246.))
4649        );
4650        assert_eq!(
4651            (float_value.add(float_value_2))?,
4652            ScalarValue::Float64(Some(246.))
4653        );
4654        Ok(())
4655    }
4656
4657    #[test]
4658    fn scalar_sub_trait_test() -> Result<()> {
4659        let float_value = ScalarValue::Float64(Some(123.));
4660        let float_value_2 = ScalarValue::Float64(Some(123.));
4661        assert_eq!(
4662            float_value.sub(&float_value_2)?,
4663            ScalarValue::Float64(Some(0.))
4664        );
4665        assert_eq!(
4666            float_value.sub(float_value_2)?,
4667            ScalarValue::Float64(Some(0.))
4668        );
4669        Ok(())
4670    }
4671
4672    #[test]
4673    fn scalar_sub_trait_int32_test() -> Result<()> {
4674        let int_value = ScalarValue::Int32(Some(42));
4675        let int_value_2 = ScalarValue::Int32(Some(100));
4676        assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int32(Some(-58)));
4677        assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int32(Some(58)));
4678        Ok(())
4679    }
4680
4681    #[test]
4682    fn scalar_sub_trait_int32_overflow_test() {
4683        let int_value = ScalarValue::Int32(Some(i32::MAX));
4684        let int_value_2 = ScalarValue::Int32(Some(i32::MIN));
4685        let err = int_value
4686            .sub_checked(&int_value_2)
4687            .unwrap_err()
4688            .strip_backtrace();
4689        assert_eq!(
4690            err,
4691            "Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
4692        )
4693    }
4694
4695    #[test]
4696    fn scalar_sub_trait_int64_test() -> Result<()> {
4697        let int_value = ScalarValue::Int64(Some(42));
4698        let int_value_2 = ScalarValue::Int64(Some(100));
4699        assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int64(Some(-58)));
4700        assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int64(Some(58)));
4701        Ok(())
4702    }
4703
4704    #[test]
4705    fn scalar_sub_trait_int64_overflow_test() {
4706        let int_value = ScalarValue::Int64(Some(i64::MAX));
4707        let int_value_2 = ScalarValue::Int64(Some(i64::MIN));
4708        let err = int_value
4709            .sub_checked(&int_value_2)
4710            .unwrap_err()
4711            .strip_backtrace();
4712        assert_eq!(err, "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808")
4713    }
4714
4715    #[test]
4716    fn scalar_add_overflow_test() -> Result<()> {
4717        check_scalar_add_overflow::<Int8Type>(
4718            ScalarValue::Int8(Some(i8::MAX)),
4719            ScalarValue::Int8(Some(i8::MAX)),
4720        );
4721        check_scalar_add_overflow::<UInt8Type>(
4722            ScalarValue::UInt8(Some(u8::MAX)),
4723            ScalarValue::UInt8(Some(u8::MAX)),
4724        );
4725        check_scalar_add_overflow::<Int16Type>(
4726            ScalarValue::Int16(Some(i16::MAX)),
4727            ScalarValue::Int16(Some(i16::MAX)),
4728        );
4729        check_scalar_add_overflow::<UInt16Type>(
4730            ScalarValue::UInt16(Some(u16::MAX)),
4731            ScalarValue::UInt16(Some(u16::MAX)),
4732        );
4733        check_scalar_add_overflow::<Int32Type>(
4734            ScalarValue::Int32(Some(i32::MAX)),
4735            ScalarValue::Int32(Some(i32::MAX)),
4736        );
4737        check_scalar_add_overflow::<UInt32Type>(
4738            ScalarValue::UInt32(Some(u32::MAX)),
4739            ScalarValue::UInt32(Some(u32::MAX)),
4740        );
4741        check_scalar_add_overflow::<Int64Type>(
4742            ScalarValue::Int64(Some(i64::MAX)),
4743            ScalarValue::Int64(Some(i64::MAX)),
4744        );
4745        check_scalar_add_overflow::<UInt64Type>(
4746            ScalarValue::UInt64(Some(u64::MAX)),
4747            ScalarValue::UInt64(Some(u64::MAX)),
4748        );
4749
4750        Ok(())
4751    }
4752
4753    // Verifies that ScalarValue has the same behavior with compute kernel when it overflows.
4754    fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
4755    where
4756        T: ArrowNumericType,
4757    {
4758        let scalar_result = left.add_checked(&right);
4759
4760        let left_array = left.to_array().expect("Failed to convert to array");
4761        let right_array = right.to_array().expect("Failed to convert to array");
4762        let arrow_left_array = left_array.as_primitive::<T>();
4763        let arrow_right_array = right_array.as_primitive::<T>();
4764        let arrow_result = add(arrow_left_array, arrow_right_array);
4765
4766        assert_eq!(scalar_result.is_ok(), arrow_result.is_ok());
4767    }
4768
4769    #[test]
4770    fn test_interval_add_timestamp() -> Result<()> {
4771        let interval = ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
4772            months: 1,
4773            days: 2,
4774            nanoseconds: 3,
4775        }));
4776        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
4777        let result = interval.add(&timestamp)?;
4778        let expect = timestamp.add(&interval)?;
4779        assert_eq!(result, expect);
4780
4781        let interval = ScalarValue::IntervalYearMonth(Some(123));
4782        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
4783        let result = interval.add(&timestamp)?;
4784        let expect = timestamp.add(&interval)?;
4785        assert_eq!(result, expect);
4786
4787        let interval = ScalarValue::IntervalDayTime(Some(IntervalDayTime {
4788            days: 1,
4789            milliseconds: 23,
4790        }));
4791        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
4792        let result = interval.add(&timestamp)?;
4793        let expect = timestamp.add(&interval)?;
4794        assert_eq!(result, expect);
4795        Ok(())
4796    }
4797
4798    #[test]
4799    fn test_try_cmp() {
4800        assert_eq!(
4801            ScalarValue::try_cmp(
4802                &ScalarValue::Int32(Some(1)),
4803                &ScalarValue::Int32(Some(2))
4804            )
4805            .unwrap(),
4806            Ordering::Less
4807        );
4808        assert_eq!(
4809            ScalarValue::try_cmp(&ScalarValue::Int32(None), &ScalarValue::Int32(Some(2)))
4810                .unwrap(),
4811            Ordering::Less
4812        );
4813        assert_starts_with(
4814            ScalarValue::try_cmp(
4815                &ScalarValue::Int32(Some(1)),
4816                &ScalarValue::Int64(Some(2)),
4817            )
4818            .unwrap_err()
4819            .message(),
4820            "Uncomparable values: Int32(1), Int64(2)",
4821        );
4822    }
4823
4824    #[test]
4825    fn scalar_decimal_test() -> Result<()> {
4826        let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
4827        assert_eq!(DataType::Decimal128(10, 1), decimal_value.data_type());
4828        let try_into_value: i128 = decimal_value.clone().try_into().unwrap();
4829        assert_eq!(123_i128, try_into_value);
4830        assert!(!decimal_value.is_null());
4831        let neg_decimal_value = decimal_value.arithmetic_negate()?;
4832        match neg_decimal_value {
4833            ScalarValue::Decimal128(v, _, _) => {
4834                assert_eq!(-123, v.unwrap());
4835            }
4836            _ => {
4837                unreachable!();
4838            }
4839        }
4840
4841        // decimal scalar to array
4842        let array = decimal_value
4843            .to_array()
4844            .expect("Failed to convert to array");
4845        let array = as_decimal128_array(&array)?;
4846        assert_eq!(1, array.len());
4847        assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
4848        assert_eq!(123i128, array.value(0));
4849
4850        // decimal scalar to array with size
4851        let array = decimal_value
4852            .to_array_of_size(10)
4853            .expect("Failed to convert to array of size");
4854        let array_decimal = as_decimal128_array(&array)?;
4855        assert_eq!(10, array.len());
4856        assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
4857        assert_eq!(123i128, array_decimal.value(0));
4858        assert_eq!(123i128, array_decimal.value(9));
4859        // test eq array
4860        assert!(decimal_value
4861            .eq_array(&array, 1)
4862            .expect("Failed to compare arrays"));
4863        assert!(decimal_value
4864            .eq_array(&array, 5)
4865            .expect("Failed to compare arrays"));
4866        // test try from array
4867        assert_eq!(
4868            decimal_value,
4869            ScalarValue::try_from_array(&array, 5).unwrap()
4870        );
4871
4872        assert_eq!(
4873            decimal_value,
4874            ScalarValue::try_new_decimal128(123, 10, 1).unwrap()
4875        );
4876
4877        // test compare
4878        let left = ScalarValue::Decimal128(Some(123), 10, 2);
4879        let right = ScalarValue::Decimal128(Some(124), 10, 2);
4880        assert!(!left.eq(&right));
4881        let result = left < right;
4882        assert!(result);
4883        let result = left <= right;
4884        assert!(result);
4885        let right = ScalarValue::Decimal128(Some(124), 10, 3);
4886        // make sure that two decimals with diff datatype can't be compared.
4887        let result = left.partial_cmp(&right);
4888        assert_eq!(None, result);
4889
4890        let decimal_vec = vec![
4891            ScalarValue::Decimal128(Some(1), 10, 2),
4892            ScalarValue::Decimal128(Some(2), 10, 2),
4893            ScalarValue::Decimal128(Some(3), 10, 2),
4894        ];
4895        // convert the vec to decimal array and check the result
4896        let array = ScalarValue::iter_to_array(decimal_vec).unwrap();
4897        assert_eq!(3, array.len());
4898        assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());
4899
4900        let decimal_vec = vec![
4901            ScalarValue::Decimal128(Some(1), 10, 2),
4902            ScalarValue::Decimal128(Some(2), 10, 2),
4903            ScalarValue::Decimal128(Some(3), 10, 2),
4904            ScalarValue::Decimal128(None, 10, 2),
4905        ];
4906        let array = ScalarValue::iter_to_array(decimal_vec).unwrap();
4907        assert_eq!(4, array.len());
4908        assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());
4909
4910        assert!(ScalarValue::try_new_decimal128(1, 10, 2)
4911            .unwrap()
4912            .eq_array(&array, 0)
4913            .expect("Failed to compare arrays"));
4914        assert!(ScalarValue::try_new_decimal128(2, 10, 2)
4915            .unwrap()
4916            .eq_array(&array, 1)
4917            .expect("Failed to compare arrays"));
4918        assert!(ScalarValue::try_new_decimal128(3, 10, 2)
4919            .unwrap()
4920            .eq_array(&array, 2)
4921            .expect("Failed to compare arrays"));
4922        assert_eq!(
4923            ScalarValue::Decimal128(None, 10, 2),
4924            ScalarValue::try_from_array(&array, 3).unwrap()
4925        );
4926
4927        Ok(())
4928    }
4929
4930    #[test]
4931    fn test_list_partial_cmp() {
4932        let a =
4933            ScalarValue::List(Arc::new(
4934                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4935                    Some(1),
4936                    Some(2),
4937                    Some(3),
4938                ])]),
4939            ));
4940        let b =
4941            ScalarValue::List(Arc::new(
4942                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4943                    Some(1),
4944                    Some(2),
4945                    Some(3),
4946                ])]),
4947            ));
4948        assert_eq!(a.partial_cmp(&b), Some(Ordering::Equal));
4949
4950        let a =
4951            ScalarValue::List(Arc::new(
4952                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4953                    Some(10),
4954                    Some(2),
4955                    Some(3),
4956                ])]),
4957            ));
4958        let b =
4959            ScalarValue::List(Arc::new(
4960                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4961                    Some(1),
4962                    Some(2),
4963                    Some(30),
4964                ])]),
4965            ));
4966        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
4967
4968        let a =
4969            ScalarValue::List(Arc::new(
4970                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4971                    Some(10),
4972                    Some(2),
4973                    Some(3),
4974                ])]),
4975            ));
4976        let b =
4977            ScalarValue::List(Arc::new(
4978                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4979                    Some(10),
4980                    Some(2),
4981                    Some(30),
4982                ])]),
4983            ));
4984        assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
4985
4986        let a =
4987            ScalarValue::List(Arc::new(
4988                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4989                    Some(1),
4990                    Some(2),
4991                    Some(3),
4992                ])]),
4993            ));
4994        let b =
4995            ScalarValue::List(Arc::new(
4996                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
4997                    Some(2),
4998                    Some(3),
4999                ])]),
5000            ));
5001        assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
5002
5003        let a =
5004            ScalarValue::List(Arc::new(
5005                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5006                    Some(2),
5007                    Some(3),
5008                    Some(4),
5009                ])]),
5010            ));
5011        let b =
5012            ScalarValue::List(Arc::new(
5013                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5014                    Some(1),
5015                    Some(2),
5016                ])]),
5017            ));
5018        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
5019
5020        let a =
5021            ScalarValue::List(Arc::new(
5022                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5023                    Some(1),
5024                    Some(2),
5025                    Some(3),
5026                ])]),
5027            ));
5028        let b =
5029            ScalarValue::List(Arc::new(
5030                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5031                    Some(1),
5032                    Some(2),
5033                ])]),
5034            ));
5035        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
5036
5037        let a =
5038            ScalarValue::List(Arc::new(
5039                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5040                    None,
5041                    Some(2),
5042                    Some(3),
5043                ])]),
5044            ));
5045        let b =
5046            ScalarValue::List(Arc::new(
5047                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
5048                    Some(1),
5049                    Some(2),
5050                    Some(3),
5051                ])]),
5052            ));
5053        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
5054
5055        let a = ScalarValue::LargeList(Arc::new(LargeListArray::from_iter_primitive::<
5056            Int64Type,
5057            _,
5058            _,
5059        >(vec![Some(vec![
5060            None,
5061            Some(2),
5062            Some(3),
5063        ])])));
5064        let b = ScalarValue::LargeList(Arc::new(LargeListArray::from_iter_primitive::<
5065            Int64Type,
5066            _,
5067            _,
5068        >(vec![Some(vec![
5069            Some(1),
5070            Some(2),
5071            Some(3),
5072        ])])));
5073        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
5074
5075        let a = ScalarValue::FixedSizeList(Arc::new(
5076            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
5077                vec![Some(vec![None, Some(2), Some(3)])],
5078                3,
5079            ),
5080        ));
5081        let b = ScalarValue::FixedSizeList(Arc::new(
5082            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
5083                vec![Some(vec![Some(1), Some(2), Some(3)])],
5084                3,
5085            ),
5086        ));
5087        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
5088    }
5089
5090    #[test]
5091    fn scalar_value_to_array_u64() -> Result<()> {
5092        let value = ScalarValue::UInt64(Some(13u64));
5093        let array = value.to_array().expect("Failed to convert to array");
5094        let array = as_uint64_array(&array)?;
5095        assert_eq!(array.len(), 1);
5096        assert!(!array.is_null(0));
5097        assert_eq!(array.value(0), 13);
5098
5099        let value = ScalarValue::UInt64(None);
5100        let array = value.to_array().expect("Failed to convert to array");
5101        let array = as_uint64_array(&array)?;
5102        assert_eq!(array.len(), 1);
5103        assert!(array.is_null(0));
5104        Ok(())
5105    }
5106
5107    #[test]
5108    fn scalar_value_to_array_u32() -> Result<()> {
5109        let value = ScalarValue::UInt32(Some(13u32));
5110        let array = value.to_array().expect("Failed to convert to array");
5111        let array = as_uint32_array(&array)?;
5112        assert_eq!(array.len(), 1);
5113        assert!(!array.is_null(0));
5114        assert_eq!(array.value(0), 13);
5115
5116        let value = ScalarValue::UInt32(None);
5117        let array = value.to_array().expect("Failed to convert to array");
5118        let array = as_uint32_array(&array)?;
5119        assert_eq!(array.len(), 1);
5120        assert!(array.is_null(0));
5121        Ok(())
5122    }
5123
5124    #[test]
5125    fn scalar_list_null_to_array() {
5126        let list_array = ScalarValue::new_list_nullable(&[], &DataType::UInt64);
5127
5128        assert_eq!(list_array.len(), 1);
5129        assert_eq!(list_array.values().len(), 0);
5130    }
5131
5132    #[test]
5133    fn scalar_large_list_null_to_array() {
5134        let list_array = ScalarValue::new_large_list(&[], &DataType::UInt64);
5135
5136        assert_eq!(list_array.len(), 1);
5137        assert_eq!(list_array.values().len(), 0);
5138    }
5139
5140    #[test]
5141    fn scalar_list_to_array() -> Result<()> {
5142        let values = vec![
5143            ScalarValue::UInt64(Some(100)),
5144            ScalarValue::UInt64(None),
5145            ScalarValue::UInt64(Some(101)),
5146        ];
5147        let list_array = ScalarValue::new_list_nullable(&values, &DataType::UInt64);
5148        assert_eq!(list_array.len(), 1);
5149        assert_eq!(list_array.values().len(), 3);
5150
5151        let prim_array_ref = list_array.value(0);
5152        let prim_array = as_uint64_array(&prim_array_ref)?;
5153        assert_eq!(prim_array.len(), 3);
5154        assert_eq!(prim_array.value(0), 100);
5155        assert!(prim_array.is_null(1));
5156        assert_eq!(prim_array.value(2), 101);
5157        Ok(())
5158    }
5159
5160    #[test]
5161    fn scalar_large_list_to_array() -> Result<()> {
5162        let values = vec![
5163            ScalarValue::UInt64(Some(100)),
5164            ScalarValue::UInt64(None),
5165            ScalarValue::UInt64(Some(101)),
5166        ];
5167        let list_array = ScalarValue::new_large_list(&values, &DataType::UInt64);
5168        assert_eq!(list_array.len(), 1);
5169        assert_eq!(list_array.values().len(), 3);
5170
5171        let prim_array_ref = list_array.value(0);
5172        let prim_array = as_uint64_array(&prim_array_ref)?;
5173        assert_eq!(prim_array.len(), 3);
5174        assert_eq!(prim_array.value(0), 100);
5175        assert!(prim_array.is_null(1));
5176        assert_eq!(prim_array.value(2), 101);
5177        Ok(())
5178    }
5179
5180    /// Creates array directly and via ScalarValue and ensures they are the same
5181    macro_rules! check_scalar_iter {
5182        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
5183            let scalars: Vec<_> =
5184                $INPUT.iter().map(|v| ScalarValue::$SCALAR_T(*v)).collect();
5185
5186            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
5187
5188            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
5189
5190            assert_eq!(&array, &expected);
5191        }};
5192    }
5193
5194    /// Creates array directly and via ScalarValue and ensures they are the same
5195    /// but for variants that carry a timezone field.
5196    macro_rules! check_scalar_iter_tz {
5197        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
5198            let scalars: Vec<_> = $INPUT
5199                .iter()
5200                .map(|v| ScalarValue::$SCALAR_T(*v, None))
5201                .collect();
5202
5203            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
5204
5205            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
5206
5207            assert_eq!(&array, &expected);
5208        }};
5209    }
5210
5211    /// Creates array directly and via ScalarValue and ensures they
5212    /// are the same, for string  arrays
5213    macro_rules! check_scalar_iter_string {
5214        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
5215            let scalars: Vec<_> = $INPUT
5216                .iter()
5217                .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_string())))
5218                .collect();
5219
5220            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
5221
5222            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
5223
5224            assert_eq!(&array, &expected);
5225        }};
5226    }
5227
5228    /// Creates array directly and via ScalarValue and ensures they
5229    /// are the same, for binary arrays
5230    macro_rules! check_scalar_iter_binary {
5231        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
5232            let scalars: Vec<_> = $INPUT
5233                .iter()
5234                .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_vec())))
5235                .collect();
5236
5237            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
5238
5239            let expected: $ARRAYTYPE =
5240                $INPUT.iter().map(|v| v.map(|v| v.to_vec())).collect();
5241
5242            let expected: ArrayRef = Arc::new(expected);
5243
5244            assert_eq!(&array, &expected);
5245        }};
5246    }
5247
5248    #[test]
5249    // despite clippy claiming they are useless, the code doesn't compile otherwise.
5250    #[allow(clippy::useless_vec)]
5251    fn scalar_iter_to_array_boolean() {
5252        check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]);
5253        check_scalar_iter!(Float32, Float32Array, vec![Some(1.9), None, Some(-2.1)]);
5254        check_scalar_iter!(Float64, Float64Array, vec![Some(1.9), None, Some(-2.1)]);
5255
5256        check_scalar_iter!(Int8, Int8Array, vec![Some(1), None, Some(3)]);
5257        check_scalar_iter!(Int16, Int16Array, vec![Some(1), None, Some(3)]);
5258        check_scalar_iter!(Int32, Int32Array, vec![Some(1), None, Some(3)]);
5259        check_scalar_iter!(Int64, Int64Array, vec![Some(1), None, Some(3)]);
5260
5261        check_scalar_iter!(UInt8, UInt8Array, vec![Some(1), None, Some(3)]);
5262        check_scalar_iter!(UInt16, UInt16Array, vec![Some(1), None, Some(3)]);
5263        check_scalar_iter!(UInt32, UInt32Array, vec![Some(1), None, Some(3)]);
5264        check_scalar_iter!(UInt64, UInt64Array, vec![Some(1), None, Some(3)]);
5265
5266        check_scalar_iter_tz!(
5267            TimestampSecond,
5268            TimestampSecondArray,
5269            vec![Some(1), None, Some(3)]
5270        );
5271        check_scalar_iter_tz!(
5272            TimestampMillisecond,
5273            TimestampMillisecondArray,
5274            vec![Some(1), None, Some(3)]
5275        );
5276        check_scalar_iter_tz!(
5277            TimestampMicrosecond,
5278            TimestampMicrosecondArray,
5279            vec![Some(1), None, Some(3)]
5280        );
5281        check_scalar_iter_tz!(
5282            TimestampNanosecond,
5283            TimestampNanosecondArray,
5284            vec![Some(1), None, Some(3)]
5285        );
5286
5287        check_scalar_iter_string!(
5288            Utf8,
5289            StringArray,
5290            vec![Some("foo"), None, Some("bar")]
5291        );
5292        check_scalar_iter_string!(
5293            LargeUtf8,
5294            LargeStringArray,
5295            vec![Some("foo"), None, Some("bar")]
5296        );
5297        check_scalar_iter_binary!(
5298            Binary,
5299            BinaryArray,
5300            vec![Some(b"foo"), None, Some(b"bar")]
5301        );
5302        check_scalar_iter_binary!(
5303            LargeBinary,
5304            LargeBinaryArray,
5305            vec![Some(b"foo"), None, Some(b"bar")]
5306        );
5307    }
5308
5309    #[test]
5310    fn scalar_iter_to_array_empty() {
5311        let scalars = vec![] as Vec<ScalarValue>;
5312
5313        let result = ScalarValue::iter_to_array(scalars).unwrap_err();
5314        assert!(
5315            result
5316                .to_string()
5317                .contains("Empty iterator passed to ScalarValue::iter_to_array"),
5318            "{}",
5319            result
5320        );
5321    }
5322
5323    #[test]
5324    fn scalar_iter_to_dictionary() {
5325        fn make_val(v: Option<String>) -> ScalarValue {
5326            let key_type = DataType::Int32;
5327            let value = ScalarValue::Utf8(v);
5328            ScalarValue::Dictionary(Box::new(key_type), Box::new(value))
5329        }
5330
5331        let scalars = [
5332            make_val(Some("Foo".into())),
5333            make_val(None),
5334            make_val(Some("Bar".into())),
5335        ];
5336
5337        let array = ScalarValue::iter_to_array(scalars).unwrap();
5338        let array = as_dictionary_array::<Int32Type>(&array).unwrap();
5339        let values_array = as_string_array(array.values()).unwrap();
5340
5341        let values = array
5342            .keys_iter()
5343            .map(|k| {
5344                k.map(|k| {
5345                    assert!(values_array.is_valid(k));
5346                    values_array.value(k)
5347                })
5348            })
5349            .collect::<Vec<_>>();
5350
5351        let expected = vec![Some("Foo"), None, Some("Bar")];
5352        assert_eq!(values, expected);
5353    }
5354
5355    #[test]
5356    fn scalar_iter_to_array_mismatched_types() {
5357        use ScalarValue::*;
5358        // If the scalar values are not all the correct type, error here
5359        let scalars = [Boolean(Some(true)), Int32(Some(5))];
5360
5361        let result = ScalarValue::iter_to_array(scalars).unwrap_err();
5362        assert!(result.to_string().contains("Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)"),
5363                "{}", result);
5364    }
5365
5366    #[test]
5367    fn scalar_try_from_array_null() {
5368        let array = vec![Some(33), None].into_iter().collect::<Int64Array>();
5369        let array: ArrayRef = Arc::new(array);
5370
5371        assert_eq!(
5372            ScalarValue::Int64(Some(33)),
5373            ScalarValue::try_from_array(&array, 0).unwrap()
5374        );
5375        assert_eq!(
5376            ScalarValue::Int64(None),
5377            ScalarValue::try_from_array(&array, 1).unwrap()
5378        );
5379    }
5380
5381    #[test]
5382    fn scalar_try_from_array_list_array_null() {
5383        let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5384            Some(vec![Some(1), Some(2)]),
5385            None,
5386        ]);
5387
5388        let non_null_list_scalar = ScalarValue::try_from_array(&list, 0).unwrap();
5389        let null_list_scalar = ScalarValue::try_from_array(&list, 1).unwrap();
5390
5391        let data_type =
5392            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
5393
5394        assert_eq!(non_null_list_scalar.data_type(), data_type);
5395        assert_eq!(null_list_scalar.data_type(), data_type);
5396    }
5397
5398    #[test]
5399    fn scalar_try_from_list_datatypes() {
5400        let inner_field = Arc::new(Field::new_list_field(DataType::Int32, true));
5401
5402        // Test for List
5403        let data_type = &DataType::List(Arc::clone(&inner_field));
5404        let scalar: ScalarValue = data_type.try_into().unwrap();
5405        let expected = ScalarValue::List(
5406            new_null_array(data_type, 1)
5407                .as_list::<i32>()
5408                .to_owned()
5409                .into(),
5410        );
5411        assert_eq!(expected, scalar);
5412        assert!(expected.is_null());
5413
5414        // Test for LargeList
5415        let data_type = &DataType::LargeList(Arc::clone(&inner_field));
5416        let scalar: ScalarValue = data_type.try_into().unwrap();
5417        let expected = ScalarValue::LargeList(
5418            new_null_array(data_type, 1)
5419                .as_list::<i64>()
5420                .to_owned()
5421                .into(),
5422        );
5423        assert_eq!(expected, scalar);
5424        assert!(expected.is_null());
5425
5426        // Test for FixedSizeList(5)
5427        let data_type = &DataType::FixedSizeList(Arc::clone(&inner_field), 5);
5428        let scalar: ScalarValue = data_type.try_into().unwrap();
5429        let expected = ScalarValue::FixedSizeList(
5430            new_null_array(data_type, 1)
5431                .as_fixed_size_list()
5432                .to_owned()
5433                .into(),
5434        );
5435        assert_eq!(expected, scalar);
5436        assert!(expected.is_null());
5437    }
5438
5439    #[test]
5440    fn scalar_try_from_list_of_list() {
5441        let data_type = DataType::List(Arc::new(Field::new_list_field(
5442            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
5443            true,
5444        )));
5445        let data_type = &data_type;
5446        let scalar: ScalarValue = data_type.try_into().unwrap();
5447
5448        let expected = ScalarValue::List(
5449            new_null_array(
5450                &DataType::List(Arc::new(Field::new_list_field(
5451                    DataType::List(Arc::new(Field::new_list_field(
5452                        DataType::Int32,
5453                        true,
5454                    ))),
5455                    true,
5456                ))),
5457                1,
5458            )
5459            .as_list::<i32>()
5460            .to_owned()
5461            .into(),
5462        );
5463
5464        assert_eq!(expected, scalar)
5465    }
5466
5467    #[test]
5468    fn scalar_try_from_not_equal_list_nested_list() {
5469        let list_data_type =
5470            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
5471        let data_type = &list_data_type;
5472        let list_scalar: ScalarValue = data_type.try_into().unwrap();
5473
5474        let nested_list_data_type = DataType::List(Arc::new(Field::new_list_field(
5475            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
5476            true,
5477        )));
5478        let data_type = &nested_list_data_type;
5479        let nested_list_scalar: ScalarValue = data_type.try_into().unwrap();
5480
5481        assert_ne!(list_scalar, nested_list_scalar);
5482    }
5483
5484    #[test]
5485    fn scalar_try_from_dict_datatype() {
5486        let data_type =
5487            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
5488        let data_type = &data_type;
5489        let expected = ScalarValue::Dictionary(
5490            Box::new(DataType::Int8),
5491            Box::new(ScalarValue::Utf8(None)),
5492        );
5493        assert_eq!(expected, data_type.try_into().unwrap())
5494    }
5495
5496    #[test]
5497    fn size_of_scalar() {
5498        // Since ScalarValues are used in a non trivial number of places,
5499        // making it larger means significant more memory consumption
5500        // per distinct value.
5501        //
5502        // Thus this test ensures that no code change makes ScalarValue larger
5503        //
5504        // The alignment requirements differ across architectures and
5505        // thus the size of the enum appears to as well
5506
5507        // The value may also change depending on rust version
5508        assert_eq!(size_of::<ScalarValue>(), 64);
5509    }
5510
5511    #[test]
5512    fn memory_size() {
5513        let sv = ScalarValue::Binary(Some(Vec::with_capacity(10)));
5514        assert_eq!(sv.size(), size_of::<ScalarValue>() + 10,);
5515        let sv_size = sv.size();
5516
5517        let mut v = Vec::with_capacity(10);
5518        // do NOT clone `sv` here because this may shrink the vector capacity
5519        v.push(sv);
5520        assert_eq!(v.capacity(), 10);
5521        assert_eq!(
5522            ScalarValue::size_of_vec(&v),
5523            size_of::<Vec<ScalarValue>>() + (9 * size_of::<ScalarValue>()) + sv_size,
5524        );
5525
5526        let mut s = HashSet::with_capacity(0);
5527        // do NOT clone `sv` here because this may shrink the vector capacity
5528        s.insert(v.pop().unwrap());
5529        // hashsets may easily grow during insert, so capacity is dynamic
5530        let s_capacity = s.capacity();
5531        assert_eq!(
5532            ScalarValue::size_of_hashset(&s),
5533            size_of::<HashSet<ScalarValue>>()
5534                + ((s_capacity - 1) * size_of::<ScalarValue>())
5535                + sv_size,
5536        );
5537    }
5538
5539    #[test]
5540    fn scalar_eq_array() {
5541        // Validate that eq_array has the same semantics as ScalarValue::eq
5542        macro_rules! make_typed_vec {
5543            ($INPUT:expr, $TYPE:ident) => {{
5544                $INPUT
5545                    .iter()
5546                    .map(|v| v.map(|v| v as $TYPE))
5547                    .collect::<Vec<_>>()
5548            }};
5549        }
5550
5551        let bool_vals = [Some(true), None, Some(false)];
5552        let f32_vals = [Some(-1.0), None, Some(1.0)];
5553        let f64_vals = make_typed_vec!(f32_vals, f64);
5554
5555        let i8_vals = [Some(-1), None, Some(1)];
5556        let i16_vals = make_typed_vec!(i8_vals, i16);
5557        let i32_vals = make_typed_vec!(i8_vals, i32);
5558        let i64_vals = make_typed_vec!(i8_vals, i64);
5559
5560        let u8_vals = [Some(0), None, Some(1)];
5561        let u16_vals = make_typed_vec!(u8_vals, u16);
5562        let u32_vals = make_typed_vec!(u8_vals, u32);
5563        let u64_vals = make_typed_vec!(u8_vals, u64);
5564
5565        let str_vals = [Some("foo"), None, Some("bar")];
5566
5567        let interval_dt_vals = [
5568            Some(IntervalDayTime::MINUS_ONE),
5569            None,
5570            Some(IntervalDayTime::ONE),
5571        ];
5572        let interval_mdn_vals = [
5573            Some(IntervalMonthDayNano::MINUS_ONE),
5574            None,
5575            Some(IntervalMonthDayNano::ONE),
5576        ];
5577
5578        /// Test each value in `scalar` with the corresponding element
5579        /// at `array`. Assumes each element is unique (aka not equal
5580        /// with all other indexes)
5581        #[derive(Debug)]
5582        struct TestCase {
5583            array: ArrayRef,
5584            scalars: Vec<ScalarValue>,
5585        }
5586
5587        /// Create a test case for casing the input to the specified array type
5588        macro_rules! make_test_case {
5589            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
5590                TestCase {
5591                    array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
5592                    scalars: $INPUT.iter().map(|v| ScalarValue::$SCALAR_TY(*v)).collect(),
5593                }
5594            }};
5595
5596            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
5597                let tz = $TZ;
5598                TestCase {
5599                    array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
5600                    scalars: $INPUT
5601                        .iter()
5602                        .map(|v| ScalarValue::$SCALAR_TY(*v, tz.clone()))
5603                        .collect(),
5604                }
5605            }};
5606        }
5607
5608        macro_rules! make_str_test_case {
5609            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
5610                TestCase {
5611                    array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
5612                    scalars: $INPUT
5613                        .iter()
5614                        .map(|v| ScalarValue::$SCALAR_TY(v.map(|v| v.to_string())))
5615                        .collect(),
5616                }
5617            }};
5618        }
5619
5620        macro_rules! make_binary_test_case {
5621            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
5622                TestCase {
5623                    array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
5624                    scalars: $INPUT
5625                        .iter()
5626                        .map(|v| {
5627                            ScalarValue::$SCALAR_TY(v.map(|v| v.as_bytes().to_vec()))
5628                        })
5629                        .collect(),
5630                }
5631            }};
5632        }
5633
5634        /// create a test case for DictionaryArray<$INDEX_TY>
5635        macro_rules! make_str_dict_test_case {
5636            ($INPUT:expr, $INDEX_TY:ident) => {{
5637                TestCase {
5638                    array: Arc::new(
5639                        $INPUT
5640                            .iter()
5641                            .cloned()
5642                            .collect::<DictionaryArray<$INDEX_TY>>(),
5643                    ),
5644                    scalars: $INPUT
5645                        .iter()
5646                        .map(|v| {
5647                            ScalarValue::Dictionary(
5648                                Box::new($INDEX_TY::DATA_TYPE),
5649                                Box::new(ScalarValue::Utf8(v.map(|v| v.to_string()))),
5650                            )
5651                        })
5652                        .collect(),
5653                }
5654            }};
5655        }
5656
5657        let cases = vec![
5658            make_test_case!(bool_vals, BooleanArray, Boolean),
5659            make_test_case!(f32_vals, Float32Array, Float32),
5660            make_test_case!(f64_vals, Float64Array, Float64),
5661            make_test_case!(i8_vals, Int8Array, Int8),
5662            make_test_case!(i16_vals, Int16Array, Int16),
5663            make_test_case!(i32_vals, Int32Array, Int32),
5664            make_test_case!(i64_vals, Int64Array, Int64),
5665            make_test_case!(u8_vals, UInt8Array, UInt8),
5666            make_test_case!(u16_vals, UInt16Array, UInt16),
5667            make_test_case!(u32_vals, UInt32Array, UInt32),
5668            make_test_case!(u64_vals, UInt64Array, UInt64),
5669            make_str_test_case!(str_vals, StringArray, Utf8),
5670            make_str_test_case!(str_vals, LargeStringArray, LargeUtf8),
5671            make_binary_test_case!(str_vals, BinaryArray, Binary),
5672            make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary),
5673            make_test_case!(i32_vals, Date32Array, Date32),
5674            make_test_case!(i64_vals, Date64Array, Date64),
5675            make_test_case!(i32_vals, Time32SecondArray, Time32Second),
5676            make_test_case!(i32_vals, Time32MillisecondArray, Time32Millisecond),
5677            make_test_case!(i64_vals, Time64MicrosecondArray, Time64Microsecond),
5678            make_test_case!(i64_vals, Time64NanosecondArray, Time64Nanosecond),
5679            make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None),
5680            make_test_case!(
5681                i64_vals,
5682                TimestampSecondArray,
5683                TimestampSecond,
5684                Some("UTC".into())
5685            ),
5686            make_test_case!(
5687                i64_vals,
5688                TimestampMillisecondArray,
5689                TimestampMillisecond,
5690                None
5691            ),
5692            make_test_case!(
5693                i64_vals,
5694                TimestampMillisecondArray,
5695                TimestampMillisecond,
5696                Some("UTC".into())
5697            ),
5698            make_test_case!(
5699                i64_vals,
5700                TimestampMicrosecondArray,
5701                TimestampMicrosecond,
5702                None
5703            ),
5704            make_test_case!(
5705                i64_vals,
5706                TimestampMicrosecondArray,
5707                TimestampMicrosecond,
5708                Some("UTC".into())
5709            ),
5710            make_test_case!(
5711                i64_vals,
5712                TimestampNanosecondArray,
5713                TimestampNanosecond,
5714                None
5715            ),
5716            make_test_case!(
5717                i64_vals,
5718                TimestampNanosecondArray,
5719                TimestampNanosecond,
5720                Some("UTC".into())
5721            ),
5722            make_test_case!(i32_vals, IntervalYearMonthArray, IntervalYearMonth),
5723            make_test_case!(interval_dt_vals, IntervalDayTimeArray, IntervalDayTime),
5724            make_test_case!(
5725                interval_mdn_vals,
5726                IntervalMonthDayNanoArray,
5727                IntervalMonthDayNano
5728            ),
5729            make_str_dict_test_case!(str_vals, Int8Type),
5730            make_str_dict_test_case!(str_vals, Int16Type),
5731            make_str_dict_test_case!(str_vals, Int32Type),
5732            make_str_dict_test_case!(str_vals, Int64Type),
5733            make_str_dict_test_case!(str_vals, UInt8Type),
5734            make_str_dict_test_case!(str_vals, UInt16Type),
5735            make_str_dict_test_case!(str_vals, UInt32Type),
5736            make_str_dict_test_case!(str_vals, UInt64Type),
5737        ];
5738
5739        for case in cases {
5740            println!("**** Test Case *****");
5741            let TestCase { array, scalars } = case;
5742            println!("Input array type: {}", array.data_type());
5743            println!("Input scalars: {scalars:#?}");
5744            assert_eq!(array.len(), scalars.len());
5745
5746            for (index, scalar) in scalars.into_iter().enumerate() {
5747                assert!(
5748                    scalar
5749                        .eq_array(&array, index)
5750                        .expect("Failed to compare arrays"),
5751                    "Expected {scalar:?} to be equal to {array:?} at index {index}"
5752                );
5753
5754                // test that all other elements are *not* equal
5755                for other_index in 0..array.len() {
5756                    if index != other_index {
5757                        assert!(
5758                            !scalar.eq_array(&array, other_index).expect("Failed to compare arrays"),
5759                            "Expected {scalar:?} to be NOT equal to {array:?} at index {other_index}"
5760                        );
5761                    }
5762                }
5763            }
5764        }
5765    }
5766
5767    #[test]
5768    fn scalar_partial_ordering() {
5769        use ScalarValue::*;
5770
5771        assert_eq!(
5772            Int64(Some(33)).partial_cmp(&Int64(Some(0))),
5773            Some(Ordering::Greater)
5774        );
5775        assert_eq!(
5776            Int64(Some(0)).partial_cmp(&Int64(Some(33))),
5777            Some(Ordering::Less)
5778        );
5779        assert_eq!(
5780            Int64(Some(33)).partial_cmp(&Int64(Some(33))),
5781            Some(Ordering::Equal)
5782        );
5783        // For different data type, `partial_cmp` returns None.
5784        assert_eq!(Int64(Some(33)).partial_cmp(&Int32(Some(33))), None);
5785        assert_eq!(Int32(Some(33)).partial_cmp(&Int64(Some(33))), None);
5786
5787        assert_eq!(
5788            ScalarValue::from(vec![
5789                ("A", ScalarValue::from(1.0)),
5790                ("B", ScalarValue::from("Z")),
5791            ])
5792            .partial_cmp(&ScalarValue::from(vec![
5793                ("A", ScalarValue::from(2.0)),
5794                ("B", ScalarValue::from("A")),
5795            ])),
5796            Some(Ordering::Less)
5797        );
5798
5799        // For different struct fields, `partial_cmp` returns None.
5800        assert_eq!(
5801            ScalarValue::from(vec![
5802                ("A", ScalarValue::from(1.0)),
5803                ("B", ScalarValue::from("Z")),
5804            ])
5805            .partial_cmp(&ScalarValue::from(vec![
5806                ("a", ScalarValue::from(2.0)),
5807                ("b", ScalarValue::from("A")),
5808            ])),
5809            None
5810        );
5811    }
5812
5813    #[test]
5814    fn test_scalar_value_from_string() {
5815        let scalar = ScalarValue::from("foo");
5816        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5817        let scalar = ScalarValue::from("foo".to_string());
5818        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5819        let scalar = ScalarValue::from_str("foo").unwrap();
5820        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
5821    }
5822
5823    #[test]
5824    fn test_scalar_struct() {
5825        let field_a = Arc::new(Field::new("A", DataType::Int32, false));
5826        let field_b = Arc::new(Field::new("B", DataType::Boolean, false));
5827        let field_c = Arc::new(Field::new("C", DataType::Utf8, false));
5828
5829        let field_e = Arc::new(Field::new("e", DataType::Int16, false));
5830        let field_f = Arc::new(Field::new("f", DataType::Int64, false));
5831        let field_d = Arc::new(Field::new(
5832            "D",
5833            DataType::Struct(vec![Arc::clone(&field_e), Arc::clone(&field_f)].into()),
5834            false,
5835        ));
5836
5837        let struct_array = StructArray::from(vec![
5838            (
5839                Arc::clone(&field_e),
5840                Arc::new(Int16Array::from(vec![2])) as ArrayRef,
5841            ),
5842            (
5843                Arc::clone(&field_f),
5844                Arc::new(Int64Array::from(vec![3])) as ArrayRef,
5845            ),
5846        ]);
5847
5848        let struct_array = StructArray::from(vec![
5849            (
5850                Arc::clone(&field_a),
5851                Arc::new(Int32Array::from(vec![23])) as ArrayRef,
5852            ),
5853            (
5854                Arc::clone(&field_b),
5855                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
5856            ),
5857            (
5858                Arc::clone(&field_c),
5859                Arc::new(StringArray::from(vec!["Hello"])) as ArrayRef,
5860            ),
5861            (Arc::clone(&field_d), Arc::new(struct_array) as ArrayRef),
5862        ]);
5863        let scalar = ScalarValue::Struct(Arc::new(struct_array));
5864
5865        let array = scalar
5866            .to_array_of_size(2)
5867            .expect("Failed to convert to array of size");
5868
5869        let expected = Arc::new(StructArray::from(vec![
5870            (
5871                Arc::clone(&field_a),
5872                Arc::new(Int32Array::from(vec![23, 23])) as ArrayRef,
5873            ),
5874            (
5875                Arc::clone(&field_b),
5876                Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
5877            ),
5878            (
5879                Arc::clone(&field_c),
5880                Arc::new(StringArray::from(vec!["Hello", "Hello"])) as ArrayRef,
5881            ),
5882            (
5883                Arc::clone(&field_d),
5884                Arc::new(StructArray::from(vec![
5885                    (
5886                        Arc::clone(&field_e),
5887                        Arc::new(Int16Array::from(vec![2, 2])) as ArrayRef,
5888                    ),
5889                    (
5890                        Arc::clone(&field_f),
5891                        Arc::new(Int64Array::from(vec![3, 3])) as ArrayRef,
5892                    ),
5893                ])) as ArrayRef,
5894            ),
5895        ])) as ArrayRef;
5896
5897        assert_eq!(&array, &expected);
5898
5899        // Construct from second element of ArrayRef
5900        let constructed = ScalarValue::try_from_array(&expected, 1).unwrap();
5901        assert_eq!(constructed, scalar);
5902
5903        // None version
5904        let none_scalar = ScalarValue::try_from(array.data_type()).unwrap();
5905        assert!(none_scalar.is_null());
5906        assert_eq!(
5907            format!("{none_scalar:?}"),
5908            String::from("Struct({A:,B:,C:,D:})")
5909        );
5910
5911        // Construct with convenience From<Vec<(&str, ScalarValue)>>
5912        let constructed = ScalarValue::from(vec![
5913            ("A", ScalarValue::from(23)),
5914            ("B", ScalarValue::from(false)),
5915            ("C", ScalarValue::from("Hello")),
5916            (
5917                "D",
5918                ScalarValue::from(vec![
5919                    ("e", ScalarValue::from(2i16)),
5920                    ("f", ScalarValue::from(3i64)),
5921                ]),
5922            ),
5923        ]);
5924        assert_eq!(constructed, scalar);
5925
5926        // Build Array from Vec of structs
5927        let scalars = vec![
5928            ScalarValue::from(vec![
5929                ("A", ScalarValue::from(23)),
5930                ("B", ScalarValue::from(false)),
5931                ("C", ScalarValue::from("Hello")),
5932                (
5933                    "D",
5934                    ScalarValue::from(vec![
5935                        ("e", ScalarValue::from(2i16)),
5936                        ("f", ScalarValue::from(3i64)),
5937                    ]),
5938                ),
5939            ]),
5940            ScalarValue::from(vec![
5941                ("A", ScalarValue::from(7)),
5942                ("B", ScalarValue::from(true)),
5943                ("C", ScalarValue::from("World")),
5944                (
5945                    "D",
5946                    ScalarValue::from(vec![
5947                        ("e", ScalarValue::from(4i16)),
5948                        ("f", ScalarValue::from(5i64)),
5949                    ]),
5950                ),
5951            ]),
5952            ScalarValue::from(vec![
5953                ("A", ScalarValue::from(-1000)),
5954                ("B", ScalarValue::from(true)),
5955                ("C", ScalarValue::from("!!!!!")),
5956                (
5957                    "D",
5958                    ScalarValue::from(vec![
5959                        ("e", ScalarValue::from(6i16)),
5960                        ("f", ScalarValue::from(7i64)),
5961                    ]),
5962                ),
5963            ]),
5964        ];
5965        let array = ScalarValue::iter_to_array(scalars).unwrap();
5966
5967        let expected = Arc::new(StructArray::from(vec![
5968            (
5969                Arc::clone(&field_a),
5970                Arc::new(Int32Array::from(vec![23, 7, -1000])) as ArrayRef,
5971            ),
5972            (
5973                Arc::clone(&field_b),
5974                Arc::new(BooleanArray::from(vec![false, true, true])) as ArrayRef,
5975            ),
5976            (
5977                Arc::clone(&field_c),
5978                Arc::new(StringArray::from(vec!["Hello", "World", "!!!!!"])) as ArrayRef,
5979            ),
5980            (
5981                Arc::clone(&field_d),
5982                Arc::new(StructArray::from(vec![
5983                    (
5984                        Arc::clone(&field_e),
5985                        Arc::new(Int16Array::from(vec![2, 4, 6])) as ArrayRef,
5986                    ),
5987                    (
5988                        Arc::clone(&field_f),
5989                        Arc::new(Int64Array::from(vec![3, 5, 7])) as ArrayRef,
5990                    ),
5991                ])) as ArrayRef,
5992            ),
5993        ])) as ArrayRef;
5994
5995        assert_eq!(&array, &expected);
5996    }
5997
5998    #[test]
5999    fn round_trip() {
6000        // Each array type should be able to round tripped through a scalar
6001        let cases: Vec<ArrayRef> = vec![
6002            // int
6003            Arc::new(Int8Array::from(vec![Some(1), None, Some(3)])),
6004            Arc::new(Int16Array::from(vec![Some(1), None, Some(3)])),
6005            Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
6006            Arc::new(Int64Array::from(vec![Some(1), None, Some(3)])),
6007            Arc::new(UInt8Array::from(vec![Some(1), None, Some(3)])),
6008            Arc::new(UInt16Array::from(vec![Some(1), None, Some(3)])),
6009            Arc::new(UInt32Array::from(vec![Some(1), None, Some(3)])),
6010            Arc::new(UInt64Array::from(vec![Some(1), None, Some(3)])),
6011            // bool
6012            Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)])),
6013            // float
6014            Arc::new(Float32Array::from(vec![Some(1.0), None, Some(3.0)])),
6015            Arc::new(Float64Array::from(vec![Some(1.0), None, Some(3.0)])),
6016            // string array
6017            Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")])),
6018            Arc::new(LargeStringArray::from(vec![Some("foo"), None, Some("bar")])),
6019            Arc::new(StringViewArray::from(vec![Some("foo"), None, Some("bar")])),
6020            // string dictionary
6021            {
6022                let mut builder = StringDictionaryBuilder::<Int32Type>::new();
6023                builder.append("foo").unwrap();
6024                builder.append_null();
6025                builder.append("bar").unwrap();
6026                Arc::new(builder.finish())
6027            },
6028            // binary array
6029            Arc::new(BinaryArray::from_iter(vec![
6030                Some(b"foo"),
6031                None,
6032                Some(b"bar"),
6033            ])),
6034            Arc::new(LargeBinaryArray::from_iter(vec![
6035                Some(b"foo"),
6036                None,
6037                Some(b"bar"),
6038            ])),
6039            Arc::new(BinaryViewArray::from_iter(vec![
6040                Some(b"foo"),
6041                None,
6042                Some(b"bar"),
6043            ])),
6044            // timestamp
6045            Arc::new(TimestampSecondArray::from(vec![Some(1), None, Some(3)])),
6046            Arc::new(TimestampMillisecondArray::from(vec![
6047                Some(1),
6048                None,
6049                Some(3),
6050            ])),
6051            Arc::new(TimestampMicrosecondArray::from(vec![
6052                Some(1),
6053                None,
6054                Some(3),
6055            ])),
6056            Arc::new(TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])),
6057            // timestamp with timezone
6058            Arc::new(
6059                TimestampSecondArray::from(vec![Some(1), None, Some(3)])
6060                    .with_timezone_opt(Some("UTC")),
6061            ),
6062            Arc::new(
6063                TimestampMillisecondArray::from(vec![Some(1), None, Some(3)])
6064                    .with_timezone_opt(Some("UTC")),
6065            ),
6066            Arc::new(
6067                TimestampMicrosecondArray::from(vec![Some(1), None, Some(3)])
6068                    .with_timezone_opt(Some("UTC")),
6069            ),
6070            Arc::new(
6071                TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])
6072                    .with_timezone_opt(Some("UTC")),
6073            ),
6074            // date
6075            Arc::new(Date32Array::from(vec![Some(1), None, Some(3)])),
6076            Arc::new(Date64Array::from(vec![Some(1), None, Some(3)])),
6077            // time
6078            Arc::new(Time32SecondArray::from(vec![Some(1), None, Some(3)])),
6079            Arc::new(Time32MillisecondArray::from(vec![Some(1), None, Some(3)])),
6080            Arc::new(Time64MicrosecondArray::from(vec![Some(1), None, Some(3)])),
6081            Arc::new(Time64NanosecondArray::from(vec![Some(1), None, Some(3)])),
6082            // null array
6083            Arc::new(NullArray::new(3)),
6084            // dense union
6085            {
6086                let mut builder = UnionBuilder::new_dense();
6087                builder.append::<Int32Type>("a", 1).unwrap();
6088                builder.append::<Float64Type>("b", 3.4).unwrap();
6089                Arc::new(builder.build().unwrap())
6090            },
6091            // sparse union
6092            {
6093                let mut builder = UnionBuilder::new_sparse();
6094                builder.append::<Int32Type>("a", 1).unwrap();
6095                builder.append::<Float64Type>("b", 3.4).unwrap();
6096                Arc::new(builder.build().unwrap())
6097            },
6098            // list array
6099            {
6100                let values_builder = StringBuilder::new();
6101                let mut builder = ListBuilder::new(values_builder);
6102                // [A, B]
6103                builder.values().append_value("A");
6104                builder.values().append_value("B");
6105                builder.append(true);
6106                // [ ] (empty list)
6107                builder.append(true);
6108                // Null
6109                builder.values().append_value("?"); // irrelevant
6110                builder.append(false);
6111                Arc::new(builder.finish())
6112            },
6113            // large list array
6114            {
6115                let values_builder = StringBuilder::new();
6116                let mut builder = LargeListBuilder::new(values_builder);
6117                // [A, B]
6118                builder.values().append_value("A");
6119                builder.values().append_value("B");
6120                builder.append(true);
6121                // [ ] (empty list)
6122                builder.append(true);
6123                // Null
6124                builder.append(false);
6125                Arc::new(builder.finish())
6126            },
6127            // fixed size list array
6128            {
6129                let values_builder = Int32Builder::new();
6130                let mut builder = FixedSizeListBuilder::new(values_builder, 3);
6131
6132                //  [[0, 1, 2], null, [3, null, 5]
6133                builder.values().append_value(0);
6134                builder.values().append_value(1);
6135                builder.values().append_value(2);
6136                builder.append(true);
6137                builder.values().append_null();
6138                builder.values().append_null();
6139                builder.values().append_null();
6140                builder.append(false);
6141                builder.values().append_value(3);
6142                builder.values().append_null();
6143                builder.values().append_value(5);
6144                builder.append(true);
6145                Arc::new(builder.finish())
6146            },
6147            // map
6148            {
6149                let string_builder = StringBuilder::new();
6150                let int_builder = Int32Builder::with_capacity(4);
6151
6152                let mut builder = MapBuilder::new(None, string_builder, int_builder);
6153                // {"joe": 1}
6154                builder.keys().append_value("joe");
6155                builder.values().append_value(1);
6156                builder.append(true).unwrap();
6157                // {}
6158                builder.append(true).unwrap();
6159                // null
6160                builder.append(false).unwrap();
6161
6162                Arc::new(builder.finish())
6163            },
6164        ];
6165
6166        for arr in cases {
6167            round_trip_through_scalar(arr);
6168        }
6169    }
6170
6171    /// for each row in `arr`:
6172    /// 1. convert to a `ScalarValue`
6173    /// 2. Convert `ScalarValue` back to an `ArrayRef`
6174    /// 3. Compare the original array (sliced) and new array for equality
6175    fn round_trip_through_scalar(arr: ArrayRef) {
6176        for i in 0..arr.len() {
6177            // convert Scalar --> Array
6178            let scalar = ScalarValue::try_from_array(&arr, i).unwrap();
6179            let array = scalar.to_array_of_size(1).unwrap();
6180            assert_eq!(array.len(), 1);
6181            assert_eq!(array.data_type(), arr.data_type());
6182            assert_eq!(array.as_ref(), arr.slice(i, 1).as_ref());
6183        }
6184    }
6185
6186    #[test]
6187    fn test_scalar_union_sparse() {
6188        let field_a = Arc::new(Field::new("A", DataType::Int32, true));
6189        let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
6190        let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
6191        let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
6192
6193        let mut values_a = vec![None; 6];
6194        values_a[0] = Some(42);
6195        let mut values_b = vec![None; 6];
6196        values_b[1] = Some(true);
6197        let mut values_c = vec![None; 6];
6198        values_c[2] = Some("foo");
6199        let children: Vec<ArrayRef> = vec![
6200            Arc::new(Int32Array::from(values_a)),
6201            Arc::new(BooleanArray::from(values_b)),
6202            Arc::new(StringArray::from(values_c)),
6203        ];
6204
6205        let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
6206        let array: ArrayRef = Arc::new(
6207            UnionArray::try_new(fields.clone(), type_ids, None, children)
6208                .expect("UnionArray"),
6209        );
6210
6211        let expected = [
6212            (0, ScalarValue::from(42)),
6213            (1, ScalarValue::from(true)),
6214            (2, ScalarValue::from("foo")),
6215            (0, ScalarValue::Int32(None)),
6216            (1, ScalarValue::Boolean(None)),
6217            (2, ScalarValue::Utf8(None)),
6218        ];
6219
6220        for (i, (ti, value)) in expected.into_iter().enumerate() {
6221            let is_null = value.is_null();
6222            let value = Some((ti, Box::new(value)));
6223            let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Sparse);
6224            let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
6225
6226            assert_eq!(
6227                actual, expected,
6228                "[{i}] {actual} was not equal to {expected}"
6229            );
6230
6231            assert!(
6232                expected.eq_array(&array, i).expect("eq_array"),
6233                "[{i}] {expected}.eq_array was false"
6234            );
6235
6236            if is_null {
6237                assert!(actual.is_null(), "[{i}] {actual} was not null")
6238            }
6239        }
6240    }
6241
6242    #[test]
6243    fn test_scalar_union_dense() {
6244        let field_a = Arc::new(Field::new("A", DataType::Int32, true));
6245        let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
6246        let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
6247        let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
6248        let children: Vec<ArrayRef> = vec![
6249            Arc::new(Int32Array::from(vec![Some(42), None])),
6250            Arc::new(BooleanArray::from(vec![Some(true), None])),
6251            Arc::new(StringArray::from(vec![Some("foo"), None])),
6252        ];
6253
6254        let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
6255        let offsets = ScalarBuffer::from(vec![0, 0, 0, 1, 1, 1]);
6256        let array: ArrayRef = Arc::new(
6257            UnionArray::try_new(fields.clone(), type_ids, Some(offsets), children)
6258                .expect("UnionArray"),
6259        );
6260
6261        let expected = [
6262            (0, ScalarValue::from(42)),
6263            (1, ScalarValue::from(true)),
6264            (2, ScalarValue::from("foo")),
6265            (0, ScalarValue::Int32(None)),
6266            (1, ScalarValue::Boolean(None)),
6267            (2, ScalarValue::Utf8(None)),
6268        ];
6269
6270        for (i, (ti, value)) in expected.into_iter().enumerate() {
6271            let is_null = value.is_null();
6272            let value = Some((ti, Box::new(value)));
6273            let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Dense);
6274            let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
6275
6276            assert_eq!(
6277                actual, expected,
6278                "[{i}] {actual} was not equal to {expected}"
6279            );
6280
6281            assert!(
6282                expected.eq_array(&array, i).expect("eq_array"),
6283                "[{i}] {expected}.eq_array was false"
6284            );
6285
6286            if is_null {
6287                assert!(actual.is_null(), "[{i}] {actual} was not null")
6288            }
6289        }
6290    }
6291
6292    #[test]
6293    fn test_lists_in_struct() {
6294        let field_a = Arc::new(Field::new("A", DataType::Utf8, false));
6295        let field_primitive_list = Arc::new(Field::new(
6296            "primitive_list",
6297            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6298            false,
6299        ));
6300
6301        // Define primitive list scalars
6302        let l0 =
6303            ScalarValue::List(Arc::new(
6304                ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
6305                    Some(1),
6306                    Some(2),
6307                    Some(3),
6308                ])]),
6309            ));
6310        let l1 =
6311            ScalarValue::List(Arc::new(
6312                ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
6313                    Some(4),
6314                    Some(5),
6315                ])]),
6316            ));
6317        let l2 = ScalarValue::List(Arc::new(ListArray::from_iter_primitive::<
6318            Int32Type,
6319            _,
6320            _,
6321        >(vec![Some(vec![Some(6)])])));
6322
6323        // Define struct scalars
6324        let s0 = ScalarValue::from(vec![
6325            ("A", ScalarValue::from("First")),
6326            ("primitive_list", l0),
6327        ]);
6328
6329        let s1 = ScalarValue::from(vec![
6330            ("A", ScalarValue::from("Second")),
6331            ("primitive_list", l1),
6332        ]);
6333
6334        let s2 = ScalarValue::from(vec![
6335            ("A", ScalarValue::from("Third")),
6336            ("primitive_list", l2),
6337        ]);
6338
6339        // iter_to_array for struct scalars
6340        let array =
6341            ScalarValue::iter_to_array(vec![s0.clone(), s1.clone(), s2.clone()]).unwrap();
6342
6343        let array = as_struct_array(&array).unwrap();
6344        let expected = StructArray::from(vec![
6345            (
6346                Arc::clone(&field_a),
6347                Arc::new(StringArray::from(vec!["First", "Second", "Third"])) as ArrayRef,
6348            ),
6349            (
6350                Arc::clone(&field_primitive_list),
6351                Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
6352                    Some(vec![Some(1), Some(2), Some(3)]),
6353                    Some(vec![Some(4), Some(5)]),
6354                    Some(vec![Some(6)]),
6355                ])),
6356            ),
6357        ]);
6358
6359        assert_eq!(array, &expected);
6360
6361        // Define list-of-structs scalars
6362
6363        let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap();
6364        let nl0 = SingleRowListArrayBuilder::new(nl0_array).build_list_scalar();
6365
6366        let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap();
6367        let nl1 = SingleRowListArrayBuilder::new(nl1_array).build_list_scalar();
6368
6369        let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap();
6370        let nl2 = SingleRowListArrayBuilder::new(nl2_array).build_list_scalar();
6371
6372        // iter_to_array for list-of-struct
6373        let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap();
6374        let array = array.as_list::<i32>();
6375
6376        // Construct expected array with array builders
6377        let field_a_builder = StringBuilder::with_capacity(4, 1024);
6378        let primitive_value_builder = Int32Array::builder(8);
6379        let field_primitive_list_builder = ListBuilder::new(primitive_value_builder);
6380
6381        let element_builder = StructBuilder::new(
6382            vec![field_a, field_primitive_list],
6383            vec![
6384                Box::new(field_a_builder),
6385                Box::new(field_primitive_list_builder),
6386            ],
6387        );
6388
6389        let mut list_builder = ListBuilder::new(element_builder);
6390
6391        list_builder
6392            .values()
6393            .field_builder::<StringBuilder>(0)
6394            .unwrap()
6395            .append_value("First");
6396        list_builder
6397            .values()
6398            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6399            .unwrap()
6400            .values()
6401            .append_value(1);
6402        list_builder
6403            .values()
6404            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6405            .unwrap()
6406            .values()
6407            .append_value(2);
6408        list_builder
6409            .values()
6410            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6411            .unwrap()
6412            .values()
6413            .append_value(3);
6414        list_builder
6415            .values()
6416            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6417            .unwrap()
6418            .append(true);
6419        list_builder.values().append(true);
6420
6421        list_builder
6422            .values()
6423            .field_builder::<StringBuilder>(0)
6424            .unwrap()
6425            .append_value("Second");
6426        list_builder
6427            .values()
6428            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6429            .unwrap()
6430            .values()
6431            .append_value(4);
6432        list_builder
6433            .values()
6434            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6435            .unwrap()
6436            .values()
6437            .append_value(5);
6438        list_builder
6439            .values()
6440            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6441            .unwrap()
6442            .append(true);
6443        list_builder.values().append(true);
6444        list_builder.append(true);
6445
6446        list_builder
6447            .values()
6448            .field_builder::<StringBuilder>(0)
6449            .unwrap()
6450            .append_value("Third");
6451        list_builder
6452            .values()
6453            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6454            .unwrap()
6455            .values()
6456            .append_value(6);
6457        list_builder
6458            .values()
6459            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6460            .unwrap()
6461            .append(true);
6462        list_builder.values().append(true);
6463        list_builder.append(true);
6464
6465        list_builder
6466            .values()
6467            .field_builder::<StringBuilder>(0)
6468            .unwrap()
6469            .append_value("Second");
6470        list_builder
6471            .values()
6472            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6473            .unwrap()
6474            .values()
6475            .append_value(4);
6476        list_builder
6477            .values()
6478            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6479            .unwrap()
6480            .values()
6481            .append_value(5);
6482        list_builder
6483            .values()
6484            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
6485            .unwrap()
6486            .append(true);
6487        list_builder.values().append(true);
6488        list_builder.append(true);
6489
6490        let expected = list_builder.finish();
6491
6492        assert_eq!(array, &expected);
6493    }
6494
6495    fn build_2d_list(data: Vec<Option<i32>>) -> ListArray {
6496        let a1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(data)]);
6497        ListArray::new(
6498            Arc::new(Field::new_list_field(
6499                DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6500                true,
6501            )),
6502            OffsetBuffer::<i32>::from_lengths([1]),
6503            Arc::new(a1),
6504            None,
6505        )
6506    }
6507
6508    #[test]
6509    fn test_nested_lists() {
6510        // Define inner list scalars
6511        let arr1 = build_2d_list(vec![Some(1), Some(2), Some(3)]);
6512        let arr2 = build_2d_list(vec![Some(4), Some(5)]);
6513        let arr3 = build_2d_list(vec![Some(6)]);
6514
6515        let array = ScalarValue::iter_to_array(vec![
6516            ScalarValue::List(Arc::new(arr1)),
6517            ScalarValue::List(Arc::new(arr2)),
6518            ScalarValue::List(Arc::new(arr3)),
6519        ])
6520        .unwrap();
6521        let array = array.as_list::<i32>();
6522
6523        // Construct expected array with array builders
6524        let inner_builder = Int32Array::builder(6);
6525        let middle_builder = ListBuilder::new(inner_builder);
6526        let mut outer_builder = ListBuilder::new(middle_builder);
6527
6528        outer_builder.values().values().append_value(1);
6529        outer_builder.values().values().append_value(2);
6530        outer_builder.values().values().append_value(3);
6531        outer_builder.values().append(true);
6532        outer_builder.append(true);
6533
6534        outer_builder.values().values().append_value(4);
6535        outer_builder.values().values().append_value(5);
6536        outer_builder.values().append(true);
6537        outer_builder.append(true);
6538
6539        outer_builder.values().values().append_value(6);
6540        outer_builder.values().append(true);
6541        outer_builder.append(true);
6542
6543        let expected = outer_builder.finish();
6544
6545        assert_eq!(array, &expected);
6546    }
6547
6548    #[test]
6549    fn scalar_timestamp_ns_utc_timezone() {
6550        let scalar = ScalarValue::TimestampNanosecond(
6551            Some(1599566400000000000),
6552            Some("UTC".into()),
6553        );
6554
6555        assert_eq!(
6556            scalar.data_type(),
6557            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6558        );
6559
6560        let array = scalar.to_array().expect("Failed to convert to array");
6561        assert_eq!(array.len(), 1);
6562        assert_eq!(
6563            array.data_type(),
6564            &DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6565        );
6566
6567        let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap();
6568        assert_eq!(
6569            new_scalar.data_type(),
6570            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
6571        );
6572    }
6573
6574    #[test]
6575    fn cast_round_trip() {
6576        check_scalar_cast(ScalarValue::Int8(Some(5)), DataType::Int16);
6577        check_scalar_cast(ScalarValue::Int8(None), DataType::Int16);
6578
6579        check_scalar_cast(ScalarValue::Float64(Some(5.5)), DataType::Int16);
6580
6581        check_scalar_cast(ScalarValue::Float64(None), DataType::Int16);
6582
6583        check_scalar_cast(
6584            ScalarValue::from("foo"),
6585            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
6586        );
6587
6588        check_scalar_cast(
6589            ScalarValue::Utf8(None),
6590            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
6591        );
6592
6593        check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View);
6594        check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View);
6595        check_scalar_cast(
6596            ScalarValue::from("larger than 12 bytes string"),
6597            DataType::Utf8View,
6598        );
6599        check_scalar_cast(
6600            {
6601                let element_field =
6602                    Arc::new(Field::new("element", DataType::Int32, true));
6603
6604                let mut builder =
6605                    ListBuilder::new(Int32Builder::new()).with_field(element_field);
6606                builder.append_value([Some(1)]);
6607                builder.append(true);
6608
6609                ScalarValue::List(Arc::new(builder.finish()))
6610            },
6611            DataType::List(Arc::new(Field::new("element", DataType::Int64, true))),
6612        );
6613        check_scalar_cast(
6614            {
6615                let element_field =
6616                    Arc::new(Field::new("element", DataType::Int32, true));
6617
6618                let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 1)
6619                    .with_field(element_field);
6620                builder.values().append_value(1);
6621                builder.append(true);
6622
6623                ScalarValue::FixedSizeList(Arc::new(builder.finish()))
6624            },
6625            DataType::FixedSizeList(
6626                Arc::new(Field::new("element", DataType::Int64, true)),
6627                1,
6628            ),
6629        );
6630        check_scalar_cast(
6631            {
6632                let element_field =
6633                    Arc::new(Field::new("element", DataType::Int32, true));
6634
6635                let mut builder =
6636                    LargeListBuilder::new(Int32Builder::new()).with_field(element_field);
6637                builder.append_value([Some(1)]);
6638                builder.append(true);
6639
6640                ScalarValue::LargeList(Arc::new(builder.finish()))
6641            },
6642            DataType::LargeList(Arc::new(Field::new("element", DataType::Int64, true))),
6643        );
6644    }
6645
6646    // mimics how casting work on scalar values by `casting` `scalar` to `desired_type`
6647    fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) {
6648        // convert from scalar --> Array to call cast
6649        let scalar_array = scalar.to_array().expect("Failed to convert to array");
6650        // cast the actual value
6651        let cast_array = kernels::cast::cast(&scalar_array, &desired_type).unwrap();
6652
6653        // turn it back to a scalar
6654        let cast_scalar = ScalarValue::try_from_array(&cast_array, 0).unwrap();
6655        assert_eq!(cast_scalar.data_type(), desired_type);
6656
6657        // Some time later the "cast" scalar is turned back into an array:
6658        let array = cast_scalar
6659            .to_array_of_size(10)
6660            .expect("Failed to convert to array of size");
6661
6662        // The datatype should be "Dictionary" but is actually Utf8!!!
6663        assert_eq!(array.data_type(), &desired_type)
6664    }
6665
6666    #[test]
6667    fn test_scalar_negative() -> Result<()> {
6668        // positive test
6669        let value = ScalarValue::Int32(Some(12));
6670        assert_eq!(ScalarValue::Int32(Some(-12)), value.arithmetic_negate()?);
6671        let value = ScalarValue::Int32(None);
6672        assert_eq!(ScalarValue::Int32(None), value.arithmetic_negate()?);
6673
6674        // negative test
6675        let value = ScalarValue::UInt8(Some(12));
6676        assert!(value.arithmetic_negate().is_err());
6677        let value = ScalarValue::Boolean(None);
6678        assert!(value.arithmetic_negate().is_err());
6679        Ok(())
6680    }
6681
6682    #[test]
6683    #[allow(arithmetic_overflow)] // we want to test them
6684    fn test_scalar_negative_overflows() -> Result<()> {
6685        macro_rules! test_overflow_on_value {
6686            ($($val:expr),* $(,)?) => {$(
6687                {
6688                    let value: ScalarValue = $val;
6689                    let err = value.arithmetic_negate().expect_err("Should receive overflow error on negating {value:?}");
6690                    let root_err = err.find_root();
6691                    match  root_err{
6692                        DataFusionError::ArrowError(err, _) if matches!(err.as_ref(), ArrowError::ArithmeticOverflow(_)) => {}
6693                        _ => return Err(err),
6694                    };
6695                }
6696            )*};
6697        }
6698        test_overflow_on_value!(
6699            // the integers
6700            i8::MIN.into(),
6701            i16::MIN.into(),
6702            i32::MIN.into(),
6703            i64::MIN.into(),
6704            // for decimals, only value needs to be tested
6705            ScalarValue::try_new_decimal128(i128::MIN, 10, 5)?,
6706            ScalarValue::Decimal256(Some(i256::MIN), 20, 5),
6707            // interval, check all possible values
6708            ScalarValue::IntervalYearMonth(Some(i32::MIN)),
6709            ScalarValue::new_interval_dt(i32::MIN, 999),
6710            ScalarValue::new_interval_dt(1, i32::MIN),
6711            ScalarValue::new_interval_mdn(i32::MIN, 15, 123_456),
6712            ScalarValue::new_interval_mdn(12, i32::MIN, 123_456),
6713            ScalarValue::new_interval_mdn(12, 15, i64::MIN),
6714            // tz doesn't matter when negating
6715            ScalarValue::TimestampSecond(Some(i64::MIN), None),
6716            ScalarValue::TimestampMillisecond(Some(i64::MIN), None),
6717            ScalarValue::TimestampMicrosecond(Some(i64::MIN), None),
6718            ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
6719        );
6720
6721        let float_cases = [
6722            (
6723                ScalarValue::Float16(Some(f16::MIN)),
6724                ScalarValue::Float16(Some(f16::MAX)),
6725            ),
6726            (
6727                ScalarValue::Float16(Some(f16::MAX)),
6728                ScalarValue::Float16(Some(f16::MIN)),
6729            ),
6730            (f32::MIN.into(), f32::MAX.into()),
6731            (f32::MAX.into(), f32::MIN.into()),
6732            (f64::MIN.into(), f64::MAX.into()),
6733            (f64::MAX.into(), f64::MIN.into()),
6734        ];
6735        // skip float 16 because they aren't supported
6736        for (test, expected) in float_cases.into_iter().skip(2) {
6737            assert_eq!(test.arithmetic_negate()?, expected);
6738        }
6739        Ok(())
6740    }
6741
6742    #[test]
6743    fn f16_test_overflow() {
6744        // TODO: if negate supports f16, add these cases to `test_scalar_negative_overflows` test case
6745        let cases = [
6746            (
6747                ScalarValue::Float16(Some(f16::MIN)),
6748                ScalarValue::Float16(Some(f16::MAX)),
6749            ),
6750            (
6751                ScalarValue::Float16(Some(f16::MAX)),
6752                ScalarValue::Float16(Some(f16::MIN)),
6753            ),
6754        ];
6755
6756        for (test, expected) in cases {
6757            assert_eq!(test.arithmetic_negate().unwrap(), expected);
6758        }
6759    }
6760
6761    macro_rules! expect_operation_error {
6762        ($TEST_NAME:ident, $FUNCTION:ident, $EXPECTED_ERROR:expr) => {
6763            #[test]
6764            fn $TEST_NAME() {
6765                let lhs = ScalarValue::UInt64(Some(12));
6766                let rhs = ScalarValue::Int32(Some(-3));
6767                match lhs.$FUNCTION(&rhs) {
6768                    Ok(_result) => {
6769                        panic!(
6770                            "Expected binary operation error between lhs: '{:?}', rhs: {:?}",
6771                            lhs, rhs
6772                        );
6773                    }
6774                    Err(e) => {
6775                        let error_message = e.to_string();
6776                        assert!(
6777                            error_message.contains($EXPECTED_ERROR),
6778                            "Expected error '{}' not found in actual error '{}'",
6779                            $EXPECTED_ERROR,
6780                            error_message
6781                        );
6782                    }
6783                }
6784            }
6785        };
6786    }
6787
    // `UInt64 + Int32`: operands of different types must be rejected by `add`
    expect_operation_error!(
        expect_add_error,
        add,
        "Invalid arithmetic operation: UInt64 + Int32"
    );
    // `UInt64 - Int32`: operands of different types must be rejected by `sub`
    expect_operation_error!(
        expect_sub_error,
        sub,
        "Invalid arithmetic operation: UInt64 - Int32"
    );
6798
6799    macro_rules! decimal_op_test_cases {
6800    ($OPERATION:ident, [$([$L_VALUE:expr, $L_PRECISION:expr, $L_SCALE:expr, $R_VALUE:expr, $R_PRECISION:expr, $R_SCALE:expr, $O_VALUE:expr, $O_PRECISION:expr, $O_SCALE:expr]),+]) => {
6801            $(
6802
6803                let left = ScalarValue::Decimal128($L_VALUE, $L_PRECISION, $L_SCALE);
6804                let right = ScalarValue::Decimal128($R_VALUE, $R_PRECISION, $R_SCALE);
6805                let result = left.$OPERATION(&right).unwrap();
6806                assert_eq!(ScalarValue::Decimal128($O_VALUE, $O_PRECISION, $O_SCALE), result);
6807
6808            )+
6809        };
6810    }
6811
6812    #[test]
6813    fn decimal_operations() {
6814        decimal_op_test_cases!(
6815            add,
6816            [
6817                [Some(123), 10, 2, Some(124), 10, 2, Some(123 + 124), 11, 2],
6818                // test sum decimal with diff scale
6819                [
6820                    Some(123),
6821                    10,
6822                    3,
6823                    Some(124),
6824                    10,
6825                    2,
6826                    Some(123 + 124 * 10_i128.pow(1)),
6827                    12,
6828                    3
6829                ],
6830                // diff precision and scale for decimal data type
6831                [
6832                    Some(123),
6833                    10,
6834                    2,
6835                    Some(124),
6836                    11,
6837                    3,
6838                    Some(123 * 10_i128.pow(3 - 2) + 124),
6839                    12,
6840                    3
6841                ]
6842            ]
6843        );
6844    }
6845
6846    #[test]
6847    fn decimal_operations_with_nulls() {
6848        decimal_op_test_cases!(
6849            add,
6850            [
6851                // Case: (None, Some, 0)
6852                [None, 10, 2, Some(123), 10, 2, None, 11, 2],
6853                // Case: (Some, None, 0)
6854                [Some(123), 10, 2, None, 10, 2, None, 11, 2],
6855                // Case: (Some, None, _) + Side=False
6856                [Some(123), 8, 2, None, 10, 3, None, 11, 3],
6857                // Case: (None, Some, _) + Side=False
6858                [None, 8, 2, Some(123), 10, 3, None, 11, 3],
6859                // Case: (Some, None, _) + Side=True
6860                [Some(123), 8, 4, None, 10, 3, None, 12, 4],
6861                // Case: (None, Some, _) + Side=True
6862                [None, 10, 3, Some(123), 8, 4, None, 12, 4]
6863            ]
6864        );
6865    }
6866
6867    #[test]
6868    fn test_scalar_distance() {
6869        let cases = [
6870            // scalar (lhs), scalar (rhs), expected distance
6871            // ---------------------------------------------
6872            (ScalarValue::Int8(Some(1)), ScalarValue::Int8(Some(2)), 1),
6873            (ScalarValue::Int8(Some(2)), ScalarValue::Int8(Some(1)), 1),
6874            (
6875                ScalarValue::Int16(Some(-5)),
6876                ScalarValue::Int16(Some(5)),
6877                10,
6878            ),
6879            (
6880                ScalarValue::Int16(Some(5)),
6881                ScalarValue::Int16(Some(-5)),
6882                10,
6883            ),
6884            (ScalarValue::Int32(Some(0)), ScalarValue::Int32(Some(0)), 0),
6885            (
6886                ScalarValue::Int32(Some(-5)),
6887                ScalarValue::Int32(Some(-10)),
6888                5,
6889            ),
6890            (
6891                ScalarValue::Int64(Some(-10)),
6892                ScalarValue::Int64(Some(-5)),
6893                5,
6894            ),
6895            (ScalarValue::UInt8(Some(1)), ScalarValue::UInt8(Some(2)), 1),
6896            (ScalarValue::UInt8(Some(0)), ScalarValue::UInt8(Some(0)), 0),
6897            (
6898                ScalarValue::UInt16(Some(5)),
6899                ScalarValue::UInt16(Some(10)),
6900                5,
6901            ),
6902            (
6903                ScalarValue::UInt32(Some(10)),
6904                ScalarValue::UInt32(Some(5)),
6905                5,
6906            ),
6907            (
6908                ScalarValue::UInt64(Some(5)),
6909                ScalarValue::UInt64(Some(10)),
6910                5,
6911            ),
6912            (
6913                ScalarValue::Float16(Some(f16::from_f32(1.1))),
6914                ScalarValue::Float16(Some(f16::from_f32(1.9))),
6915                1,
6916            ),
6917            (
6918                ScalarValue::Float16(Some(f16::from_f32(-5.3))),
6919                ScalarValue::Float16(Some(f16::from_f32(-9.2))),
6920                4,
6921            ),
6922            (
6923                ScalarValue::Float16(Some(f16::from_f32(-5.3))),
6924                ScalarValue::Float16(Some(f16::from_f32(-9.7))),
6925                4,
6926            ),
6927            (
6928                ScalarValue::Float32(Some(1.0)),
6929                ScalarValue::Float32(Some(2.0)),
6930                1,
6931            ),
6932            (
6933                ScalarValue::Float32(Some(2.0)),
6934                ScalarValue::Float32(Some(1.0)),
6935                1,
6936            ),
6937            (
6938                ScalarValue::Float64(Some(0.0)),
6939                ScalarValue::Float64(Some(0.0)),
6940                0,
6941            ),
6942            (
6943                ScalarValue::Float64(Some(-5.0)),
6944                ScalarValue::Float64(Some(-10.0)),
6945                5,
6946            ),
6947            (
6948                ScalarValue::Float64(Some(-10.0)),
6949                ScalarValue::Float64(Some(-5.0)),
6950                5,
6951            ),
6952            // Floats are currently special cased to f64/f32 and the result is rounded
6953            // rather than ceiled/floored. In the future we might want to take a mode
6954            // which specified the rounding behavior.
6955            (
6956                ScalarValue::Float32(Some(1.2)),
6957                ScalarValue::Float32(Some(1.3)),
6958                0,
6959            ),
6960            (
6961                ScalarValue::Float32(Some(1.1)),
6962                ScalarValue::Float32(Some(1.9)),
6963                1,
6964            ),
6965            (
6966                ScalarValue::Float64(Some(-5.3)),
6967                ScalarValue::Float64(Some(-9.2)),
6968                4,
6969            ),
6970            (
6971                ScalarValue::Float64(Some(-5.3)),
6972                ScalarValue::Float64(Some(-9.7)),
6973                4,
6974            ),
6975            (
6976                ScalarValue::Float64(Some(-5.3)),
6977                ScalarValue::Float64(Some(-9.9)),
6978                5,
6979            ),
6980        ];
6981        for (lhs, rhs, expected) in cases.iter() {
6982            let distance = lhs.distance(rhs).unwrap();
6983            assert_eq!(distance, *expected);
6984        }
6985    }
6986
6987    #[test]
6988    fn test_scalar_distance_invalid() {
6989        let cases = [
6990            // scalar (lhs), scalar (rhs)
6991            // --------------------------
6992            // Same type but with nulls
6993            (ScalarValue::Int8(None), ScalarValue::Int8(None)),
6994            (ScalarValue::Int8(None), ScalarValue::Int8(Some(1))),
6995            (ScalarValue::Int8(Some(1)), ScalarValue::Int8(None)),
6996            // Different type
6997            (ScalarValue::Int8(Some(1)), ScalarValue::Int16(Some(1))),
6998            (ScalarValue::Int8(Some(1)), ScalarValue::Float32(Some(1.0))),
6999            (
7000                ScalarValue::Float16(Some(f16::from_f32(1.0))),
7001                ScalarValue::Float32(Some(1.0)),
7002            ),
7003            (
7004                ScalarValue::Float16(Some(f16::from_f32(1.0))),
7005                ScalarValue::Int32(Some(1)),
7006            ),
7007            (
7008                ScalarValue::Float64(Some(1.1)),
7009                ScalarValue::Float32(Some(2.2)),
7010            ),
7011            (
7012                ScalarValue::UInt64(Some(777)),
7013                ScalarValue::Int32(Some(111)),
7014            ),
7015            // Different types with nulls
7016            (ScalarValue::Int8(None), ScalarValue::Int16(Some(1))),
7017            (ScalarValue::Int8(Some(1)), ScalarValue::Int16(None)),
7018            // Unsupported types
7019            (ScalarValue::from("foo"), ScalarValue::from("bar")),
7020            (
7021                ScalarValue::Boolean(Some(true)),
7022                ScalarValue::Boolean(Some(false)),
7023            ),
7024            (ScalarValue::Date32(Some(0)), ScalarValue::Date32(Some(1))),
7025            (ScalarValue::Date64(Some(0)), ScalarValue::Date64(Some(1))),
7026            (
7027                ScalarValue::Decimal128(Some(123), 5, 5),
7028                ScalarValue::Decimal128(Some(120), 5, 5),
7029            ),
7030        ];
7031        for (lhs, rhs) in cases {
7032            let distance = lhs.distance(&rhs);
7033            assert!(distance.is_none());
7034        }
7035    }
7036
7037    #[test]
7038    fn test_scalar_interval_negate() {
7039        let cases = [
7040            (
7041                ScalarValue::new_interval_ym(1, 12),
7042                ScalarValue::new_interval_ym(-1, -12),
7043            ),
7044            (
7045                ScalarValue::new_interval_dt(1, 999),
7046                ScalarValue::new_interval_dt(-1, -999),
7047            ),
7048            (
7049                ScalarValue::new_interval_mdn(12, 15, 123_456),
7050                ScalarValue::new_interval_mdn(-12, -15, -123_456),
7051            ),
7052        ];
7053        for (expr, expected) in cases.iter() {
7054            let result = expr.arithmetic_negate().unwrap();
7055            assert_eq!(*expected, result, "-expr:{expr:?}");
7056        }
7057    }
7058
7059    #[test]
7060    fn test_scalar_interval_add() {
7061        let cases = [
7062            (
7063                ScalarValue::new_interval_ym(1, 12),
7064                ScalarValue::new_interval_ym(1, 12),
7065                ScalarValue::new_interval_ym(2, 24),
7066            ),
7067            (
7068                ScalarValue::new_interval_dt(1, 999),
7069                ScalarValue::new_interval_dt(1, 999),
7070                ScalarValue::new_interval_dt(2, 1998),
7071            ),
7072            (
7073                ScalarValue::new_interval_mdn(12, 15, 123_456),
7074                ScalarValue::new_interval_mdn(12, 15, 123_456),
7075                ScalarValue::new_interval_mdn(24, 30, 246_912),
7076            ),
7077        ];
7078        for (lhs, rhs, expected) in cases.iter() {
7079            let result = lhs.add(rhs).unwrap();
7080            let result_commute = rhs.add(lhs).unwrap();
7081            assert_eq!(*expected, result, "lhs:{lhs:?} + rhs:{rhs:?}");
7082            assert_eq!(*expected, result_commute, "lhs:{rhs:?} + rhs:{lhs:?}");
7083        }
7084    }
7085
7086    #[test]
7087    fn test_scalar_interval_sub() {
7088        let cases = [
7089            (
7090                ScalarValue::new_interval_ym(1, 12),
7091                ScalarValue::new_interval_ym(1, 12),
7092                ScalarValue::new_interval_ym(0, 0),
7093            ),
7094            (
7095                ScalarValue::new_interval_dt(1, 999),
7096                ScalarValue::new_interval_dt(1, 999),
7097                ScalarValue::new_interval_dt(0, 0),
7098            ),
7099            (
7100                ScalarValue::new_interval_mdn(12, 15, 123_456),
7101                ScalarValue::new_interval_mdn(12, 15, 123_456),
7102                ScalarValue::new_interval_mdn(0, 0, 0),
7103            ),
7104        ];
7105        for (lhs, rhs, expected) in cases.iter() {
7106            let result = lhs.sub(rhs).unwrap();
7107            assert_eq!(*expected, result, "lhs:{lhs:?} - rhs:{rhs:?}");
7108        }
7109    }
7110
7111    #[test]
7112    fn timestamp_op_random_tests() {
7113        // timestamp1 + (or -) interval = timestamp2
7114        // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ?
7115        let sample_size = 1000;
7116        let timestamps1 = get_random_timestamps(sample_size);
7117        let intervals = get_random_intervals(sample_size);
7118        // ts(sec) + interval(ns) = ts(sec); however,
7119        // ts(sec) - ts(sec) cannot be = interval(ns). Therefore,
7120        // timestamps are more precise than intervals in tests.
7121        for (idx, ts1) in timestamps1.iter().enumerate() {
7122            if idx % 2 == 0 {
7123                let timestamp2 = ts1.add(intervals[idx].clone()).unwrap();
7124                let back = timestamp2.sub(intervals[idx].clone()).unwrap();
7125                assert_eq!(ts1, &back);
7126            } else {
7127                let timestamp2 = ts1.sub(intervals[idx].clone()).unwrap();
7128                let back = timestamp2.add(intervals[idx].clone()).unwrap();
7129                assert_eq!(ts1, &back);
7130            };
7131        }
7132    }
7133
    // Checks that struct scalars keep their null information, both at the top
    // level and inside a nested struct field, when they are turned into arrays
    // through `ScalarValue::iter_to_array` and through `ScalarValue::to_array`.
    #[test]
    fn test_struct_nulls() {
        // Schema: {a: UInt64, b: {ba: UInt64, bb: UInt64}}
        let fields_b = Fields::from(vec![
            Field::new("ba", DataType::UInt64, true),
            Field::new("bb", DataType::UInt64, true),
        ]);
        let fields = Fields::from(vec![
            Field::new("a", DataType::UInt64, true),
            Field::new("b", DataType::Struct(fields_b.clone()), true),
        ]);

        // One row {a: 1, b: {ba: 2, bb: 3}} where the nested struct `b` has no
        // null buffer
        let struct_value = vec![
            (
                Arc::clone(&fields[0]),
                Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
            ),
            (
                Arc::clone(&fields[1]),
                Arc::new(StructArray::from(vec![
                    (
                        Arc::clone(&fields_b[0]),
                        Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
                    ),
                    (
                        Arc::clone(&fields_b[1]),
                        Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
                    ),
                ])) as ArrayRef,
            ),
        ];

        // Same child data, but the nested struct `b` carries a validity bitmap
        // whose first bit is 0, i.e. `b` itself is null in this row
        let struct_value_with_nulls = vec![
            (
                Arc::clone(&fields[0]),
                Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
            ),
            (
                Arc::clone(&fields[1]),
                Arc::new(StructArray::from((
                    vec![
                        (
                            Arc::clone(&fields_b[0]),
                            Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
                        ),
                        (
                            Arc::clone(&fields_b[1]),
                            Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
                        ),
                    ],
                    Buffer::from(&[0]),
                ))) as ArrayRef,
            ),
        ];

        // The second tuple element is the top-level validity bitmap of the scalar:
        // `&[0]` marks the struct row as null, `&[1]` as valid
        let scalars = vec![
            // all null
            ScalarValue::Struct(Arc::new(StructArray::from((
                struct_value.clone(),
                Buffer::from(&[0]),
            )))),
            // struct valid: field `a` valid, nested field `b` null
            ScalarValue::Struct(Arc::new(StructArray::from((
                struct_value_with_nulls.clone(),
                Buffer::from(&[1]),
            )))),
            // all valid
            ScalarValue::Struct(Arc::new(StructArray::from((
                struct_value.clone(),
                Buffer::from(&[1]),
            )))),
        ];

        // Shared assertions: only the first row is null at the top level, and the
        // rendered table shows the null struct and the null nested field as blanks
        let check_array = |array| {
            let is_null = is_null(&array).unwrap();
            assert_eq!(is_null, BooleanArray::from(vec![true, false, false]));

            let formatted = pretty_format_columns("col", &[array]).unwrap().to_string();
            let formatted = formatted.split('\n').collect::<Vec<_>>();
            let expected = vec![
                "+---------------------------+",
                "| col                       |",
                "+---------------------------+",
                "|                           |",
                "| {a: 1, b: }               |",
                "| {a: 1, b: {ba: 2, bb: 3}} |",
                "+---------------------------+",
            ];
            assert_eq!(
                formatted, expected,
                "Actual:\n{formatted:#?}\n\nExpected:\n{expected:#?}"
            );
        };

        // test `ScalarValue::iter_to_array`
        let array = ScalarValue::iter_to_array(scalars.clone()).unwrap();
        check_array(array);

        // test `ScalarValue::to_array` / `ScalarValue::to_array_of_size`
        // (each scalar is converted on its own and the arrays are concatenated)
        let arrays = scalars
            .iter()
            .map(ScalarValue::to_array)
            .collect::<Result<Vec<_>>>()
            .expect("Failed to convert to array");
        let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
        let array = arrow::compute::concat(&arrays).unwrap();
        check_array(array);
    }
7241
7242    #[test]
7243    fn test_struct_display() {
7244        let field_a = Field::new("a", DataType::Int32, true);
7245        let field_b = Field::new("b", DataType::Utf8, true);
7246
7247        let s = ScalarStructBuilder::new()
7248            .with_scalar(field_a, ScalarValue::from(1i32))
7249            .with_scalar(field_b, ScalarValue::Utf8(None))
7250            .build()
7251            .unwrap();
7252
7253        assert_eq!(s.to_string(), "{a:1,b:}");
7254        assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:})"#);
7255
7256        let ScalarValue::Struct(arr) = s else {
7257            panic!("Expected struct");
7258        };
7259
7260        //verify compared to arrow display
7261        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
7262        assert_snapshot!(batches_to_string(&[batch]), @r"
7263        +-------------+
7264        | s           |
7265        +-------------+
7266        | {a: 1, b: } |
7267        +-------------+
7268        ");
7269    }
7270
7271    #[test]
7272    fn test_null_bug() {
7273        let field_a = Field::new("a", DataType::Int32, true);
7274        let field_b = Field::new("b", DataType::Int32, true);
7275        let fields = Fields::from(vec![field_a, field_b]);
7276
7277        let array_a = Arc::new(Int32Array::from_iter_values([1]));
7278        let array_b = Arc::new(Int32Array::from_iter_values([2]));
7279        let arrays: Vec<ArrayRef> = vec![array_a, array_b];
7280
7281        let mut not_nulls = NullBufferBuilder::new(1);
7282
7283        not_nulls.append_non_null();
7284
7285        let ar = StructArray::new(fields, arrays, not_nulls.finish());
7286        let s = ScalarValue::Struct(Arc::new(ar));
7287
7288        assert_eq!(s.to_string(), "{a:1,b:2}");
7289        assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:2})"#);
7290
7291        let ScalarValue::Struct(arr) = s else {
7292            panic!("Expected struct");
7293        };
7294
7295        //verify compared to arrow display
7296        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
7297        assert_snapshot!(batches_to_string(&[batch]), @r"
7298        +--------------+
7299        | s            |
7300        +--------------+
7301        | {a: 1, b: 2} |
7302        +--------------+
7303        ");
7304    }
7305
7306    #[test]
7307    fn test_display_date64_large_values() {
7308        assert_eq!(
7309            format!("{}", ScalarValue::Date64(Some(790179464505))),
7310            "1995-01-15"
7311        );
7312        // This used to panic, see https://github.com/apache/arrow-rs/issues/7728
7313        assert_eq!(
7314            format!("{}", ScalarValue::Date64(Some(-790179464505600000))),
7315            ""
7316        );
7317    }
7318
7319    #[test]
7320    fn test_struct_display_null() {
7321        let fields = vec![Field::new("a", DataType::Int32, false)];
7322        let s = ScalarStructBuilder::new_null(fields);
7323        assert_eq!(s.to_string(), "NULL");
7324
7325        let ScalarValue::Struct(arr) = s else {
7326            panic!("Expected struct");
7327        };
7328
7329        //verify compared to arrow display
7330        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
7331
7332        assert_snapshot!(batches_to_string(&[batch]), @r"
7333        +---+
7334        | s |
7335        +---+
7336        |   |
7337        +---+
7338        ");
7339    }
7340
7341    #[test]
7342    fn test_map_display_and_debug() {
7343        let string_builder = StringBuilder::new();
7344        let int_builder = Int32Builder::with_capacity(4);
7345        let mut builder = MapBuilder::new(None, string_builder, int_builder);
7346        builder.keys().append_value("joe");
7347        builder.values().append_value(1);
7348        builder.append(true).unwrap();
7349
7350        builder.keys().append_value("blogs");
7351        builder.values().append_value(2);
7352        builder.keys().append_value("foo");
7353        builder.values().append_value(4);
7354        builder.append(true).unwrap();
7355        builder.append(true).unwrap();
7356        builder.append(false).unwrap();
7357
7358        let map_value = ScalarValue::Map(Arc::new(builder.finish()));
7359
7360        assert_eq!(map_value.to_string(), "[{joe:1},{blogs:2,foo:4},{},NULL]");
7361        assert_eq!(
7362            format!("{map_value:?}"),
7363            r#"Map([{"joe":"1"},{"blogs":"2","foo":"4"},{},NULL])"#
7364        );
7365
7366        let ScalarValue::Map(arr) = map_value else {
7367            panic!("Expected map");
7368        };
7369
7370        //verify compared to arrow display
7371        let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap();
7372        assert_snapshot!(batches_to_string(&[batch]), @r"
7373        +--------------------+
7374        | m                  |
7375        +--------------------+
7376        | {joe: 1}           |
7377        | {blogs: 2, foo: 4} |
7378        | {}                 |
7379        |                    |
7380        +--------------------+
7381        ");
7382    }
7383
7384    #[test]
7385    fn test_binary_display() {
7386        let no_binary_value = ScalarValue::Binary(None);
7387        assert_eq!(format!("{no_binary_value}"), "NULL");
7388        let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
7389        assert_eq!(format!("{single_binary_value}"), "2A");
7390        let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
7391        assert_eq!(format!("{small_binary_value}"), "010203");
7392        let large_binary_value =
7393            ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7394        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7395
7396        let no_binary_value = ScalarValue::BinaryView(None);
7397        assert_eq!(format!("{no_binary_value}"), "NULL");
7398        let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
7399        assert_eq!(format!("{small_binary_value}"), "010203");
7400        let large_binary_value =
7401            ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7402        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7403
7404        let no_binary_value = ScalarValue::LargeBinary(None);
7405        assert_eq!(format!("{no_binary_value}"), "NULL");
7406        let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
7407        assert_eq!(format!("{small_binary_value}"), "010203");
7408        let large_binary_value =
7409            ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7410        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7411
7412        let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
7413        assert_eq!(format!("{no_binary_value}"), "NULL");
7414        let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
7415        assert_eq!(format!("{small_binary_value}"), "010203");
7416        let large_binary_value = ScalarValue::FixedSizeBinary(
7417            11,
7418            Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
7419        );
7420        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
7421    }
7422
7423    #[test]
7424    fn test_binary_debug() {
7425        let no_binary_value = ScalarValue::Binary(None);
7426        assert_eq!(format!("{no_binary_value:?}"), "Binary(NULL)");
7427        let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
7428        assert_eq!(format!("{single_binary_value:?}"), "Binary(\"42\")");
7429        let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
7430        assert_eq!(format!("{small_binary_value:?}"), "Binary(\"1,2,3\")");
7431        let large_binary_value =
7432            ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7433        assert_eq!(
7434            format!("{large_binary_value:?}"),
7435            "Binary(\"1,2,3,4,5,6,7,8,9,10,11\")"
7436        );
7437
7438        let no_binary_value = ScalarValue::BinaryView(None);
7439        assert_eq!(format!("{no_binary_value:?}"), "BinaryView(NULL)");
7440        let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
7441        assert_eq!(format!("{small_binary_value:?}"), "BinaryView(\"1,2,3\")");
7442        let large_binary_value =
7443            ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7444        assert_eq!(
7445            format!("{large_binary_value:?}"),
7446            "BinaryView(\"1,2,3,4,5,6,7,8,9,10,11\")"
7447        );
7448
7449        let no_binary_value = ScalarValue::LargeBinary(None);
7450        assert_eq!(format!("{no_binary_value:?}"), "LargeBinary(NULL)");
7451        let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
7452        assert_eq!(format!("{small_binary_value:?}"), "LargeBinary(\"1,2,3\")");
7453        let large_binary_value =
7454            ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
7455        assert_eq!(
7456            format!("{large_binary_value:?}"),
7457            "LargeBinary(\"1,2,3,4,5,6,7,8,9,10,11\")"
7458        );
7459
7460        let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
7461        assert_eq!(format!("{no_binary_value:?}"), "FixedSizeBinary(3, NULL)");
7462        let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
7463        assert_eq!(
7464            format!("{small_binary_value:?}"),
7465            "FixedSizeBinary(3, \"1,2,3\")"
7466        );
7467        let large_binary_value = ScalarValue::FixedSizeBinary(
7468            11,
7469            Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
7470        );
7471        assert_eq!(
7472            format!("{large_binary_value:?}"),
7473            "FixedSizeBinary(11, \"1,2,3,4,5,6,7,8,9,10,11\")"
7474        );
7475    }
7476
7477    #[test]
7478    fn test_build_timestamp_millisecond_list() {
7479        let values = vec![ScalarValue::TimestampMillisecond(Some(1), None)];
7480        let arr = ScalarValue::new_list_nullable(
7481            &values,
7482            &DataType::Timestamp(TimeUnit::Millisecond, None),
7483        );
7484        assert_eq!(1, arr.len());
7485    }
7486
7487    #[test]
7488    fn test_newlist_timestamp_zone() {
7489        let s: &'static str = "UTC";
7490        let values = vec![ScalarValue::TimestampMillisecond(Some(1), Some(s.into()))];
7491        let arr = ScalarValue::new_list_nullable(
7492            &values,
7493            &DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
7494        );
7495        assert_eq!(1, arr.len());
7496        assert_eq!(
7497            arr.data_type(),
7498            &DataType::List(Arc::new(Field::new_list_field(
7499                DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
7500                true,
7501            )))
7502        );
7503    }
7504
7505    fn get_random_timestamps(sample_size: u64) -> Vec<ScalarValue> {
7506        let vector_size = sample_size;
7507        let mut timestamp = vec![];
7508        let mut rng = rand::rng();
7509        for i in 0..vector_size {
7510            let year = rng.random_range(1995..=2050);
7511            let month = rng.random_range(1..=12);
7512            let day = rng.random_range(1..=28); // to exclude invalid dates
7513            let hour = rng.random_range(0..=23);
7514            let minute = rng.random_range(0..=59);
7515            let second = rng.random_range(0..=59);
7516            if i % 4 == 0 {
7517                timestamp.push(ScalarValue::TimestampSecond(
7518                    Some(
7519                        NaiveDate::from_ymd_opt(year, month, day)
7520                            .unwrap()
7521                            .and_hms_opt(hour, minute, second)
7522                            .unwrap()
7523                            .and_utc()
7524                            .timestamp(),
7525                    ),
7526                    None,
7527                ))
7528            } else if i % 4 == 1 {
7529                let millisec = rng.random_range(0..=999);
7530                timestamp.push(ScalarValue::TimestampMillisecond(
7531                    Some(
7532                        NaiveDate::from_ymd_opt(year, month, day)
7533                            .unwrap()
7534                            .and_hms_milli_opt(hour, minute, second, millisec)
7535                            .unwrap()
7536                            .and_utc()
7537                            .timestamp_millis(),
7538                    ),
7539                    None,
7540                ))
7541            } else if i % 4 == 2 {
7542                let microsec = rng.random_range(0..=999_999);
7543                timestamp.push(ScalarValue::TimestampMicrosecond(
7544                    Some(
7545                        NaiveDate::from_ymd_opt(year, month, day)
7546                            .unwrap()
7547                            .and_hms_micro_opt(hour, minute, second, microsec)
7548                            .unwrap()
7549                            .and_utc()
7550                            .timestamp_micros(),
7551                    ),
7552                    None,
7553                ))
7554            } else if i % 4 == 3 {
7555                let nanosec = rng.random_range(0..=999_999_999);
7556                timestamp.push(ScalarValue::TimestampNanosecond(
7557                    Some(
7558                        NaiveDate::from_ymd_opt(year, month, day)
7559                            .unwrap()
7560                            .and_hms_nano_opt(hour, minute, second, nanosec)
7561                            .unwrap()
7562                            .and_utc()
7563                            .timestamp_nanos_opt()
7564                            .unwrap(),
7565                    ),
7566                    None,
7567                ))
7568            }
7569        }
7570        timestamp
7571    }
7572
7573    fn get_random_intervals(sample_size: u64) -> Vec<ScalarValue> {
7574        const MILLISECS_IN_ONE_DAY: i64 = 86_400_000;
7575        const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000;
7576
7577        let vector_size = sample_size;
7578        let mut intervals = vec![];
7579        let mut rng = rand::rng();
7580        const SECS_IN_ONE_DAY: i32 = 86_400;
7581        const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000;
7582        for i in 0..vector_size {
7583            if i % 4 == 0 {
7584                let days = rng.random_range(0..5000);
7585                // to not break second precision
7586                let millis = rng.random_range(0..SECS_IN_ONE_DAY) * 1000;
7587                intervals.push(ScalarValue::new_interval_dt(days, millis));
7588            } else if i % 4 == 1 {
7589                let days = rng.random_range(0..5000);
7590                let millisec = rng.random_range(0..(MILLISECS_IN_ONE_DAY as i32));
7591                intervals.push(ScalarValue::new_interval_dt(days, millisec));
7592            } else if i % 4 == 2 {
7593                let days = rng.random_range(0..5000);
7594                // to not break microsec precision
7595                let nanosec = rng.random_range(0..MICROSECS_IN_ONE_DAY) * 1000;
7596                intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
7597            } else {
7598                let days = rng.random_range(0..5000);
7599                let nanosec = rng.random_range(0..NANOSECS_IN_ONE_DAY);
7600                intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
7601            }
7602        }
7603        intervals
7604    }
7605
7606    fn union_fields() -> UnionFields {
7607        [
7608            (0, Arc::new(Field::new("A", DataType::Int32, true))),
7609            (1, Arc::new(Field::new("B", DataType::Float64, true))),
7610        ]
7611        .into_iter()
7612        .collect()
7613    }
7614
7615    #[test]
7616    fn sparse_scalar_union_is_null() {
7617        let sparse_scalar = ScalarValue::Union(
7618            Some((0_i8, Box::new(ScalarValue::Int32(None)))),
7619            union_fields(),
7620            UnionMode::Sparse,
7621        );
7622        assert!(sparse_scalar.is_null());
7623    }
7624
7625    #[test]
7626    fn dense_scalar_union_is_null() {
7627        let dense_scalar = ScalarValue::Union(
7628            Some((0_i8, Box::new(ScalarValue::Int32(None)))),
7629            union_fields(),
7630            UnionMode::Dense,
7631        );
7632        assert!(dense_scalar.is_null());
7633    }
7634
7635    #[test]
7636    fn null_dictionary_scalar_produces_null_dictionary_array() {
7637        let dictionary_scalar = ScalarValue::Dictionary(
7638            Box::new(DataType::Int32),
7639            Box::new(ScalarValue::Null),
7640        );
7641        assert!(dictionary_scalar.is_null());
7642        let dictionary_array = dictionary_scalar.to_array().unwrap();
7643        assert!(dictionary_array.is_null(0));
7644    }
7645
7646    #[test]
7647    fn test_scalar_value_try_new_null() {
7648        let scalars = vec![
7649            ScalarValue::try_new_null(&DataType::Boolean).unwrap(),
7650            ScalarValue::try_new_null(&DataType::Int8).unwrap(),
7651            ScalarValue::try_new_null(&DataType::Int16).unwrap(),
7652            ScalarValue::try_new_null(&DataType::Int32).unwrap(),
7653            ScalarValue::try_new_null(&DataType::Int64).unwrap(),
7654            ScalarValue::try_new_null(&DataType::UInt8).unwrap(),
7655            ScalarValue::try_new_null(&DataType::UInt16).unwrap(),
7656            ScalarValue::try_new_null(&DataType::UInt32).unwrap(),
7657            ScalarValue::try_new_null(&DataType::UInt64).unwrap(),
7658            ScalarValue::try_new_null(&DataType::Float16).unwrap(),
7659            ScalarValue::try_new_null(&DataType::Float32).unwrap(),
7660            ScalarValue::try_new_null(&DataType::Float64).unwrap(),
7661            ScalarValue::try_new_null(&DataType::Decimal128(42, 42)).unwrap(),
7662            ScalarValue::try_new_null(&DataType::Decimal256(42, 42)).unwrap(),
7663            ScalarValue::try_new_null(&DataType::Utf8).unwrap(),
7664            ScalarValue::try_new_null(&DataType::LargeUtf8).unwrap(),
7665            ScalarValue::try_new_null(&DataType::Utf8View).unwrap(),
7666            ScalarValue::try_new_null(&DataType::Binary).unwrap(),
7667            ScalarValue::try_new_null(&DataType::BinaryView).unwrap(),
7668            ScalarValue::try_new_null(&DataType::FixedSizeBinary(42)).unwrap(),
7669            ScalarValue::try_new_null(&DataType::LargeBinary).unwrap(),
7670            ScalarValue::try_new_null(&DataType::Date32).unwrap(),
7671            ScalarValue::try_new_null(&DataType::Date64).unwrap(),
7672            ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Second)).unwrap(),
7673            ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Millisecond)).unwrap(),
7674            ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Microsecond)).unwrap(),
7675            ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Nanosecond)).unwrap(),
7676            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Second, None))
7677                .unwrap(),
7678            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Millisecond, None))
7679                .unwrap(),
7680            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Microsecond, None))
7681                .unwrap(),
7682            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Nanosecond, None))
7683                .unwrap(),
7684            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::YearMonth))
7685                .unwrap(),
7686            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::DayTime))
7687                .unwrap(),
7688            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::MonthDayNano))
7689                .unwrap(),
7690            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Second)).unwrap(),
7691            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Microsecond))
7692                .unwrap(),
7693            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Nanosecond)).unwrap(),
7694            ScalarValue::try_new_null(&DataType::Null).unwrap(),
7695        ];
7696        assert!(scalars.iter().all(|s| s.is_null()));
7697
7698        let field_ref = Arc::new(Field::new("foo", DataType::Int32, true));
7699        let map_field_ref = Arc::new(Field::new(
7700            "foo",
7701            DataType::Struct(Fields::from(vec![
7702                Field::new("bar", DataType::Utf8, true),
7703                Field::new("baz", DataType::Int32, true),
7704            ])),
7705            true,
7706        ));
7707        let scalars = vec![
7708            ScalarValue::try_new_null(&DataType::List(Arc::clone(&field_ref))).unwrap(),
7709            ScalarValue::try_new_null(&DataType::LargeList(Arc::clone(&field_ref)))
7710                .unwrap(),
7711            ScalarValue::try_new_null(&DataType::FixedSizeList(
7712                Arc::clone(&field_ref),
7713                42,
7714            ))
7715            .unwrap(),
7716            ScalarValue::try_new_null(&DataType::Struct(
7717                vec![Arc::clone(&field_ref)].into(),
7718            ))
7719            .unwrap(),
7720            ScalarValue::try_new_null(&DataType::Map(map_field_ref, false)).unwrap(),
7721            ScalarValue::try_new_null(&DataType::Union(
7722                UnionFields::new(vec![42], vec![field_ref]),
7723                UnionMode::Dense,
7724            ))
7725            .unwrap(),
7726        ];
7727        assert!(scalars.iter().all(|s| s.is_null()));
7728    }
7729
7730    // `err.to_string()` depends on backtrace being present (may have backtrace appended)
7731    // `err.strip_backtrace()` also depends on backtrace being present (may have "This was likely caused by ..." stripped)
7732    fn assert_starts_with(actual: impl AsRef<str>, expected_prefix: impl AsRef<str>) {
7733        let actual = actual.as_ref();
7734        let expected_prefix = expected_prefix.as_ref();
7735        assert!(
7736            actual.starts_with(expected_prefix),
7737            "Expected '{actual}' to start with '{expected_prefix}'"
7738        );
7739    }
7740}