Skip to main content

datafusion_common/scalar/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! [`ScalarValue`]: stores single values
19
20mod cache;
21mod consts;
22mod struct_builder;
23
24use std::borrow::Borrow;
25use std::cmp::Ordering;
26use std::collections::{HashSet, VecDeque};
27use std::convert::Infallible;
28use std::fmt;
29use std::hash::Hash;
30use std::hash::Hasher;
31use std::iter::repeat_n;
32use std::mem::{size_of, size_of_val};
33use std::str::FromStr;
34use std::sync::Arc;
35
36use crate::assert_or_internal_err;
37use crate::cast::{
38    as_binary_array, as_binary_view_array, as_boolean_array, as_date32_array,
39    as_date64_array, as_decimal32_array, as_decimal64_array, as_decimal128_array,
40    as_decimal256_array, as_dictionary_array, as_duration_microsecond_array,
41    as_duration_millisecond_array, as_duration_nanosecond_array,
42    as_duration_second_array, as_fixed_size_binary_array, as_fixed_size_list_array,
43    as_float16_array, as_float32_array, as_float64_array, as_int8_array, as_int16_array,
44    as_int32_array, as_int64_array, as_interval_dt_array, as_interval_mdn_array,
45    as_interval_ym_array, as_large_binary_array, as_large_list_array,
46    as_large_string_array, as_run_array, as_string_array, as_string_view_array,
47    as_time32_millisecond_array, as_time32_second_array, as_time64_microsecond_array,
48    as_time64_nanosecond_array, as_timestamp_microsecond_array,
49    as_timestamp_millisecond_array, as_timestamp_nanosecond_array,
50    as_timestamp_second_array, as_uint8_array, as_uint16_array, as_uint32_array,
51    as_uint64_array, as_union_array,
52};
53use crate::error::{_exec_err, _internal_err, _not_impl_err, DataFusionError, Result};
54use crate::format::DEFAULT_CAST_OPTIONS;
55use crate::hash_utils::create_hashes;
56use crate::utils::SingleRowListArrayBuilder;
57use crate::{_internal_datafusion_err, arrow_datafusion_err};
58use arrow::array::{
59    Array, ArrayData, ArrayDataBuilder, ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType,
60    AsArray, BinaryArray, BinaryViewArray, BinaryViewBuilder, BooleanArray, Date32Array,
61    Date64Array, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
62    DictionaryArray, DurationMicrosecondArray, DurationMillisecondArray,
63    DurationNanosecondArray, DurationSecondArray, FixedSizeBinaryArray,
64    FixedSizeListArray, Float16Array, Float32Array, Float64Array, GenericListArray,
65    Int8Array, Int16Array, Int32Array, Int64Array, IntervalDayTimeArray,
66    IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, LargeListArray,
67    LargeStringArray, ListArray, MapArray, MutableArrayData, OffsetSizeTrait,
68    PrimitiveArray, RunArray, Scalar, StringArray, StringViewArray, StringViewBuilder,
69    StructArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
70    Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
71    TimestampNanosecondArray, TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array,
72    UInt64Array, UnionArray, downcast_run_array, new_empty_array, new_null_array,
73};
74use arrow::buffer::{BooleanBuffer, ScalarBuffer};
75use arrow::compute::kernels::cast::{CastOptions, cast_with_options};
76use arrow::compute::kernels::numeric::{
77    add, add_wrapping, div, mul, mul_wrapping, rem, sub, sub_wrapping,
78};
79use arrow::datatypes::{
80    ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType, Date32Type,
81    Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type, DecimalType, Field,
82    FieldRef, Float32Type, Int8Type, Int16Type, Int32Type, Int64Type, IntervalDayTime,
83    IntervalDayTimeType, IntervalMonthDayNano, IntervalMonthDayNanoType, IntervalUnit,
84    IntervalYearMonthType, RunEndIndexType, TimeUnit, TimestampMicrosecondType,
85    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt8Type,
86    UInt16Type, UInt32Type, UInt64Type, UnionFields, UnionMode, i256,
87    validate_decimal_precision_and_scale,
88};
89use arrow::util::display::{ArrayFormatter, FormatOptions, array_value_to_string};
90use cache::{get_or_create_cached_key_array, get_or_create_cached_null_array};
91use chrono::{Duration, NaiveDate};
92use half::f16;
93pub use struct_builder::ScalarStructBuilder;
94
/// Number of seconds in one day.
const SECONDS_PER_DAY: i64 = 86_400;
/// Number of milliseconds in one day.
const MILLIS_PER_DAY: i64 = SECONDS_PER_DAY * 1_000;
/// Number of microseconds in one day.
const MICROS_PER_DAY: i64 = MILLIS_PER_DAY * 1_000;
/// Number of nanoseconds in one day.
const NANOS_PER_DAY: i64 = MICROS_PER_DAY * 1_000;
/// Number of microseconds in one millisecond.
const MICROS_PER_MILLISECOND: i64 = 1_000;
/// Number of nanoseconds in one millisecond.
const NANOS_PER_MILLISECOND: i64 = 1_000_000;
101
102/// Returns the multiplier that converts the input date representation into the
103/// desired timestamp unit, if the conversion requires a multiplication that can
104/// overflow an `i64`.
105pub fn date_to_timestamp_multiplier(
106    source_type: &DataType,
107    target_type: &DataType,
108) -> Option<i64> {
109    let DataType::Timestamp(target_unit, _) = target_type else {
110        return None;
111    };
112
113    // Only `Timestamp` target types have a time unit; otherwise no
114    // multiplier applies (handled above). The function returns `Some(m)`
115    // when converting the `source_type` to `target_type` requires a
116    // multiplication that could overflow `i64`. It returns `None` when
117    // the conversion is a division or otherwise doesn't require a
118    // multiplication (e.g. Date64 -> Second).
119    match source_type {
120        // Date32 stores days since epoch. Converting to any timestamp
121        // unit requires multiplying by the per-day factor (seconds,
122        // milliseconds, microseconds, nanoseconds).
123        DataType::Date32 => Some(match target_unit {
124            TimeUnit::Second => SECONDS_PER_DAY,
125            TimeUnit::Millisecond => MILLIS_PER_DAY,
126            TimeUnit::Microsecond => MICROS_PER_DAY,
127            TimeUnit::Nanosecond => NANOS_PER_DAY,
128        }),
129
130        // Date64 stores milliseconds since epoch. Converting to
131        // seconds is a division (no multiplication), so return `None`.
132        // Converting to milliseconds is 1:1 (multiplier 1). Converting
133        // to micro/nano requires multiplying by 1_000 / 1_000_000.
134        DataType::Date64 => match target_unit {
135            TimeUnit::Second => None,
136            // Converting Date64 (ms since epoch) to millisecond timestamps
137            // is an identity conversion and does not require multiplication.
138            // Returning `None` indicates no multiplication-based overflow
139            // check is necessary.
140            TimeUnit::Millisecond => None,
141            TimeUnit::Microsecond => Some(MICROS_PER_MILLISECOND),
142            TimeUnit::Nanosecond => Some(NANOS_PER_MILLISECOND),
143        },
144
145        _ => None,
146    }
147}
148
149/// Ensures the provided value can be represented as a timestamp with the given
150/// multiplier. Returns an [`DataFusionError::Execution`] when the converted
151/// value would overflow the timestamp range.
152pub fn ensure_timestamp_in_bounds(
153    value: i64,
154    multiplier: i64,
155    source_type: &DataType,
156    target_type: &DataType,
157) -> Result<()> {
158    if multiplier <= 1 {
159        return Ok(());
160    }
161
162    if value.checked_mul(multiplier).is_none() {
163        let target = format_timestamp_type_for_error(target_type);
164        _exec_err!(
165            "Cannot cast {} value {} to {}: converted value exceeds the representable i64 range",
166            source_type,
167            value,
168            target
169        )
170    } else {
171        Ok(())
172    }
173}
174
175/// Format a `DataType::Timestamp` into a short, stable string used in
176/// user-facing error messages.
177pub(crate) fn format_timestamp_type_for_error(target_type: &DataType) -> String {
178    match target_type {
179        DataType::Timestamp(unit, _) => {
180            let s = match unit {
181                TimeUnit::Second => "s",
182                TimeUnit::Millisecond => "ms",
183                TimeUnit::Microsecond => "us",
184                TimeUnit::Nanosecond => "ns",
185            };
186            format!("Timestamp({s})")
187        }
188        other => format!("{other}"),
189    }
190}
191
/// A dynamically typed, nullable single value.
///
/// While an arrow [`Array`] stores one or more values of the same type, in a
/// single column, a `ScalarValue` stores a single value of a single type, the
/// equivalent of 1 row and one column.
///
/// ```text
///  ┌────────┐
///  │ value1 │
///  │ value2 │                  ┌────────┐
///  │ value3 │                  │ value2 │
///  │  ...   │                  └────────┘
///  │ valueN │
///  └────────┘
///
///    Array                     ScalarValue
///
/// stores multiple,             stores a single,
/// possibly null, values of     possibly null, value
/// the same type
/// ```
///
/// # Performance
///
/// In general, performance will be better using arrow [`Array`]s rather than
/// [`ScalarValue`], as it is far more efficient to process multiple values at
/// once (vectorized processing).
///
/// # Example
/// ```
/// # use datafusion_common::ScalarValue;
/// // Create single scalar value for an Int32 value
/// let s1 = ScalarValue::Int32(Some(10));
///
/// // You can also create values using the From impl:
/// let s2 = ScalarValue::from(10i32);
/// assert_eq!(s1, s2);
/// ```
///
/// # Null Handling
///
/// `ScalarValue` represents null values in the same way as Arrow. Nulls are
/// "typed" in the sense that a null value in an [`Int32Array`] is different
/// from a null value in a [`Float64Array`], and is different from the values in
/// a [`NullArray`].
///
/// ```
/// # fn main() -> datafusion_common::Result<()> {
/// # use datafusion_common::ScalarValue;
/// # use arrow::datatypes::DataType;
/// // You can create a 'null' Int32 value directly:
/// let s1 = ScalarValue::Int32(None);
///
/// // You can also create a null value for a given datatype:
/// let s2 = ScalarValue::try_from(&DataType::Int32)?;
/// assert_eq!(s1, s2);
///
/// // Note that this is DIFFERENT than a `ScalarValue::Null`
/// let s3 = ScalarValue::Null;
/// assert_ne!(s1, s3);
/// # Ok(())
/// # }
/// ```
///
/// # Nested Types
///
/// `List` / `LargeList` / `FixedSizeList` / `Struct` / `Map` are represented as a
/// single element array of the corresponding type.
///
/// ## Example: Creating [`ScalarValue::Struct`] using [`ScalarStructBuilder`]
/// ```
/// # use std::sync::Arc;
/// # use arrow::datatypes::{DataType, Field};
/// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder};
/// // Build a struct like: {a: 1, b: "foo"}
/// let field_a = Field::new("a", DataType::Int32, false);
/// let field_b = Field::new("b", DataType::Utf8, false);
///
/// let s1 = ScalarStructBuilder::new()
///     .with_scalar(field_a, ScalarValue::from(1i32))
///     .with_scalar(field_b, ScalarValue::from("foo"))
///     .build();
/// ```
///
/// ## Example: Creating a null [`ScalarValue::Struct`] using [`ScalarStructBuilder`]
/// ```
/// # use std::sync::Arc;
/// # use arrow::datatypes::{DataType, Field};
/// # use datafusion_common::{ScalarValue, scalar::ScalarStructBuilder};
/// // Build a struct representing a NULL value
/// let fields = vec![
///     Field::new("a", DataType::Int32, false),
///     Field::new("b", DataType::Utf8, false),
/// ];
///
/// let s1 = ScalarStructBuilder::new_null(fields);
/// ```
///
/// ## Example: Creating [`ScalarValue::Struct`] directly
/// ```
/// # use std::sync::Arc;
/// # use arrow::datatypes::{DataType, Field, Fields};
/// # use arrow::array::{ArrayRef, Int32Array, StructArray, StringArray};
/// # use datafusion_common::ScalarValue;
/// // Build a struct like: {a: 1, b: "foo"}
/// // Field description
/// let fields = Fields::from(vec![
///     Field::new("a", DataType::Int32, false),
///     Field::new("b", DataType::Utf8, false),
/// ]);
/// // one row arrays for each field
/// let arrays: Vec<ArrayRef> = vec![
///     Arc::new(Int32Array::from(vec![1])),
///     Arc::new(StringArray::from(vec!["foo"])),
/// ];
/// // no nulls for this array
/// let nulls = None;
/// let arr = StructArray::new(fields, arrays, nulls);
///
/// // Create a ScalarValue::Struct directly
/// let s1 = ScalarValue::Struct(Arc::new(arr));
/// ```
///
///
/// # Further Reading
/// See [datatypes](https://arrow.apache.org/docs/python/api/datatypes.html) for
/// details on datatypes and the [format](https://github.com/apache/arrow/blob/master/format/Schema.fbs#L354-L375)
/// for the definitive reference.
///
/// [`NullArray`]: arrow::array::NullArray
#[derive(Clone)]
pub enum ScalarValue {
    /// represents `DataType::Null` (castable to/from any other type)
    Null,
    /// true or false value
    Boolean(Option<bool>),
    /// 16bit float
    Float16(Option<f16>),
    /// 32bit float
    Float32(Option<f32>),
    /// 64bit float
    Float64(Option<f64>),
    /// 32bit decimal, using the i32 to represent the decimal; precision, scale
    Decimal32(Option<i32>, u8, i8),
    /// 64bit decimal, using the i64 to represent the decimal; precision, scale
    Decimal64(Option<i64>, u8, i8),
    /// 128bit decimal, using the i128 to represent the decimal; precision, scale
    Decimal128(Option<i128>, u8, i8),
    /// 256bit decimal, using the i256 to represent the decimal; precision, scale
    Decimal256(Option<i256>, u8, i8),
    /// signed 8bit int
    Int8(Option<i8>),
    /// signed 16bit int
    Int16(Option<i16>),
    /// signed 32bit int
    Int32(Option<i32>),
    /// signed 64bit int
    Int64(Option<i64>),
    /// unsigned 8bit int
    UInt8(Option<u8>),
    /// unsigned 16bit int
    UInt16(Option<u16>),
    /// unsigned 32bit int
    UInt32(Option<u32>),
    /// unsigned 64bit int
    UInt64(Option<u64>),
    /// utf-8 encoded string.
    Utf8(Option<String>),
    /// utf-8 encoded string but from view types.
    Utf8View(Option<String>),
    /// utf-8 encoded string representing a LargeString's arrow type.
    LargeUtf8(Option<String>),
    /// binary
    Binary(Option<Vec<u8>>),
    /// binary but from view types.
    BinaryView(Option<Vec<u8>>),
    /// fixed size binary: the `i32` size, then the (nullable) bytes
    FixedSizeBinary(i32, Option<Vec<u8>>),
    /// large binary
    LargeBinary(Option<Vec<u8>>),
    /// Fixed size list scalar.
    ///
    /// The array must be a FixedSizeListArray with length 1.
    FixedSizeList(Arc<FixedSizeListArray>),
    /// Represents a single element of a [`ListArray`] as an [`ArrayRef`]
    ///
    /// The array must be a ListArray with length 1.
    List(Arc<ListArray>),
    /// Represents a single element of a [`LargeListArray`] as an [`ArrayRef`]
    ///
    /// The array must be a LargeListArray with length 1.
    LargeList(Arc<LargeListArray>),
    /// Represents a single element [`StructArray`] as an [`ArrayRef`]. See
    /// [`ScalarValue`] for examples of how to create instances of this type.
    Struct(Arc<StructArray>),
    /// Represents a single element [`MapArray`] as an [`ArrayRef`].
    Map(Arc<MapArray>),
    /// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01
    Date32(Option<i32>),
    /// Date stored as a signed 64bit int milliseconds since UNIX epoch 1970-01-01
    Date64(Option<i64>),
    /// Time stored as a signed 32bit int as seconds since midnight
    Time32Second(Option<i32>),
    /// Time stored as a signed 32bit int as milliseconds since midnight
    Time32Millisecond(Option<i32>),
    /// Time stored as a signed 64bit int as microseconds since midnight
    Time64Microsecond(Option<i64>),
    /// Time stored as a signed 64bit int as nanoseconds since midnight
    Time64Nanosecond(Option<i64>),
    /// Timestamp Second
    TimestampSecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp Milliseconds
    TimestampMillisecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp Microseconds
    TimestampMicrosecond(Option<i64>, Option<Arc<str>>),
    /// Timestamp Nanoseconds
    TimestampNanosecond(Option<i64>, Option<Arc<str>>),
    /// Number of elapsed whole months
    IntervalYearMonth(Option<i32>),
    /// Number of elapsed days and milliseconds (no leap seconds)
    /// stored as 2 contiguous 32-bit signed integers
    IntervalDayTime(Option<IntervalDayTime>),
    /// A triple of the number of elapsed months, days, and nanoseconds.
    /// Months and days are encoded as 32-bit signed integers.
    /// Nanoseconds is encoded as a 64-bit signed integer (no leap seconds).
    IntervalMonthDayNano(Option<IntervalMonthDayNano>),
    /// Duration in seconds
    DurationSecond(Option<i64>),
    /// Duration in milliseconds
    DurationMillisecond(Option<i64>),
    /// Duration in microseconds
    DurationMicrosecond(Option<i64>),
    /// Duration in nanoseconds
    DurationNanosecond(Option<i64>),
    /// A nested datatype that can represent slots of differing types. Components:
    /// `.0`: a tuple of union `type_id` and the single value held by this Scalar
    /// `.1`: the list of fields, zero-to-one of which will be set in `.0`
    /// `.2`: the physical storage of the source/destination UnionArray from which this Scalar came
    Union(Option<(i8, Box<ScalarValue>)>, UnionFields, UnionMode),
    /// Dictionary type: index type and value
    Dictionary(Box<DataType>, Box<ScalarValue>),
    /// (run-ends field, value field, value)
    RunEndEncoded(FieldRef, FieldRef, Box<ScalarValue>),
}
435
436impl Hash for Fl<f16> {
437    fn hash<H: Hasher>(&self, state: &mut H) {
438        self.0.to_bits().hash(state);
439    }
440}
441
442// manual implementation of `PartialEq`
443impl PartialEq for ScalarValue {
444    fn eq(&self, other: &Self) -> bool {
445        use ScalarValue::*;
446        // This purposely doesn't have a catch-all "(_, _)" so that
447        // any newly added enum variant will require editing this list
448        // or else face a compile error
449        match (self, other) {
450            (Decimal32(v1, p1, s1), Decimal32(v2, p2, s2)) => {
451                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
452            }
453            (Decimal32(_, _, _), _) => false,
454            (Decimal64(v1, p1, s1), Decimal64(v2, p2, s2)) => {
455                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
456            }
457            (Decimal64(_, _, _), _) => false,
458            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
459                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
460            }
461            (Decimal128(_, _, _), _) => false,
462            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
463                v1.eq(v2) && p1.eq(p2) && s1.eq(s2)
464            }
465            (Decimal256(_, _, _), _) => false,
466            (Boolean(v1), Boolean(v2)) => v1.eq(v2),
467            (Boolean(_), _) => false,
468            (Float32(v1), Float32(v2)) => match (v1, v2) {
469                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
470                _ => v1.eq(v2),
471            },
472            (Float16(v1), Float16(v2)) => match (v1, v2) {
473                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
474                _ => v1.eq(v2),
475            },
476            (Float32(_), _) => false,
477            (Float16(_), _) => false,
478            (Float64(v1), Float64(v2)) => match (v1, v2) {
479                (Some(f1), Some(f2)) => f1.to_bits() == f2.to_bits(),
480                _ => v1.eq(v2),
481            },
482            (Float64(_), _) => false,
483            (Int8(v1), Int8(v2)) => v1.eq(v2),
484            (Int8(_), _) => false,
485            (Int16(v1), Int16(v2)) => v1.eq(v2),
486            (Int16(_), _) => false,
487            (Int32(v1), Int32(v2)) => v1.eq(v2),
488            (Int32(_), _) => false,
489            (Int64(v1), Int64(v2)) => v1.eq(v2),
490            (Int64(_), _) => false,
491            (UInt8(v1), UInt8(v2)) => v1.eq(v2),
492            (UInt8(_), _) => false,
493            (UInt16(v1), UInt16(v2)) => v1.eq(v2),
494            (UInt16(_), _) => false,
495            (UInt32(v1), UInt32(v2)) => v1.eq(v2),
496            (UInt32(_), _) => false,
497            (UInt64(v1), UInt64(v2)) => v1.eq(v2),
498            (UInt64(_), _) => false,
499            (Utf8(v1), Utf8(v2)) => v1.eq(v2),
500            (Utf8(_), _) => false,
501            (Utf8View(v1), Utf8View(v2)) => v1.eq(v2),
502            (Utf8View(_), _) => false,
503            (LargeUtf8(v1), LargeUtf8(v2)) => v1.eq(v2),
504            (LargeUtf8(_), _) => false,
505            (Binary(v1), Binary(v2)) => v1.eq(v2),
506            (Binary(_), _) => false,
507            (BinaryView(v1), BinaryView(v2)) => v1.eq(v2),
508            (BinaryView(_), _) => false,
509            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.eq(v2),
510            (FixedSizeBinary(_, _), _) => false,
511            (LargeBinary(v1), LargeBinary(v2)) => v1.eq(v2),
512            (LargeBinary(_), _) => false,
513            (FixedSizeList(v1), FixedSizeList(v2)) => v1.eq(v2),
514            (FixedSizeList(_), _) => false,
515            (List(v1), List(v2)) => v1.eq(v2),
516            (List(_), _) => false,
517            (LargeList(v1), LargeList(v2)) => v1.eq(v2),
518            (LargeList(_), _) => false,
519            (Struct(v1), Struct(v2)) => v1.eq(v2),
520            (Struct(_), _) => false,
521            (Map(v1), Map(v2)) => v1.eq(v2),
522            (Map(_), _) => false,
523            (Date32(v1), Date32(v2)) => v1.eq(v2),
524            (Date32(_), _) => false,
525            (Date64(v1), Date64(v2)) => v1.eq(v2),
526            (Date64(_), _) => false,
527            (Time32Second(v1), Time32Second(v2)) => v1.eq(v2),
528            (Time32Second(_), _) => false,
529            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.eq(v2),
530            (Time32Millisecond(_), _) => false,
531            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.eq(v2),
532            (Time64Microsecond(_), _) => false,
533            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.eq(v2),
534            (Time64Nanosecond(_), _) => false,
535            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2),
536            (TimestampSecond(_, _), _) => false,
537            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2),
538            (TimestampMillisecond(_, _), _) => false,
539            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => v1.eq(v2),
540            (TimestampMicrosecond(_, _), _) => false,
541            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2),
542            (TimestampNanosecond(_, _), _) => false,
543            (DurationSecond(v1), DurationSecond(v2)) => v1.eq(v2),
544            (DurationSecond(_), _) => false,
545            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.eq(v2),
546            (DurationMillisecond(_), _) => false,
547            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.eq(v2),
548            (DurationMicrosecond(_), _) => false,
549            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.eq(v2),
550            (DurationNanosecond(_), _) => false,
551            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2),
552            (IntervalYearMonth(_), _) => false,
553            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2),
554            (IntervalDayTime(_), _) => false,
555            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2),
556            (IntervalMonthDayNano(_), _) => false,
557            (Union(val1, fields1, mode1), Union(val2, fields2, mode2)) => {
558                val1.eq(val2) && fields1.eq(fields2) && mode1.eq(mode2)
559            }
560            (Union(_, _, _), _) => false,
561            (Dictionary(k1, v1), Dictionary(k2, v2)) => k1.eq(k2) && v1.eq(v2),
562            (Dictionary(_, _), _) => false,
563            (RunEndEncoded(rf1, vf1, v1), RunEndEncoded(rf2, vf2, v2)) => {
564                rf1.eq(rf2) && vf1.eq(vf2) && v1.eq(v2)
565            }
566            (RunEndEncoded(_, _, _), _) => false,
567            (Null, Null) => true,
568            (Null, _) => false,
569        }
570    }
571}
572
573// manual implementation of `PartialOrd`
574impl PartialOrd for ScalarValue {
575    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
576        use ScalarValue::*;
577        // This purposely doesn't have a catch-all "(_, _)" so that
578        // any newly added enum variant will require editing this list
579        // or else face a compile error
580        match (self, other) {
581            (Decimal32(v1, p1, s1), Decimal32(v2, p2, s2)) => {
582                if p1.eq(p2) && s1.eq(s2) {
583                    v1.partial_cmp(v2)
584                } else {
585                    // Two decimal values can be compared if they have the same precision and scale.
586                    None
587                }
588            }
589            (Decimal32(_, _, _), _) => None,
590            (Decimal64(v1, p1, s1), Decimal64(v2, p2, s2)) => {
591                if p1.eq(p2) && s1.eq(s2) {
592                    v1.partial_cmp(v2)
593                } else {
594                    // Two decimal values can be compared if they have the same precision and scale.
595                    None
596                }
597            }
598            (Decimal64(_, _, _), _) => None,
599            (Decimal128(v1, p1, s1), Decimal128(v2, p2, s2)) => {
600                if p1.eq(p2) && s1.eq(s2) {
601                    v1.partial_cmp(v2)
602                } else {
603                    // Two decimal values can be compared if they have the same precision and scale.
604                    None
605                }
606            }
607            (Decimal128(_, _, _), _) => None,
608            (Decimal256(v1, p1, s1), Decimal256(v2, p2, s2)) => {
609                if p1.eq(p2) && s1.eq(s2) {
610                    v1.partial_cmp(v2)
611                } else {
612                    // Two decimal values can be compared if they have the same precision and scale.
613                    None
614                }
615            }
616            (Decimal256(_, _, _), _) => None,
617            (Boolean(v1), Boolean(v2)) => v1.partial_cmp(v2),
618            (Boolean(_), _) => None,
619            (Float32(v1), Float32(v2)) => match (v1, v2) {
620                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
621                _ => v1.partial_cmp(v2),
622            },
623            (Float16(v1), Float16(v2)) => match (v1, v2) {
624                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
625                _ => v1.partial_cmp(v2),
626            },
627            (Float32(_), _) => None,
628            (Float16(_), _) => None,
629            (Float64(v1), Float64(v2)) => match (v1, v2) {
630                (Some(f1), Some(f2)) => Some(f1.total_cmp(f2)),
631                _ => v1.partial_cmp(v2),
632            },
633            (Float64(_), _) => None,
634            (Int8(v1), Int8(v2)) => v1.partial_cmp(v2),
635            (Int8(_), _) => None,
636            (Int16(v1), Int16(v2)) => v1.partial_cmp(v2),
637            (Int16(_), _) => None,
638            (Int32(v1), Int32(v2)) => v1.partial_cmp(v2),
639            (Int32(_), _) => None,
640            (Int64(v1), Int64(v2)) => v1.partial_cmp(v2),
641            (Int64(_), _) => None,
642            (UInt8(v1), UInt8(v2)) => v1.partial_cmp(v2),
643            (UInt8(_), _) => None,
644            (UInt16(v1), UInt16(v2)) => v1.partial_cmp(v2),
645            (UInt16(_), _) => None,
646            (UInt32(v1), UInt32(v2)) => v1.partial_cmp(v2),
647            (UInt32(_), _) => None,
648            (UInt64(v1), UInt64(v2)) => v1.partial_cmp(v2),
649            (UInt64(_), _) => None,
650            (Utf8(v1), Utf8(v2)) => v1.partial_cmp(v2),
651            (Utf8(_), _) => None,
652            (LargeUtf8(v1), LargeUtf8(v2)) => v1.partial_cmp(v2),
653            (LargeUtf8(_), _) => None,
654            (Utf8View(v1), Utf8View(v2)) => v1.partial_cmp(v2),
655            (Utf8View(_), _) => None,
656            (Binary(v1), Binary(v2)) => v1.partial_cmp(v2),
657            (Binary(_), _) => None,
658            (BinaryView(v1), BinaryView(v2)) => v1.partial_cmp(v2),
659            (BinaryView(_), _) => None,
660            (FixedSizeBinary(_, v1), FixedSizeBinary(_, v2)) => v1.partial_cmp(v2),
661            (FixedSizeBinary(_, _), _) => None,
662            (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
663            (LargeBinary(_), _) => None,
664            // ScalarValue::List / ScalarValue::FixedSizeList / ScalarValue::LargeList are ensure to have length 1
665            (List(arr1), List(arr2)) => partial_cmp_list(arr1.as_ref(), arr2.as_ref()),
666            (FixedSizeList(arr1), FixedSizeList(arr2)) => {
667                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
668            }
669            (LargeList(arr1), LargeList(arr2)) => {
670                partial_cmp_list(arr1.as_ref(), arr2.as_ref())
671            }
672            (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None,
673            (Struct(struct_arr1), Struct(struct_arr2)) => {
674                partial_cmp_struct(struct_arr1.as_ref(), struct_arr2.as_ref())
675            }
676            (Struct(_), _) => None,
677            (Map(map_arr1), Map(map_arr2)) => partial_cmp_map(map_arr1, map_arr2),
678            (Map(_), _) => None,
679            (Date32(v1), Date32(v2)) => v1.partial_cmp(v2),
680            (Date32(_), _) => None,
681            (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
682            (Date64(_), _) => None,
683            (Time32Second(v1), Time32Second(v2)) => v1.partial_cmp(v2),
684            (Time32Second(_), _) => None,
685            (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.partial_cmp(v2),
686            (Time32Millisecond(_), _) => None,
687            (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.partial_cmp(v2),
688            (Time64Microsecond(_), _) => None,
689            (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.partial_cmp(v2),
690            (Time64Nanosecond(_), _) => None,
691            (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2),
692            (TimestampSecond(_, _), _) => None,
693            (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => {
694                v1.partial_cmp(v2)
695            }
696            (TimestampMillisecond(_, _), _) => None,
697            (TimestampMicrosecond(v1, _), TimestampMicrosecond(v2, _)) => {
698                v1.partial_cmp(v2)
699            }
700            (TimestampMicrosecond(_, _), _) => None,
701            (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => {
702                v1.partial_cmp(v2)
703            }
704            (TimestampNanosecond(_, _), _) => None,
705            (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2),
706            (IntervalYearMonth(_), _) => None,
707            (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2),
708            (IntervalDayTime(_), _) => None,
709            (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2),
710            (IntervalMonthDayNano(_), _) => None,
711            (DurationSecond(v1), DurationSecond(v2)) => v1.partial_cmp(v2),
712            (DurationSecond(_), _) => None,
713            (DurationMillisecond(v1), DurationMillisecond(v2)) => v1.partial_cmp(v2),
714            (DurationMillisecond(_), _) => None,
715            (DurationMicrosecond(v1), DurationMicrosecond(v2)) => v1.partial_cmp(v2),
716            (DurationMicrosecond(_), _) => None,
717            (DurationNanosecond(v1), DurationNanosecond(v2)) => v1.partial_cmp(v2),
718            (DurationNanosecond(_), _) => None,
719            (Union(v1, t1, m1), Union(v2, t2, m2)) => {
720                if t1.eq(t2) && m1.eq(m2) {
721                    v1.partial_cmp(v2)
722                } else {
723                    None
724                }
725            }
726            (Union(_, _, _), _) => None,
727            (Dictionary(k1, v1), Dictionary(k2, v2)) => {
728                // Don't compare if the key types don't match (it is effectively a different datatype)
729                if k1 == k2 { v1.partial_cmp(v2) } else { None }
730            }
731            (Dictionary(_, _), _) => None,
732            (RunEndEncoded(rf1, vf1, v1), RunEndEncoded(rf2, vf2, v2)) => {
733                // Don't compare if the run ends fields don't match (it is effectively a different datatype)
734                if rf1 == rf2 && vf1 == vf2 {
735                    v1.partial_cmp(v2)
736                } else {
737                    None
738                }
739            }
740            (RunEndEncoded(_, _, _), _) => None,
741            (Null, Null) => Some(Ordering::Equal),
742            (Null, _) => None,
743        }
744    }
745}
746
747/// List/LargeList/FixedSizeList scalars always have a single element
748/// array. This function returns that array
749fn first_array_for_list(arr: &dyn Array) -> ArrayRef {
750    assert_eq!(arr.len(), 1);
751    if let Some(arr) = arr.as_list_opt::<i32>() {
752        arr.value(0)
753    } else if let Some(arr) = arr.as_list_opt::<i64>() {
754        arr.value(0)
755    } else if let Some(arr) = arr.as_fixed_size_list_opt() {
756        arr.value(0)
757    } else {
758        unreachable!(
759            "Since only List / LargeList / FixedSizeList are supported, this should never happen"
760        )
761    }
762}
763
764/// Compares two List/LargeList/FixedSizeList scalars
765fn partial_cmp_list(arr1: &dyn Array, arr2: &dyn Array) -> Option<Ordering> {
766    if arr1.data_type() != arr2.data_type() {
767        return None;
768    }
769    let arr1 = first_array_for_list(arr1);
770    let arr2 = first_array_for_list(arr2);
771
772    let min_length = arr1.len().min(arr2.len());
773    let arr1_trimmed = arr1.slice(0, min_length);
774    let arr2_trimmed = arr2.slice(0, min_length);
775
776    let lt_res = arrow::compute::kernels::cmp::lt(&arr1_trimmed, &arr2_trimmed).ok()?;
777    let eq_res = arrow::compute::kernels::cmp::eq(&arr1_trimmed, &arr2_trimmed).ok()?;
778
779    for j in 0..lt_res.len() {
780        // In Postgres, NULL values in lists are always considered to be greater than non-NULL values:
781        //
782        // $ SELECT ARRAY[NULL]::integer[] > ARRAY[1]
783        // true
784        //
785        // These next two if statements are introduced for replicating Postgres behavior, as
786        // arrow::compute does not account for this.
787        if arr1_trimmed.is_null(j) && !arr2_trimmed.is_null(j) {
788            return Some(Ordering::Greater);
789        }
790        if !arr1_trimmed.is_null(j) && arr2_trimmed.is_null(j) {
791            return Some(Ordering::Less);
792        }
793
794        if lt_res.is_valid(j) && lt_res.value(j) {
795            return Some(Ordering::Less);
796        }
797        if eq_res.is_valid(j) && !eq_res.value(j) {
798            return Some(Ordering::Greater);
799        }
800    }
801
802    Some(arr1.len().cmp(&arr2.len()))
803}
804
805fn flatten<'a>(array: &'a StructArray, columns: &mut Vec<&'a ArrayRef>) {
806    for i in 0..array.num_columns() {
807        let column = array.column(i);
808        if let Some(nested_struct) = column.as_any().downcast_ref::<StructArray>() {
809            // If it's a nested struct, recursively expand
810            flatten(nested_struct, columns);
811        } else {
812            // If it's a primitive type, add directly
813            columns.push(column);
814        }
815    }
816}
817
818pub fn partial_cmp_struct(s1: &StructArray, s2: &StructArray) -> Option<Ordering> {
819    if s1.len() != s2.len() {
820        return None;
821    }
822
823    if s1.data_type() != s2.data_type() {
824        return None;
825    }
826
827    let mut expanded_columns1 = Vec::with_capacity(s1.num_columns());
828    let mut expanded_columns2 = Vec::with_capacity(s2.num_columns());
829
830    flatten(s1, &mut expanded_columns1);
831    flatten(s2, &mut expanded_columns2);
832
833    for col_index in 0..expanded_columns1.len() {
834        let arr1 = expanded_columns1[col_index];
835        let arr2 = expanded_columns2[col_index];
836
837        let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
838        let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
839
840        for j in 0..lt_res.len() {
841            if lt_res.is_valid(j) && lt_res.value(j) {
842                return Some(Ordering::Less);
843            }
844            if eq_res.is_valid(j) && !eq_res.value(j) {
845                return Some(Ordering::Greater);
846            }
847        }
848    }
849    Some(Ordering::Equal)
850}
851
852fn partial_cmp_map(m1: &Arc<MapArray>, m2: &Arc<MapArray>) -> Option<Ordering> {
853    if m1.len() != m2.len() {
854        return None;
855    }
856
857    if m1.data_type() != m2.data_type() {
858        return None;
859    }
860
861    for col_index in 0..m1.len() {
862        let arr1 = m1.entries().column(col_index);
863        let arr2 = m2.entries().column(col_index);
864
865        let lt_res = arrow::compute::kernels::cmp::lt(arr1, arr2).ok()?;
866        let eq_res = arrow::compute::kernels::cmp::eq(arr1, arr2).ok()?;
867
868        for j in 0..lt_res.len() {
869            if lt_res.is_valid(j) && lt_res.value(j) {
870                return Some(Ordering::Less);
871            }
872            if eq_res.is_valid(j) && !eq_res.value(j) {
873                return Some(Ordering::Greater);
874            }
875        }
876    }
877    Some(Ordering::Equal)
878}
879
// `Eq` has no methods: this impl only declares that the `PartialEq`
// implementation of `ScalarValue` is a full equivalence relation.
impl Eq for ScalarValue {}
881
// Newtype over f32/f64. The primitive floats do not implement
// `std::hash::Hash`, so hashing goes through this wrapper instead.
struct Fl<T>(T);

// Implements `Hash` for `Fl<$float>` by reinterpreting the float's
// native-endian bytes as the same-sized unsigned integer `$bits` and writing
// that integer's native-endian bytes to the hasher.
macro_rules! hash_float_value {
    ($(($float:ty, $bits:ty)),+) => {
        $(impl std::hash::Hash for Fl<$float> {
            #[inline]
            fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
                let raw = <$bits>::from_ne_bytes(self.0.to_ne_bytes());
                state.write(&raw.to_ne_bytes())
            }
        })+
    };
}

hash_float_value!((f64, u64), (f32, u32));
897
898// manual implementation of `Hash`
899//
900// # Panics
901//
902// Panics if there is an error when creating hash values for rows
903impl Hash for ScalarValue {
904    fn hash<H: Hasher>(&self, state: &mut H) {
905        use ScalarValue::*;
906        match self {
907            Decimal32(v, p, s) => {
908                v.hash(state);
909                p.hash(state);
910                s.hash(state)
911            }
912            Decimal64(v, p, s) => {
913                v.hash(state);
914                p.hash(state);
915                s.hash(state)
916            }
917            Decimal128(v, p, s) => {
918                v.hash(state);
919                p.hash(state);
920                s.hash(state)
921            }
922            Decimal256(v, p, s) => {
923                v.hash(state);
924                p.hash(state);
925                s.hash(state)
926            }
927            Boolean(v) => v.hash(state),
928            Float16(v) => v.map(Fl).hash(state),
929            Float32(v) => v.map(Fl).hash(state),
930            Float64(v) => v.map(Fl).hash(state),
931            Int8(v) => v.hash(state),
932            Int16(v) => v.hash(state),
933            Int32(v) => v.hash(state),
934            Int64(v) => v.hash(state),
935            UInt8(v) => v.hash(state),
936            UInt16(v) => v.hash(state),
937            UInt32(v) => v.hash(state),
938            UInt64(v) => v.hash(state),
939            Utf8(v) | LargeUtf8(v) | Utf8View(v) => v.hash(state),
940            Binary(v) | FixedSizeBinary(_, v) | LargeBinary(v) | BinaryView(v) => {
941                v.hash(state)
942            }
943            List(arr) => {
944                hash_nested_array(arr.to_owned() as ArrayRef, state);
945            }
946            LargeList(arr) => {
947                hash_nested_array(arr.to_owned() as ArrayRef, state);
948            }
949            FixedSizeList(arr) => {
950                hash_nested_array(arr.to_owned() as ArrayRef, state);
951            }
952            Struct(arr) => {
953                hash_nested_array(arr.to_owned() as ArrayRef, state);
954            }
955            Map(arr) => {
956                hash_nested_array(arr.to_owned() as ArrayRef, state);
957            }
958            Date32(v) => v.hash(state),
959            Date64(v) => v.hash(state),
960            Time32Second(v) => v.hash(state),
961            Time32Millisecond(v) => v.hash(state),
962            Time64Microsecond(v) => v.hash(state),
963            Time64Nanosecond(v) => v.hash(state),
964            TimestampSecond(v, _) => v.hash(state),
965            TimestampMillisecond(v, _) => v.hash(state),
966            TimestampMicrosecond(v, _) => v.hash(state),
967            TimestampNanosecond(v, _) => v.hash(state),
968            DurationSecond(v) => v.hash(state),
969            DurationMillisecond(v) => v.hash(state),
970            DurationMicrosecond(v) => v.hash(state),
971            DurationNanosecond(v) => v.hash(state),
972            IntervalYearMonth(v) => v.hash(state),
973            IntervalDayTime(v) => v.hash(state),
974            IntervalMonthDayNano(v) => v.hash(state),
975            Union(v, t, m) => {
976                v.hash(state);
977                t.hash(state);
978                m.hash(state);
979            }
980            Dictionary(k, v) => {
981                k.hash(state);
982                v.hash(state);
983            }
984            RunEndEncoded(rf, vf, v) => {
985                rf.hash(state);
986                vf.hash(state);
987                v.hash(state);
988            }
989            // stable hash for Null value
990            Null => 1.hash(state),
991        }
992    }
993}
994
995fn hash_nested_array<H: Hasher>(arr: ArrayRef, state: &mut H) {
996    let len = arr.len();
997    let hashes_buffer = &mut vec![0; len];
998    let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0);
999    let hashes = create_hashes(&[arr], &random_state, hashes_buffer)
1000        .expect("hash_nested_array: failed to create row hashes");
1001    // Hash back to std::hash::Hasher
1002    hashes.hash(state);
1003}
1004
1005/// Return a reference to the values array and the index into it for a
1006/// dictionary array
1007///
1008/// # Errors
1009///
1010/// Errors if the array cannot be downcasted to DictionaryArray
1011#[inline]
1012pub fn get_dict_value<K: ArrowDictionaryKeyType>(
1013    array: &dyn Array,
1014    index: usize,
1015) -> Result<(&ArrayRef, Option<usize>)> {
1016    let dict_array = as_dictionary_array::<K>(array)?;
1017    Ok((dict_array.values(), dict_array.key(index)))
1018}
1019
1020/// Create a dictionary array representing `value` repeated `size`
1021/// times
1022fn dict_from_scalar<K: ArrowDictionaryKeyType>(
1023    value: &ScalarValue,
1024    size: usize,
1025) -> Result<ArrayRef> {
1026    // values array is one element long (the value)
1027    let values_array = value.to_array_of_size(1)?;
1028
1029    // Create a key array with `size` elements, each of 0
1030    // Use cache to avoid repeated allocations for the same size
1031    let key_array: PrimitiveArray<K> =
1032        get_or_create_cached_key_array::<K>(size, value.is_null());
1033
1034    // create a new DictionaryArray
1035    //
1036    // Note: this path could be made faster by using the ArrayData
1037    // APIs and skipping validation, if it every comes up in
1038    // performance traces.
1039    Ok(Arc::new(
1040        DictionaryArray::<K>::try_new(key_array, values_array)?, // should always be valid by construction above
1041    ))
1042}
1043
1044/// Create a `DictionaryArray` from the provided values array.
1045///
1046/// Each element gets a unique key (`0..N-1`), without deduplication.
1047/// Useful for wrapping arrays in dictionary form.
1048///
1049/// # Input
1050/// ["alice", "bob", "alice", null, "carol"]
1051///
1052/// # Output
1053/// `DictionaryArray<Int32>`
1054/// {
1055///   keys:   [0, 1, 2, 3, 4],
1056///   values: ["alice", "bob", "alice", null, "carol"]
1057/// }
1058pub fn dict_from_values<K: ArrowDictionaryKeyType>(
1059    values_array: ArrayRef,
1060) -> Result<ArrayRef> {
1061    // Create a key array with `size` elements of 0..array_len for all
1062    // non-null value elements
1063    let key_array: PrimitiveArray<K> = (0..values_array.len())
1064        .map(|index| {
1065            if values_array.is_valid(index) {
1066                let native_index = K::Native::from_usize(index).ok_or_else(|| {
1067                    _internal_datafusion_err!(
1068                        "Can not create index of type {} from value {index}",
1069                        K::DATA_TYPE
1070                    )
1071                })?;
1072                Ok(Some(native_index))
1073            } else {
1074                Ok(None)
1075            }
1076        })
1077        .collect::<Result<Vec<_>>>()?
1078        .into_iter()
1079        .collect();
1080
1081    // create a new DictionaryArray
1082    //
1083    // Note: this path could be made faster by using the ArrayData
1084    // APIs and skipping validation, if it every comes up in
1085    // performance traces.
1086    let dict_array = DictionaryArray::<K>::try_new(key_array, values_array)?;
1087    Ok(Arc::new(dict_array))
1088}
1089
/// Downcasts `$array` with `$array_cast` and wraps the element at `$index`
/// in `ScalarValue::$SCALAR`, together with a clone of `$TZ`.
///
/// A null slot becomes `None`; a valid slot is converted with `.into()`.
/// Uses `?`, so it must be expanded inside a function returning a `Result`.
macro_rules! typed_cast_tz {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident, $TZ:expr) => {{
        let typed = $array_cast($array)?;
        let value = if typed.is_null($index) {
            None
        } else {
            Some(typed.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value, $TZ.clone()))
    }};
}
1102
/// Downcasts `$array` with `$array_cast` and wraps the element at `$index`
/// in `ScalarValue::$SCALAR`.
///
/// A null slot becomes `None`; a valid slot is converted with `.into()`.
/// Uses `?`, so it must be expanded inside a function returning a `Result`.
macro_rules! typed_cast {
    ($array:expr, $index:expr, $array_cast:ident, $SCALAR:ident) => {{
        let typed = $array_cast($array)?;
        let value = if typed.is_null($index) {
            None
        } else {
            Some(typed.value($index).into())
        };
        Ok::<ScalarValue, DataFusionError>(ScalarValue::$SCALAR(value))
    }};
}
1114
/// Builds an array of `$SIZE` elements from an optional scalar payload.
///
/// `Some(value)` yields `$ARRAY_TYPE::from_value(*value, $SIZE)`; `None`
/// yields an all-null array of `DataType::$DATA_TYPE` (or
/// `DataType::$DATA_TYPE($ENUM)` in the five-argument form).
macro_rules! build_array_from_option {
    ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            None => new_null_array(&DataType::$DATA_TYPE, $SIZE),
            Some(present) => Arc::new($ARRAY_TYPE::from_value(*present, $SIZE)),
        }
    }};
    ($DATA_TYPE:ident, $ENUM:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{
        match $EXPR {
            None => new_null_array(&DataType::$DATA_TYPE($ENUM), $SIZE),
            Some(present) => Arc::new($ARRAY_TYPE::from_value(*present, $SIZE)),
        }
    }};
}
1129
/// Builds a timestamp array of `$SIZE` elements from an optional payload.
///
/// `Some(value)` yields `$ARRAY_TYPE::from_value(*value, $SIZE)` with the
/// time zone `$TZ` attached; `None` yields an all-null array of
/// `DataType::Timestamp($TIME_UNIT, $TZ)`.
macro_rules! build_timestamp_array_from_option {
    ($TIME_UNIT:expr, $TZ:expr, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {
        match $EXPR {
            None => new_null_array(&DataType::Timestamp($TIME_UNIT, $TZ), $SIZE),
            Some(present) => {
                Arc::new($ARRAY_TYPE::from_value(*present, $SIZE).with_timezone_opt($TZ))
            }
        }
    };
}
1140
/// Checks whether the element at `$index` of `$array` equals the optional
/// scalar payload `$VALUE`.
///
/// `$array` is downcast with `$array_cast` (errors propagate through `?`).
/// A `Some` payload matches a valid slot holding an equal value; a `None`
/// payload matches a null slot.
macro_rules! eq_array_primitive {
    ($array:expr, $index:expr, $array_cast:ident, $VALUE:expr) => {{
        let typed = $array_cast($array)?;
        let slot_is_valid = typed.is_valid($index);
        let matches = match $VALUE {
            None => !slot_is_valid,
            Some(expected) => slot_is_valid && &typed.value($index) == expected,
        };
        Ok::<bool, DataFusionError>(matches)
    }};
}
1151
1152impl ScalarValue {
1153    /// Create a [`Result<ScalarValue>`] with the provided value and datatype
1154    ///
1155    /// # Panics
1156    ///
1157    /// Panics if d is not compatible with T
1158    pub fn new_primitive<T: ArrowPrimitiveType>(
1159        a: Option<T::Native>,
1160        d: &DataType,
1161    ) -> Result<Self> {
1162        match a {
1163            None => d.try_into(),
1164            Some(v) => {
1165                let array = PrimitiveArray::<T>::new(vec![v].into(), None)
1166                    .with_data_type(d.clone());
1167                Self::try_from_array(&array, 0)
1168            }
1169        }
1170    }
1171
1172    /// Create a decimal Scalar from value/precision and scale.
1173    pub fn try_new_decimal128(value: i128, precision: u8, scale: i8) -> Result<Self> {
1174        Self::validate_decimal_or_internal_err::<Decimal128Type>(precision, scale)?;
1175        Ok(ScalarValue::Decimal128(Some(value), precision, scale))
1176    }
1177
1178    /// Create a Null instance of ScalarValue for this datatype
1179    ///
1180    /// Example
1181    /// ```
1182    /// use arrow::datatypes::DataType;
1183    /// use datafusion_common::ScalarValue;
1184    ///
1185    /// let scalar = ScalarValue::try_new_null(&DataType::Int32).unwrap();
1186    /// assert_eq!(scalar.is_null(), true);
1187    /// assert_eq!(scalar.data_type(), DataType::Int32);
1188    /// ```
1189    pub fn try_new_null(data_type: &DataType) -> Result<Self> {
1190        Ok(match data_type {
1191            DataType::Boolean => ScalarValue::Boolean(None),
1192            DataType::Float16 => ScalarValue::Float16(None),
1193            DataType::Float64 => ScalarValue::Float64(None),
1194            DataType::Float32 => ScalarValue::Float32(None),
1195            DataType::Int8 => ScalarValue::Int8(None),
1196            DataType::Int16 => ScalarValue::Int16(None),
1197            DataType::Int32 => ScalarValue::Int32(None),
1198            DataType::Int64 => ScalarValue::Int64(None),
1199            DataType::UInt8 => ScalarValue::UInt8(None),
1200            DataType::UInt16 => ScalarValue::UInt16(None),
1201            DataType::UInt32 => ScalarValue::UInt32(None),
1202            DataType::UInt64 => ScalarValue::UInt64(None),
1203            DataType::Decimal32(precision, scale) => {
1204                ScalarValue::Decimal32(None, *precision, *scale)
1205            }
1206            DataType::Decimal64(precision, scale) => {
1207                ScalarValue::Decimal64(None, *precision, *scale)
1208            }
1209            DataType::Decimal128(precision, scale) => {
1210                ScalarValue::Decimal128(None, *precision, *scale)
1211            }
1212            DataType::Decimal256(precision, scale) => {
1213                ScalarValue::Decimal256(None, *precision, *scale)
1214            }
1215            DataType::Utf8 => ScalarValue::Utf8(None),
1216            DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
1217            DataType::Utf8View => ScalarValue::Utf8View(None),
1218            DataType::Binary => ScalarValue::Binary(None),
1219            DataType::BinaryView => ScalarValue::BinaryView(None),
1220            DataType::FixedSizeBinary(len) => ScalarValue::FixedSizeBinary(*len, None),
1221            DataType::LargeBinary => ScalarValue::LargeBinary(None),
1222            DataType::Date32 => ScalarValue::Date32(None),
1223            DataType::Date64 => ScalarValue::Date64(None),
1224            DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(None),
1225            DataType::Time32(TimeUnit::Millisecond) => {
1226                ScalarValue::Time32Millisecond(None)
1227            }
1228            DataType::Time64(TimeUnit::Microsecond) => {
1229                ScalarValue::Time64Microsecond(None)
1230            }
1231            DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64Nanosecond(None),
1232            DataType::Timestamp(TimeUnit::Second, tz_opt) => {
1233                ScalarValue::TimestampSecond(None, tz_opt.clone())
1234            }
1235            DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => {
1236                ScalarValue::TimestampMillisecond(None, tz_opt.clone())
1237            }
1238            DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => {
1239                ScalarValue::TimestampMicrosecond(None, tz_opt.clone())
1240            }
1241            DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => {
1242                ScalarValue::TimestampNanosecond(None, tz_opt.clone())
1243            }
1244            DataType::Interval(IntervalUnit::YearMonth) => {
1245                ScalarValue::IntervalYearMonth(None)
1246            }
1247            DataType::Interval(IntervalUnit::DayTime) => {
1248                ScalarValue::IntervalDayTime(None)
1249            }
1250            DataType::Interval(IntervalUnit::MonthDayNano) => {
1251                ScalarValue::IntervalMonthDayNano(None)
1252            }
1253            DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None),
1254            DataType::Duration(TimeUnit::Millisecond) => {
1255                ScalarValue::DurationMillisecond(None)
1256            }
1257            DataType::Duration(TimeUnit::Microsecond) => {
1258                ScalarValue::DurationMicrosecond(None)
1259            }
1260            DataType::Duration(TimeUnit::Nanosecond) => {
1261                ScalarValue::DurationNanosecond(None)
1262            }
1263            DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary(
1264                index_type.clone(),
1265                Box::new(value_type.as_ref().try_into()?),
1266            ),
1267            DataType::RunEndEncoded(run_ends_field, value_field) => {
1268                ScalarValue::RunEndEncoded(
1269                    Arc::clone(run_ends_field),
1270                    Arc::clone(value_field),
1271                    Box::new(value_field.data_type().try_into()?),
1272                )
1273            }
1274            // `ScalarValue::List` contains single element `ListArray`.
1275            DataType::List(field_ref) => ScalarValue::List(Arc::new(
1276                GenericListArray::new_null(Arc::clone(field_ref), 1),
1277            )),
1278            // `ScalarValue::LargeList` contains single element `LargeListArray`.
1279            DataType::LargeList(field_ref) => ScalarValue::LargeList(Arc::new(
1280                GenericListArray::new_null(Arc::clone(field_ref), 1),
1281            )),
1282            // `ScalarValue::FixedSizeList` contains single element `FixedSizeList`.
1283            DataType::FixedSizeList(field_ref, fixed_length) => {
1284                ScalarValue::FixedSizeList(Arc::new(FixedSizeListArray::new_null(
1285                    Arc::clone(field_ref),
1286                    *fixed_length,
1287                    1,
1288                )))
1289            }
1290            DataType::Struct(fields) => ScalarValue::Struct(
1291                new_null_array(&DataType::Struct(fields.to_owned()), 1)
1292                    .as_struct()
1293                    .to_owned()
1294                    .into(),
1295            ),
1296            DataType::Map(fields, sorted) => ScalarValue::Map(
1297                new_null_array(&DataType::Map(fields.to_owned(), sorted.to_owned()), 1)
1298                    .as_map()
1299                    .to_owned()
1300                    .into(),
1301            ),
1302            DataType::Union(fields, mode) => {
1303                ScalarValue::Union(None, fields.clone(), *mode)
1304            }
1305            DataType::Null => ScalarValue::Null,
1306            _ => {
1307                return _not_impl_err!(
1308                    "Can't create a null scalar from data_type \"{data_type}\""
1309                );
1310            }
1311        })
1312    }
1313
1314    /// Returns a [`ScalarValue::Utf8`] representing `val`
1315    pub fn new_utf8(val: impl Into<String>) -> Self {
1316        ScalarValue::from(val.into())
1317    }
1318
1319    /// Returns a [`ScalarValue::Utf8View`] representing `val`
1320    pub fn new_utf8view(val: impl Into<String>) -> Self {
1321        ScalarValue::Utf8View(Some(val.into()))
1322    }
1323
1324    /// Returns a [`ScalarValue::IntervalYearMonth`] representing
1325    /// `years` years and `months` months
1326    pub fn new_interval_ym(years: i32, months: i32) -> Self {
1327        let val = IntervalYearMonthType::make_value(years, months);
1328        ScalarValue::IntervalYearMonth(Some(val))
1329    }
1330
1331    /// Returns a [`ScalarValue::IntervalDayTime`] representing
1332    /// `days` days and `millis` milliseconds
1333    pub fn new_interval_dt(days: i32, millis: i32) -> Self {
1334        let val = IntervalDayTimeType::make_value(days, millis);
1335        Self::IntervalDayTime(Some(val))
1336    }
1337
1338    /// Returns a [`ScalarValue::IntervalMonthDayNano`] representing
1339    /// `months` months and `days` days, and `nanos` nanoseconds
1340    pub fn new_interval_mdn(months: i32, days: i32, nanos: i64) -> Self {
1341        let val = IntervalMonthDayNanoType::make_value(months, days, nanos);
1342        ScalarValue::IntervalMonthDayNano(Some(val))
1343    }
1344
1345    /// Returns a [`ScalarValue`] representing
1346    /// `value` and `tz_opt` timezone
1347    pub fn new_timestamp<T: ArrowTimestampType>(
1348        value: Option<i64>,
1349        tz_opt: Option<Arc<str>>,
1350    ) -> Self {
1351        match T::UNIT {
1352            TimeUnit::Second => ScalarValue::TimestampSecond(value, tz_opt),
1353            TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(value, tz_opt),
1354            TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(value, tz_opt),
1355            TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(value, tz_opt),
1356        }
1357    }
1358
1359    /// Returns a [`ScalarValue`] representing PI
1360    pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> {
1361        match datatype {
1362            DataType::Float16 => Ok(ScalarValue::from(f16::PI)),
1363            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)),
1364            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)),
1365            _ => _internal_err!("PI is not supported for data type: {}", datatype),
1366        }
1367    }
1368
1369    /// Returns a [`ScalarValue`] representing PI's upper bound
1370    pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> {
1371        match datatype {
1372            DataType::Float16 => Ok(ScalarValue::Float16(Some(consts::PI_UPPER_F16))),
1373            DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)),
1374            DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)),
1375            _ => {
1376                _internal_err!("PI_UPPER is not supported for data type: {}", datatype)
1377            }
1378        }
1379    }
1380
1381    /// Returns a [`ScalarValue`] representing -PI's lower bound
1382    pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> {
1383        match datatype {
1384            DataType::Float16 => {
1385                Ok(ScalarValue::Float16(Some(consts::NEGATIVE_PI_LOWER_F16)))
1386            }
1387            DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)),
1388            DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)),
1389            _ => {
1390                _internal_err!("-PI_LOWER is not supported for data type: {}", datatype)
1391            }
1392        }
1393    }
1394
1395    /// Returns a [`ScalarValue`] representing FRAC_PI_2's upper bound
1396    pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> {
1397        match datatype {
1398            DataType::Float16 => {
1399                Ok(ScalarValue::Float16(Some(consts::FRAC_PI_2_UPPER_F16)))
1400            }
1401            DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)),
1402            DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)),
1403            _ => {
1404                _internal_err!("PI_UPPER/2 is not supported for data type: {}", datatype)
1405            }
1406        }
1407    }
1408
1409    // Returns a [`ScalarValue`] representing FRAC_PI_2's lower bound
1410    pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> {
1411        match datatype {
1412            DataType::Float16 => Ok(ScalarValue::Float16(Some(
1413                consts::NEGATIVE_FRAC_PI_2_LOWER_F16,
1414            ))),
1415            DataType::Float32 => {
1416                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32))
1417            }
1418            DataType::Float64 => {
1419                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64))
1420            }
1421            _ => {
1422                _internal_err!("-PI/2_LOWER is not supported for data type: {}", datatype)
1423            }
1424        }
1425    }
1426
1427    /// Returns a [`ScalarValue`] representing -PI
1428    pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> {
1429        match datatype {
1430            DataType::Float16 => Ok(ScalarValue::from(-f16::PI)),
1431            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)),
1432            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)),
1433            _ => _internal_err!("-PI is not supported for data type: {}", datatype),
1434        }
1435    }
1436
1437    /// Returns a [`ScalarValue`] representing PI/2
1438    pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1439        match datatype {
1440            DataType::Float16 => Ok(ScalarValue::from(f16::FRAC_PI_2)),
1441            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)),
1442            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)),
1443            _ => _internal_err!("PI/2 is not supported for data type: {}", datatype),
1444        }
1445    }
1446
1447    /// Returns a [`ScalarValue`] representing -PI/2
1448    pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1449        match datatype {
1450            DataType::Float16 => Ok(ScalarValue::from(-f16::FRAC_PI_2)),
1451            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)),
1452            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)),
1453            _ => _internal_err!("-PI/2 is not supported for data type: {}", datatype),
1454        }
1455    }
1456
1457    /// Returns a [`ScalarValue`] representing infinity
1458    pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> {
1459        match datatype {
1460            DataType::Float16 => Ok(ScalarValue::from(f16::INFINITY)),
1461            DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)),
1462            DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)),
1463            _ => {
1464                _internal_err!("Infinity is not supported for data type: {}", datatype)
1465            }
1466        }
1467    }
1468
1469    /// Returns a [`ScalarValue`] representing negative infinity
1470    pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> {
1471        match datatype {
1472            DataType::Float16 => Ok(ScalarValue::from(f16::NEG_INFINITY)),
1473            DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)),
1474            DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)),
1475            _ => {
1476                _internal_err!(
1477                    "Negative Infinity is not supported for data type: {}",
1478                    datatype
1479                )
1480            }
1481        }
1482    }
1483
1484    /// Create a zero value in the given type.
1485    pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> {
1486        Ok(match datatype {
1487            DataType::Boolean => ScalarValue::Boolean(Some(false)),
1488            DataType::Int8 => ScalarValue::Int8(Some(0)),
1489            DataType::Int16 => ScalarValue::Int16(Some(0)),
1490            DataType::Int32 => ScalarValue::Int32(Some(0)),
1491            DataType::Int64 => ScalarValue::Int64(Some(0)),
1492            DataType::UInt8 => ScalarValue::UInt8(Some(0)),
1493            DataType::UInt16 => ScalarValue::UInt16(Some(0)),
1494            DataType::UInt32 => ScalarValue::UInt32(Some(0)),
1495            DataType::UInt64 => ScalarValue::UInt64(Some(0)),
1496            DataType::Float16 => ScalarValue::Float16(Some(f16::ZERO)),
1497            DataType::Float32 => ScalarValue::Float32(Some(0.0)),
1498            DataType::Float64 => ScalarValue::Float64(Some(0.0)),
1499            DataType::Decimal32(precision, scale) => {
1500                ScalarValue::Decimal32(Some(0), *precision, *scale)
1501            }
1502            DataType::Decimal64(precision, scale) => {
1503                ScalarValue::Decimal64(Some(0), *precision, *scale)
1504            }
1505            DataType::Decimal128(precision, scale) => {
1506                ScalarValue::Decimal128(Some(0), *precision, *scale)
1507            }
1508            DataType::Decimal256(precision, scale) => {
1509                ScalarValue::Decimal256(Some(i256::ZERO), *precision, *scale)
1510            }
1511            DataType::Timestamp(TimeUnit::Second, tz) => {
1512                ScalarValue::TimestampSecond(Some(0), tz.clone())
1513            }
1514            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
1515                ScalarValue::TimestampMillisecond(Some(0), tz.clone())
1516            }
1517            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
1518                ScalarValue::TimestampMicrosecond(Some(0), tz.clone())
1519            }
1520            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
1521                ScalarValue::TimestampNanosecond(Some(0), tz.clone())
1522            }
1523            DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(Some(0)),
1524            DataType::Time32(TimeUnit::Millisecond) => {
1525                ScalarValue::Time32Millisecond(Some(0))
1526            }
1527            DataType::Time64(TimeUnit::Microsecond) => {
1528                ScalarValue::Time64Microsecond(Some(0))
1529            }
1530            DataType::Time64(TimeUnit::Nanosecond) => {
1531                ScalarValue::Time64Nanosecond(Some(0))
1532            }
1533            DataType::Interval(IntervalUnit::YearMonth) => {
1534                ScalarValue::IntervalYearMonth(Some(0))
1535            }
1536            DataType::Interval(IntervalUnit::DayTime) => {
1537                ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
1538            }
1539            DataType::Interval(IntervalUnit::MonthDayNano) => {
1540                ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
1541            }
1542            DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(Some(0)),
1543            DataType::Duration(TimeUnit::Millisecond) => {
1544                ScalarValue::DurationMillisecond(Some(0))
1545            }
1546            DataType::Duration(TimeUnit::Microsecond) => {
1547                ScalarValue::DurationMicrosecond(Some(0))
1548            }
1549            DataType::Duration(TimeUnit::Nanosecond) => {
1550                ScalarValue::DurationNanosecond(Some(0))
1551            }
1552            DataType::Date32 => ScalarValue::Date32(Some(0)),
1553            DataType::Date64 => ScalarValue::Date64(Some(0)),
1554            _ => {
1555                return _not_impl_err!(
1556                    "Can't create a zero scalar from data_type \"{datatype}\""
1557                );
1558            }
1559        })
1560    }
1561
1562    /// Returns a default value for the given `DataType`.
1563    ///
1564    /// This function is useful when you need to initialize a column with
1565    /// non-null values in a DataFrame or when you need a "zero" value
1566    /// for a specific data type.
1567    ///
1568    /// # Default Values
1569    ///
1570    /// - **Numeric types**: Returns zero (via [`new_zero`])
1571    /// - **String types**: Returns empty string (`""`)
1572    /// - **Binary types**: Returns empty byte array
1573    /// - **Temporal types**: Returns zero/epoch value
1574    /// - **List types**: Returns empty list
1575    /// - **Struct types**: Returns struct with all fields set to their defaults
1576    /// - **Dictionary types**: Returns dictionary with default value
1577    /// - **Map types**: Returns empty map
1578    /// - **Union types**: Returns first variant with default value
1579    ///
1580    /// # Errors
1581    ///
1582    /// Returns an error for data types that don't have a clear default value
1583    /// or are not yet supported (e.g., `RunEndEncoded`).
1584    ///
1585    /// [`new_zero`]: Self::new_zero
1586    pub fn new_default(datatype: &DataType) -> Result<ScalarValue> {
1587        match datatype {
1588            // Null type
1589            DataType::Null => Ok(ScalarValue::Null),
1590
1591            // Numeric types
1592            DataType::Boolean
1593            | DataType::Int8
1594            | DataType::Int16
1595            | DataType::Int32
1596            | DataType::Int64
1597            | DataType::UInt8
1598            | DataType::UInt16
1599            | DataType::UInt32
1600            | DataType::UInt64
1601            | DataType::Float16
1602            | DataType::Float32
1603            | DataType::Float64
1604            | DataType::Decimal32(_, _)
1605            | DataType::Decimal64(_, _)
1606            | DataType::Decimal128(_, _)
1607            | DataType::Decimal256(_, _)
1608            | DataType::Timestamp(_, _)
1609            | DataType::Time32(_)
1610            | DataType::Time64(_)
1611            | DataType::Interval(_)
1612            | DataType::Duration(_)
1613            | DataType::Date32
1614            | DataType::Date64 => ScalarValue::new_zero(datatype),
1615
1616            // String types
1617            DataType::Utf8 => Ok(ScalarValue::Utf8(Some("".to_string()))),
1618            DataType::LargeUtf8 => Ok(ScalarValue::LargeUtf8(Some("".to_string()))),
1619            DataType::Utf8View => Ok(ScalarValue::Utf8View(Some("".to_string()))),
1620
1621            // Binary types
1622            DataType::Binary => Ok(ScalarValue::Binary(Some(vec![]))),
1623            DataType::LargeBinary => Ok(ScalarValue::LargeBinary(Some(vec![]))),
1624            DataType::BinaryView => Ok(ScalarValue::BinaryView(Some(vec![]))),
1625
1626            // Fixed-size binary
1627            DataType::FixedSizeBinary(size) => Ok(ScalarValue::FixedSizeBinary(
1628                *size,
1629                Some(vec![0; *size as usize]),
1630            )),
1631
1632            // List types
1633            DataType::List(field) => {
1634                let list =
1635                    ScalarValue::new_list(&[], field.data_type(), field.is_nullable());
1636                Ok(ScalarValue::List(list))
1637            }
1638            DataType::FixedSizeList(field, _size) => {
1639                let empty_arr = new_empty_array(field.data_type());
1640                let values = Arc::new(
1641                    SingleRowListArrayBuilder::new(empty_arr)
1642                        .with_nullable(field.is_nullable())
1643                        .build_fixed_size_list_array(0),
1644                );
1645                Ok(ScalarValue::FixedSizeList(values))
1646            }
1647            DataType::LargeList(field) => {
1648                let list = ScalarValue::new_large_list(&[], field.data_type());
1649                Ok(ScalarValue::LargeList(list))
1650            }
1651
1652            // Struct types
1653            DataType::Struct(fields) => {
1654                let values = fields
1655                    .iter()
1656                    .map(|f| ScalarValue::new_default(f.data_type()))
1657                    .collect::<Result<Vec<_>>>()?;
1658                Ok(ScalarValue::Struct(Arc::new(StructArray::new(
1659                    fields.clone(),
1660                    values
1661                        .into_iter()
1662                        .map(|v| v.to_array())
1663                        .collect::<Result<_>>()?,
1664                    None,
1665                ))))
1666            }
1667
1668            // Dictionary types
1669            DataType::Dictionary(key_type, value_type) => Ok(ScalarValue::Dictionary(
1670                key_type.clone(),
1671                Box::new(ScalarValue::new_default(value_type)?),
1672            )),
1673
1674            DataType::RunEndEncoded(run_ends_field, value_field) => {
1675                Ok(ScalarValue::RunEndEncoded(
1676                    Arc::clone(run_ends_field),
1677                    Arc::clone(value_field),
1678                    Box::new(ScalarValue::new_default(value_field.data_type())?),
1679                ))
1680            }
1681
1682            // Map types
1683            DataType::Map(field, _) => Ok(ScalarValue::Map(Arc::new(MapArray::from(
1684                ArrayData::new_empty(field.data_type()),
1685            )))),
1686
1687            // Union types - return first variant with default value
1688            DataType::Union(fields, mode) => {
1689                if let Some((type_id, field)) = fields.iter().next() {
1690                    let default_value = ScalarValue::new_default(field.data_type())?;
1691                    Ok(ScalarValue::Union(
1692                        Some((type_id, Box::new(default_value))),
1693                        fields.clone(),
1694                        *mode,
1695                    ))
1696                } else {
1697                    _internal_err!("Union type must have at least one field")
1698                }
1699            }
1700
1701            DataType::ListView(_) | DataType::LargeListView(_) => {
1702                _not_impl_err!(
1703                    "Default value for data_type \"{datatype}\" is not implemented yet"
1704                )
1705            }
1706        }
1707    }
1708
1709    /// Create an one value in the given type.
1710    pub fn new_one(datatype: &DataType) -> Result<ScalarValue> {
1711        Ok(match datatype {
1712            DataType::Int8 => ScalarValue::Int8(Some(1)),
1713            DataType::Int16 => ScalarValue::Int16(Some(1)),
1714            DataType::Int32 => ScalarValue::Int32(Some(1)),
1715            DataType::Int64 => ScalarValue::Int64(Some(1)),
1716            DataType::UInt8 => ScalarValue::UInt8(Some(1)),
1717            DataType::UInt16 => ScalarValue::UInt16(Some(1)),
1718            DataType::UInt32 => ScalarValue::UInt32(Some(1)),
1719            DataType::UInt64 => ScalarValue::UInt64(Some(1)),
1720            DataType::Float16 => ScalarValue::Float16(Some(f16::ONE)),
1721            DataType::Float32 => ScalarValue::Float32(Some(1.0)),
1722            DataType::Float64 => ScalarValue::Float64(Some(1.0)),
1723            DataType::Decimal32(precision, scale) => {
1724                Self::validate_decimal_or_internal_err::<Decimal32Type>(
1725                    *precision, *scale,
1726                )?;
1727                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1728                match 10_i32.checked_pow(*scale as u32) {
1729                    Some(value) => {
1730                        ScalarValue::Decimal32(Some(value), *precision, *scale)
1731                    }
1732                    None => return _internal_err!("Unsupported scale {scale}"),
1733                }
1734            }
1735            DataType::Decimal64(precision, scale) => {
1736                Self::validate_decimal_or_internal_err::<Decimal64Type>(
1737                    *precision, *scale,
1738                )?;
1739                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1740                match i64::from(10).checked_pow(*scale as u32) {
1741                    Some(value) => {
1742                        ScalarValue::Decimal64(Some(value), *precision, *scale)
1743                    }
1744                    None => return _internal_err!("Unsupported scale {scale}"),
1745                }
1746            }
1747            DataType::Decimal128(precision, scale) => {
1748                Self::validate_decimal_or_internal_err::<Decimal128Type>(
1749                    *precision, *scale,
1750                )?;
1751                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1752                match i128::from(10).checked_pow(*scale as u32) {
1753                    Some(value) => {
1754                        ScalarValue::Decimal128(Some(value), *precision, *scale)
1755                    }
1756                    None => return _internal_err!("Unsupported scale {scale}"),
1757                }
1758            }
1759            DataType::Decimal256(precision, scale) => {
1760                Self::validate_decimal_or_internal_err::<Decimal256Type>(
1761                    *precision, *scale,
1762                )?;
1763                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1764                match i256::from(10).checked_pow(*scale as u32) {
1765                    Some(value) => {
1766                        ScalarValue::Decimal256(Some(value), *precision, *scale)
1767                    }
1768                    None => return _internal_err!("Unsupported scale {scale}"),
1769                }
1770            }
1771            _ => {
1772                return _not_impl_err!(
1773                    "Can't create an one scalar from data_type \"{datatype}\""
1774                );
1775            }
1776        })
1777    }
1778
1779    /// Create a negative one value in the given type.
1780    pub fn new_negative_one(datatype: &DataType) -> Result<ScalarValue> {
1781        Ok(match datatype {
1782            DataType::Int8 | DataType::UInt8 => ScalarValue::Int8(Some(-1)),
1783            DataType::Int16 | DataType::UInt16 => ScalarValue::Int16(Some(-1)),
1784            DataType::Int32 | DataType::UInt32 => ScalarValue::Int32(Some(-1)),
1785            DataType::Int64 | DataType::UInt64 => ScalarValue::Int64(Some(-1)),
1786            DataType::Float16 => ScalarValue::Float16(Some(f16::NEG_ONE)),
1787            DataType::Float32 => ScalarValue::Float32(Some(-1.0)),
1788            DataType::Float64 => ScalarValue::Float64(Some(-1.0)),
1789            DataType::Decimal32(precision, scale) => {
1790                Self::validate_decimal_or_internal_err::<Decimal32Type>(
1791                    *precision, *scale,
1792                )?;
1793                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1794                match 10_i32.checked_pow(*scale as u32) {
1795                    Some(value) => {
1796                        ScalarValue::Decimal32(Some(-value), *precision, *scale)
1797                    }
1798                    None => return _internal_err!("Unsupported scale {scale}"),
1799                }
1800            }
1801            DataType::Decimal64(precision, scale) => {
1802                Self::validate_decimal_or_internal_err::<Decimal64Type>(
1803                    *precision, *scale,
1804                )?;
1805                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1806                match i64::from(10).checked_pow(*scale as u32) {
1807                    Some(value) => {
1808                        ScalarValue::Decimal64(Some(-value), *precision, *scale)
1809                    }
1810                    None => return _internal_err!("Unsupported scale {scale}"),
1811                }
1812            }
1813            DataType::Decimal128(precision, scale) => {
1814                Self::validate_decimal_or_internal_err::<Decimal128Type>(
1815                    *precision, *scale,
1816                )?;
1817                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1818                match i128::from(10).checked_pow(*scale as u32) {
1819                    Some(value) => {
1820                        ScalarValue::Decimal128(Some(-value), *precision, *scale)
1821                    }
1822                    None => return _internal_err!("Unsupported scale {scale}"),
1823                }
1824            }
1825            DataType::Decimal256(precision, scale) => {
1826                Self::validate_decimal_or_internal_err::<Decimal256Type>(
1827                    *precision, *scale,
1828                )?;
1829                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1830                match i256::from(10).checked_pow(*scale as u32) {
1831                    Some(value) => {
1832                        ScalarValue::Decimal256(Some(-value), *precision, *scale)
1833                    }
1834                    None => return _internal_err!("Unsupported scale {scale}"),
1835                }
1836            }
1837            _ => {
1838                return _not_impl_err!(
1839                    "Can't create a negative one scalar from data_type \"{datatype}\""
1840                );
1841            }
1842        })
1843    }
1844
1845    pub fn new_ten(datatype: &DataType) -> Result<ScalarValue> {
1846        Ok(match datatype {
1847            DataType::Int8 => ScalarValue::Int8(Some(10)),
1848            DataType::Int16 => ScalarValue::Int16(Some(10)),
1849            DataType::Int32 => ScalarValue::Int32(Some(10)),
1850            DataType::Int64 => ScalarValue::Int64(Some(10)),
1851            DataType::UInt8 => ScalarValue::UInt8(Some(10)),
1852            DataType::UInt16 => ScalarValue::UInt16(Some(10)),
1853            DataType::UInt32 => ScalarValue::UInt32(Some(10)),
1854            DataType::UInt64 => ScalarValue::UInt64(Some(10)),
1855            DataType::Float16 => ScalarValue::Float16(Some(f16::from_f32(10.0))),
1856            DataType::Float32 => ScalarValue::Float32(Some(10.0)),
1857            DataType::Float64 => ScalarValue::Float64(Some(10.0)),
1858            DataType::Decimal32(precision, scale) => {
1859                Self::validate_decimal_or_internal_err::<Decimal32Type>(
1860                    *precision, *scale,
1861                )?;
1862                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1863                match 10_i32.checked_pow((*scale + 1) as u32) {
1864                    Some(value) => {
1865                        ScalarValue::Decimal32(Some(value), *precision, *scale)
1866                    }
1867                    None => return _internal_err!("Unsupported scale {scale}"),
1868                }
1869            }
1870            DataType::Decimal64(precision, scale) => {
1871                Self::validate_decimal_or_internal_err::<Decimal64Type>(
1872                    *precision, *scale,
1873                )?;
1874                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1875                match i64::from(10).checked_pow((*scale + 1) as u32) {
1876                    Some(value) => {
1877                        ScalarValue::Decimal64(Some(value), *precision, *scale)
1878                    }
1879                    None => return _internal_err!("Unsupported scale {scale}"),
1880                }
1881            }
1882            DataType::Decimal128(precision, scale) => {
1883                Self::validate_decimal_or_internal_err::<Decimal128Type>(
1884                    *precision, *scale,
1885                )?;
1886                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1887                match i128::from(10).checked_pow((*scale + 1) as u32) {
1888                    Some(value) => {
1889                        ScalarValue::Decimal128(Some(value), *precision, *scale)
1890                    }
1891                    None => return _internal_err!("Unsupported scale {scale}"),
1892                }
1893            }
1894            DataType::Decimal256(precision, scale) => {
1895                Self::validate_decimal_or_internal_err::<Decimal256Type>(
1896                    *precision, *scale,
1897                )?;
1898                assert_or_internal_err!(*scale >= 0, "Negative scale is not supported");
1899                match i256::from(10).checked_pow((*scale + 1) as u32) {
1900                    Some(value) => {
1901                        ScalarValue::Decimal256(Some(value), *precision, *scale)
1902                    }
1903                    None => return _internal_err!("Unsupported scale {scale}"),
1904                }
1905            }
1906            _ => {
1907                return _not_impl_err!(
1908                    "Can't create a ten scalar from data_type \"{datatype}\""
1909                );
1910            }
1911        })
1912    }
1913
1914    /// return the [`DataType`] of this `ScalarValue`
1915    pub fn data_type(&self) -> DataType {
1916        match self {
1917            ScalarValue::Boolean(_) => DataType::Boolean,
1918            ScalarValue::UInt8(_) => DataType::UInt8,
1919            ScalarValue::UInt16(_) => DataType::UInt16,
1920            ScalarValue::UInt32(_) => DataType::UInt32,
1921            ScalarValue::UInt64(_) => DataType::UInt64,
1922            ScalarValue::Int8(_) => DataType::Int8,
1923            ScalarValue::Int16(_) => DataType::Int16,
1924            ScalarValue::Int32(_) => DataType::Int32,
1925            ScalarValue::Int64(_) => DataType::Int64,
1926            ScalarValue::Decimal32(_, precision, scale) => {
1927                DataType::Decimal32(*precision, *scale)
1928            }
1929            ScalarValue::Decimal64(_, precision, scale) => {
1930                DataType::Decimal64(*precision, *scale)
1931            }
1932            ScalarValue::Decimal128(_, precision, scale) => {
1933                DataType::Decimal128(*precision, *scale)
1934            }
1935            ScalarValue::Decimal256(_, precision, scale) => {
1936                DataType::Decimal256(*precision, *scale)
1937            }
1938            ScalarValue::TimestampSecond(_, tz_opt) => {
1939                DataType::Timestamp(TimeUnit::Second, tz_opt.clone())
1940            }
1941            ScalarValue::TimestampMillisecond(_, tz_opt) => {
1942                DataType::Timestamp(TimeUnit::Millisecond, tz_opt.clone())
1943            }
1944            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
1945                DataType::Timestamp(TimeUnit::Microsecond, tz_opt.clone())
1946            }
1947            ScalarValue::TimestampNanosecond(_, tz_opt) => {
1948                DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1949            }
1950            ScalarValue::Float16(_) => DataType::Float16,
1951            ScalarValue::Float32(_) => DataType::Float32,
1952            ScalarValue::Float64(_) => DataType::Float64,
1953            ScalarValue::Utf8(_) => DataType::Utf8,
1954            ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
1955            ScalarValue::Utf8View(_) => DataType::Utf8View,
1956            ScalarValue::Binary(_) => DataType::Binary,
1957            ScalarValue::BinaryView(_) => DataType::BinaryView,
1958            ScalarValue::FixedSizeBinary(sz, _) => DataType::FixedSizeBinary(*sz),
1959            ScalarValue::LargeBinary(_) => DataType::LargeBinary,
1960            ScalarValue::List(arr) => arr.data_type().to_owned(),
1961            ScalarValue::LargeList(arr) => arr.data_type().to_owned(),
1962            ScalarValue::FixedSizeList(arr) => arr.data_type().to_owned(),
1963            ScalarValue::Struct(arr) => arr.data_type().to_owned(),
1964            ScalarValue::Map(arr) => arr.data_type().to_owned(),
1965            ScalarValue::Date32(_) => DataType::Date32,
1966            ScalarValue::Date64(_) => DataType::Date64,
1967            ScalarValue::Time32Second(_) => DataType::Time32(TimeUnit::Second),
1968            ScalarValue::Time32Millisecond(_) => DataType::Time32(TimeUnit::Millisecond),
1969            ScalarValue::Time64Microsecond(_) => DataType::Time64(TimeUnit::Microsecond),
1970            ScalarValue::Time64Nanosecond(_) => DataType::Time64(TimeUnit::Nanosecond),
1971            ScalarValue::IntervalYearMonth(_) => {
1972                DataType::Interval(IntervalUnit::YearMonth)
1973            }
1974            ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime),
1975            ScalarValue::IntervalMonthDayNano(_) => {
1976                DataType::Interval(IntervalUnit::MonthDayNano)
1977            }
1978            ScalarValue::DurationSecond(_) => DataType::Duration(TimeUnit::Second),
1979            ScalarValue::DurationMillisecond(_) => {
1980                DataType::Duration(TimeUnit::Millisecond)
1981            }
1982            ScalarValue::DurationMicrosecond(_) => {
1983                DataType::Duration(TimeUnit::Microsecond)
1984            }
1985            ScalarValue::DurationNanosecond(_) => {
1986                DataType::Duration(TimeUnit::Nanosecond)
1987            }
1988            ScalarValue::Union(_, fields, mode) => DataType::Union(fields.clone(), *mode),
1989            ScalarValue::Dictionary(k, v) => {
1990                DataType::Dictionary(k.clone(), Box::new(v.data_type()))
1991            }
1992            ScalarValue::RunEndEncoded(run_ends_field, value_field, _) => {
1993                DataType::RunEndEncoded(
1994                    Arc::clone(run_ends_field),
1995                    Arc::clone(value_field),
1996                )
1997            }
1998            ScalarValue::Null => DataType::Null,
1999        }
2000    }
2001
2002    /// Calculate arithmetic negation for a scalar value
2003    pub fn arithmetic_negate(&self) -> Result<Self> {
2004        fn neg_checked_with_ctx<T: ArrowNativeTypeOp>(
2005            v: T,
2006            ctx: impl Fn() -> String,
2007        ) -> Result<T> {
2008            v.neg_checked()
2009                .map_err(|e| arrow_datafusion_err!(e).context(ctx()))
2010        }
2011        match self {
2012            ScalarValue::Int8(None)
2013            | ScalarValue::Int16(None)
2014            | ScalarValue::Int32(None)
2015            | ScalarValue::Int64(None)
2016            | ScalarValue::Float16(None)
2017            | ScalarValue::Float32(None)
2018            | ScalarValue::Float64(None) => Ok(self.clone()),
2019            ScalarValue::Float16(Some(v)) => Ok(ScalarValue::Float16(Some(-v))),
2020            ScalarValue::Float64(Some(v)) => Ok(ScalarValue::Float64(Some(-v))),
2021            ScalarValue::Float32(Some(v)) => Ok(ScalarValue::Float32(Some(-v))),
2022            ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(v.neg_checked()?))),
2023            ScalarValue::Int16(Some(v)) => Ok(ScalarValue::Int16(Some(v.neg_checked()?))),
2024            ScalarValue::Int32(Some(v)) => Ok(ScalarValue::Int32(Some(v.neg_checked()?))),
2025            ScalarValue::Int64(Some(v)) => Ok(ScalarValue::Int64(Some(v.neg_checked()?))),
2026            ScalarValue::IntervalYearMonth(Some(v)) => Ok(
2027                ScalarValue::IntervalYearMonth(Some(neg_checked_with_ctx(*v, || {
2028                    format!("In negation of IntervalYearMonth({v})")
2029                })?)),
2030            ),
2031            ScalarValue::IntervalDayTime(Some(v)) => {
2032                let (days, ms) = IntervalDayTimeType::to_parts(*v);
2033                let val = IntervalDayTimeType::make_value(
2034                    neg_checked_with_ctx(days, || {
2035                        format!("In negation of days {days} in IntervalDayTime")
2036                    })?,
2037                    neg_checked_with_ctx(ms, || {
2038                        format!("In negation of milliseconds {ms} in IntervalDayTime")
2039                    })?,
2040                );
2041                Ok(ScalarValue::IntervalDayTime(Some(val)))
2042            }
2043            ScalarValue::IntervalMonthDayNano(Some(v)) => {
2044                let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);
2045                let val = IntervalMonthDayNanoType::make_value(
2046                    neg_checked_with_ctx(months, || {
2047                        format!("In negation of months {months} of IntervalMonthDayNano")
2048                    })?,
2049                    neg_checked_with_ctx(days, || {
2050                        format!("In negation of days {days} of IntervalMonthDayNano")
2051                    })?,
2052                    neg_checked_with_ctx(nanos, || {
2053                        format!("In negation of nanos {nanos} of IntervalMonthDayNano")
2054                    })?,
2055                );
2056                Ok(ScalarValue::IntervalMonthDayNano(Some(val)))
2057            }
2058            ScalarValue::Decimal32(Some(v), precision, scale) => {
2059                Ok(ScalarValue::Decimal32(
2060                    Some(neg_checked_with_ctx(*v, || {
2061                        format!("In negation of Decimal32({v}, {precision}, {scale})")
2062                    })?),
2063                    *precision,
2064                    *scale,
2065                ))
2066            }
2067            ScalarValue::Decimal64(Some(v), precision, scale) => {
2068                Ok(ScalarValue::Decimal64(
2069                    Some(neg_checked_with_ctx(*v, || {
2070                        format!("In negation of Decimal64({v}, {precision}, {scale})")
2071                    })?),
2072                    *precision,
2073                    *scale,
2074                ))
2075            }
2076            ScalarValue::Decimal128(Some(v), precision, scale) => {
2077                Ok(ScalarValue::Decimal128(
2078                    Some(neg_checked_with_ctx(*v, || {
2079                        format!("In negation of Decimal128({v}, {precision}, {scale})")
2080                    })?),
2081                    *precision,
2082                    *scale,
2083                ))
2084            }
2085            ScalarValue::Decimal256(Some(v), precision, scale) => {
2086                Ok(ScalarValue::Decimal256(
2087                    Some(neg_checked_with_ctx(*v, || {
2088                        format!("In negation of Decimal256({v}, {precision}, {scale})")
2089                    })?),
2090                    *precision,
2091                    *scale,
2092                ))
2093            }
2094            ScalarValue::TimestampSecond(Some(v), tz) => {
2095                Ok(ScalarValue::TimestampSecond(
2096                    Some(neg_checked_with_ctx(*v, || {
2097                        format!("In negation of TimestampSecond({v})")
2098                    })?),
2099                    tz.clone(),
2100                ))
2101            }
2102            ScalarValue::TimestampNanosecond(Some(v), tz) => {
2103                Ok(ScalarValue::TimestampNanosecond(
2104                    Some(neg_checked_with_ctx(*v, || {
2105                        format!("In negation of TimestampNanoSecond({v})")
2106                    })?),
2107                    tz.clone(),
2108                ))
2109            }
2110            ScalarValue::TimestampMicrosecond(Some(v), tz) => {
2111                Ok(ScalarValue::TimestampMicrosecond(
2112                    Some(neg_checked_with_ctx(*v, || {
2113                        format!("In negation of TimestampMicroSecond({v})")
2114                    })?),
2115                    tz.clone(),
2116                ))
2117            }
2118            ScalarValue::TimestampMillisecond(Some(v), tz) => {
2119                Ok(ScalarValue::TimestampMillisecond(
2120                    Some(neg_checked_with_ctx(*v, || {
2121                        format!("In negation of TimestampMilliSecond({v})")
2122                    })?),
2123                    tz.clone(),
2124                ))
2125            }
2126            value => _internal_err!(
2127                "Can not run arithmetic negative on scalar value {value:?}"
2128            ),
2129        }
2130    }
2131
2132    /// Wrapping addition of `ScalarValue`
2133    ///
2134    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2135    /// should operate on Arrays directly, using vectorized array kernels
2136    pub fn add<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2137        let r = add_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2138        Self::try_from_array(r.as_ref(), 0)
2139    }
2140
2141    /// Checked addition of `ScalarValue`
2142    ///
2143    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2144    /// should operate on Arrays directly, using vectorized array kernels
2145    pub fn add_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2146        let r = add(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2147        Self::try_from_array(r.as_ref(), 0)
2148    }
2149
2150    /// Wrapping subtraction of `ScalarValue`
2151    ///
2152    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2153    /// should operate on Arrays directly, using vectorized array kernels
2154    pub fn sub<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2155        let r = sub_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2156        Self::try_from_array(r.as_ref(), 0)
2157    }
2158
2159    /// Checked subtraction of `ScalarValue`
2160    ///
2161    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2162    /// should operate on Arrays directly, using vectorized array kernels
2163    pub fn sub_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2164        let r = sub(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2165        Self::try_from_array(r.as_ref(), 0)
2166    }
2167
2168    /// Wrapping multiplication of `ScalarValue`
2169    ///
2170    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2171    /// should operate on Arrays directly, using vectorized array kernels.
2172    pub fn mul<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2173        let r = mul_wrapping(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2174        Self::try_from_array(r.as_ref(), 0)
2175    }
2176
2177    /// Checked multiplication of `ScalarValue`
2178    ///
2179    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2180    /// should operate on Arrays directly, using vectorized array kernels.
2181    pub fn mul_checked<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2182        let r = mul(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2183        Self::try_from_array(r.as_ref(), 0)
2184    }
2185
2186    /// Performs `lhs / rhs`
2187    ///
2188    /// Overflow or division by zero will result in an error, with exception to
2189    /// floating point numbers, which instead follow the IEEE 754 rules.
2190    ///
2191    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2192    /// should operate on Arrays directly, using vectorized array kernels.
2193    pub fn div<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2194        let r = div(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2195        Self::try_from_array(r.as_ref(), 0)
2196    }
2197
2198    /// Performs `lhs % rhs`
2199    ///
2200    /// Overflow or division by zero will result in an error, with exception to
2201    /// floating point numbers, which instead follow the IEEE 754 rules.
2202    ///
2203    /// NB: operating on `ScalarValue` directly is not efficient, performance sensitive code
2204    /// should operate on Arrays directly, using vectorized array kernels.
2205    pub fn rem<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
2206        let r = rem(&self.to_scalar()?, &other.borrow().to_scalar()?)?;
2207        Self::try_from_array(r.as_ref(), 0)
2208    }
2209
2210    pub fn is_unsigned(&self) -> bool {
2211        matches!(
2212            self,
2213            ScalarValue::UInt8(_)
2214                | ScalarValue::UInt16(_)
2215                | ScalarValue::UInt32(_)
2216                | ScalarValue::UInt64(_)
2217        )
2218    }
2219
2220    /// whether this value is null or not.
2221    pub fn is_null(&self) -> bool {
2222        match self {
2223            ScalarValue::Boolean(v) => v.is_none(),
2224            ScalarValue::Null => true,
2225            ScalarValue::Float16(v) => v.is_none(),
2226            ScalarValue::Float32(v) => v.is_none(),
2227            ScalarValue::Float64(v) => v.is_none(),
2228            ScalarValue::Decimal32(v, _, _) => v.is_none(),
2229            ScalarValue::Decimal64(v, _, _) => v.is_none(),
2230            ScalarValue::Decimal128(v, _, _) => v.is_none(),
2231            ScalarValue::Decimal256(v, _, _) => v.is_none(),
2232            ScalarValue::Int8(v) => v.is_none(),
2233            ScalarValue::Int16(v) => v.is_none(),
2234            ScalarValue::Int32(v) => v.is_none(),
2235            ScalarValue::Int64(v) => v.is_none(),
2236            ScalarValue::UInt8(v) => v.is_none(),
2237            ScalarValue::UInt16(v) => v.is_none(),
2238            ScalarValue::UInt32(v) => v.is_none(),
2239            ScalarValue::UInt64(v) => v.is_none(),
2240            ScalarValue::Utf8(v)
2241            | ScalarValue::Utf8View(v)
2242            | ScalarValue::LargeUtf8(v) => v.is_none(),
2243            ScalarValue::Binary(v)
2244            | ScalarValue::BinaryView(v)
2245            | ScalarValue::FixedSizeBinary(_, v)
2246            | ScalarValue::LargeBinary(v) => v.is_none(),
2247            // arr.len() should be 1 for a list scalar, but we don't seem to
2248            // enforce that anywhere, so we still check against array length.
2249            ScalarValue::List(arr) => arr.len() == arr.null_count(),
2250            ScalarValue::LargeList(arr) => arr.len() == arr.null_count(),
2251            ScalarValue::FixedSizeList(arr) => arr.len() == arr.null_count(),
2252            ScalarValue::Struct(arr) => arr.len() == arr.null_count(),
2253            ScalarValue::Map(arr) => arr.len() == arr.null_count(),
2254            ScalarValue::Date32(v) => v.is_none(),
2255            ScalarValue::Date64(v) => v.is_none(),
2256            ScalarValue::Time32Second(v) => v.is_none(),
2257            ScalarValue::Time32Millisecond(v) => v.is_none(),
2258            ScalarValue::Time64Microsecond(v) => v.is_none(),
2259            ScalarValue::Time64Nanosecond(v) => v.is_none(),
2260            ScalarValue::TimestampSecond(v, _) => v.is_none(),
2261            ScalarValue::TimestampMillisecond(v, _) => v.is_none(),
2262            ScalarValue::TimestampMicrosecond(v, _) => v.is_none(),
2263            ScalarValue::TimestampNanosecond(v, _) => v.is_none(),
2264            ScalarValue::IntervalYearMonth(v) => v.is_none(),
2265            ScalarValue::IntervalDayTime(v) => v.is_none(),
2266            ScalarValue::IntervalMonthDayNano(v) => v.is_none(),
2267            ScalarValue::DurationSecond(v) => v.is_none(),
2268            ScalarValue::DurationMillisecond(v) => v.is_none(),
2269            ScalarValue::DurationMicrosecond(v) => v.is_none(),
2270            ScalarValue::DurationNanosecond(v) => v.is_none(),
2271            ScalarValue::Union(v, _, _) => match v {
2272                Some((_, s)) => s.is_null(),
2273                None => true,
2274            },
2275            ScalarValue::Dictionary(_, v) => v.is_null(),
2276            ScalarValue::RunEndEncoded(_, _, v) => v.is_null(),
2277        }
2278    }
2279
2280    /// Absolute distance between two numeric values (of the same type). This method will return
2281    /// None if either one of the arguments are null. It might also return None if the resulting
2282    /// distance is greater than [`usize::MAX`]. If the type is a float, then the distance will be
2283    /// rounded to the nearest integer.
2284    ///
2285    ///
2286    /// Note: the datatype itself must support subtraction.
2287    pub fn distance(&self, other: &ScalarValue) -> Option<usize> {
2288        match (self, other) {
2289            (Self::Int8(Some(l)), Self::Int8(Some(r))) => Some(l.abs_diff(*r) as _),
2290            (Self::Int16(Some(l)), Self::Int16(Some(r))) => Some(l.abs_diff(*r) as _),
2291            (Self::Int32(Some(l)), Self::Int32(Some(r))) => Some(l.abs_diff(*r) as _),
2292            (Self::Int64(Some(l)), Self::Int64(Some(r))) => Some(l.abs_diff(*r) as _),
2293            (Self::UInt8(Some(l)), Self::UInt8(Some(r))) => Some(l.abs_diff(*r) as _),
2294            (Self::UInt16(Some(l)), Self::UInt16(Some(r))) => Some(l.abs_diff(*r) as _),
2295            (Self::UInt32(Some(l)), Self::UInt32(Some(r))) => Some(l.abs_diff(*r) as _),
2296            (Self::UInt64(Some(l)), Self::UInt64(Some(r))) => Some(l.abs_diff(*r) as _),
2297            // TODO: we might want to look into supporting ceil/floor here for floats.
2298            (Self::Float16(Some(l)), Self::Float16(Some(r))) => {
2299                Some((f16::to_f32(*l) - f16::to_f32(*r)).abs().round() as _)
2300            }
2301            (Self::Float32(Some(l)), Self::Float32(Some(r))) => {
2302                Some((l - r).abs().round() as _)
2303            }
2304            (Self::Float64(Some(l)), Self::Float64(Some(r))) => {
2305                Some((l - r).abs().round() as _)
2306            }
2307            (
2308                Self::Decimal128(Some(l), lprecision, lscale),
2309                Self::Decimal128(Some(r), rprecision, rscale),
2310            ) => {
2311                if lprecision == rprecision && lscale == rscale {
2312                    l.checked_sub(*r)?.checked_abs()?.to_usize()
2313                } else {
2314                    None
2315                }
2316            }
2317            (
2318                Self::Decimal256(Some(l), lprecision, lscale),
2319                Self::Decimal256(Some(r), rprecision, rscale),
2320            ) => {
2321                if lprecision == rprecision && lscale == rscale {
2322                    l.checked_sub(*r)?.checked_abs()?.to_usize()
2323                } else {
2324                    None
2325                }
2326            }
2327            _ => None,
2328        }
2329    }
2330
2331    /// Converts a scalar value into an 1-row array.
2332    ///
2333    /// # Errors
2334    ///
2335    /// Errors if the ScalarValue cannot be converted into a 1-row array
2336    pub fn to_array(&self) -> Result<ArrayRef> {
2337        self.to_array_of_size(1)
2338    }
2339
2340    /// Converts a scalar into an arrow [`Scalar`] (which implements
2341    /// the [`Datum`] interface).
2342    ///
2343    /// This can be used to call arrow compute kernels such as `lt`
2344    ///
2345    /// # Errors
2346    ///
2347    /// Errors if the ScalarValue cannot be converted into a 1-row array
2348    ///
2349    /// # Example
2350    /// ```
2351    /// use arrow::array::{BooleanArray, Int32Array};
2352    /// use datafusion_common::ScalarValue;
2353    ///
2354    /// let arr = Int32Array::from(vec![Some(1), None, Some(10)]);
2355    /// let five = ScalarValue::Int32(Some(5));
2356    ///
2357    /// let result =
2358    ///     arrow::compute::kernels::cmp::lt(&arr, &five.to_scalar().unwrap()).unwrap();
2359    ///
2360    /// let expected = BooleanArray::from(vec![Some(true), None, Some(false)]);
2361    ///
2362    /// assert_eq!(&result, &expected);
2363    /// ```
2364    /// [`Datum`]: arrow::array::Datum
2365    pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
2366        Ok(Scalar::new(self.to_array_of_size(1)?))
2367    }
2368
2369    /// Converts an iterator of references [`ScalarValue`] into an [`ArrayRef`]
2370    /// corresponding to those values. For example, an iterator of
2371    /// [`ScalarValue::Int32`] would be converted to an [`Int32Array`].
2372    ///
2373    /// Returns an error if the iterator is empty or if the
2374    /// [`ScalarValue`]s are not all the same type
2375    ///
2376    /// # Example
2377    /// ```
2378    /// use arrow::array::{ArrayRef, BooleanArray};
2379    /// use datafusion_common::ScalarValue;
2380    ///
2381    /// let scalars = vec![
2382    ///     ScalarValue::Boolean(Some(true)),
2383    ///     ScalarValue::Boolean(None),
2384    ///     ScalarValue::Boolean(Some(false)),
2385    /// ];
2386    ///
2387    /// // Build an Array from the list of ScalarValues
2388    /// let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
2389    ///
2390    /// let expected: ArrayRef =
2391    ///     std::sync::Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)]));
2392    ///
2393    /// assert_eq!(&array, &expected);
2394    /// ```
2395    pub fn iter_to_array(
2396        scalars: impl IntoIterator<Item = ScalarValue>,
2397    ) -> Result<ArrayRef> {
2398        let mut scalars = scalars.into_iter().peekable();
2399
2400        // figure out the type based on the first element
2401        let data_type = match scalars.peek() {
2402            None => {
2403                return _exec_err!("Empty iterator passed to ScalarValue::iter_to_array");
2404            }
2405            Some(sv) => sv.data_type(),
2406        };
2407
2408        /// Creates an array of $ARRAY_TY by unpacking values of
2409        /// SCALAR_TY for primitive types
2410        macro_rules! build_array_primitive {
2411            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
2412                {
2413                    let array = scalars
2414                        .map(|sv| {
2415                            if let ScalarValue::$SCALAR_TY(v) = sv {
2416                                Ok(v)
2417                            } else {
2418                                _exec_err!(
2419                                    "Inconsistent types in ScalarValue::iter_to_array. \
2420                                    Expected {:?}, got {:?}",
2421                                    data_type,
2422                                    sv
2423                                )
2424                            }
2425                        })
2426                        .collect::<Result<$ARRAY_TY>>()?;
2427                    Arc::new(array)
2428                }
2429            }};
2430        }
2431
2432        macro_rules! build_array_primitive_tz {
2433            ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
2434                {
2435                    let array = scalars
2436                        .map(|sv| {
2437                            if let ScalarValue::$SCALAR_TY(v, _) = sv {
2438                                Ok(v)
2439                            } else {
2440                                _exec_err!(
2441                                    "Inconsistent types in ScalarValue::iter_to_array. \
2442                                    Expected {:?}, got {:?}",
2443                                    data_type,
2444                                    sv
2445                                )
2446                            }
2447                        })
2448                        .collect::<Result<$ARRAY_TY>>()?;
2449                    Arc::new(array.with_timezone_opt($TZ.clone()))
2450                }
2451            }};
2452        }
2453
2454        /// Creates an array of $ARRAY_TY by unpacking values of
2455        /// SCALAR_TY for "string-like" types.
2456        macro_rules! build_array_string {
2457            ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{
2458                {
2459                    let array = scalars
2460                        .map(|sv| {
2461                            if let ScalarValue::$SCALAR_TY(v) = sv {
2462                                Ok(v)
2463                            } else {
2464                                _exec_err!(
2465                                    "Inconsistent types in ScalarValue::iter_to_array. \
2466                                    Expected {:?}, got {:?}",
2467                                    data_type,
2468                                    sv
2469                                )
2470                            }
2471                        })
2472                        .collect::<Result<$ARRAY_TY>>()?;
2473                    Arc::new(array)
2474                }
2475            }};
2476        }
2477
2478        let array: ArrayRef = match &data_type {
2479            DataType::Decimal32(precision, scale) => {
2480                let decimal_array =
2481                    ScalarValue::iter_to_decimal32_array(scalars, *precision, *scale)?;
2482                Arc::new(decimal_array)
2483            }
2484            DataType::Decimal64(precision, scale) => {
2485                let decimal_array =
2486                    ScalarValue::iter_to_decimal64_array(scalars, *precision, *scale)?;
2487                Arc::new(decimal_array)
2488            }
2489            DataType::Decimal128(precision, scale) => {
2490                let decimal_array =
2491                    ScalarValue::iter_to_decimal128_array(scalars, *precision, *scale)?;
2492                Arc::new(decimal_array)
2493            }
2494            DataType::Decimal256(precision, scale) => {
2495                let decimal_array =
2496                    ScalarValue::iter_to_decimal256_array(scalars, *precision, *scale)?;
2497                Arc::new(decimal_array)
2498            }
2499            DataType::Null => ScalarValue::iter_to_null_array(scalars)?,
2500            DataType::Boolean => build_array_primitive!(BooleanArray, Boolean),
2501            DataType::Float16 => build_array_primitive!(Float16Array, Float16),
2502            DataType::Float32 => build_array_primitive!(Float32Array, Float32),
2503            DataType::Float64 => build_array_primitive!(Float64Array, Float64),
2504            DataType::Int8 => build_array_primitive!(Int8Array, Int8),
2505            DataType::Int16 => build_array_primitive!(Int16Array, Int16),
2506            DataType::Int32 => build_array_primitive!(Int32Array, Int32),
2507            DataType::Int64 => build_array_primitive!(Int64Array, Int64),
2508            DataType::UInt8 => build_array_primitive!(UInt8Array, UInt8),
2509            DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
2510            DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
2511            DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
2512            DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
2513            DataType::Utf8 => build_array_string!(StringArray, Utf8),
2514            DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
2515            DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView),
2516            DataType::Binary => build_array_string!(BinaryArray, Binary),
2517            DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
2518            DataType::Date32 => build_array_primitive!(Date32Array, Date32),
2519            DataType::Date64 => build_array_primitive!(Date64Array, Date64),
2520            DataType::Time32(TimeUnit::Second) => {
2521                build_array_primitive!(Time32SecondArray, Time32Second)
2522            }
2523            DataType::Time32(TimeUnit::Millisecond) => {
2524                build_array_primitive!(Time32MillisecondArray, Time32Millisecond)
2525            }
2526            DataType::Time64(TimeUnit::Microsecond) => {
2527                build_array_primitive!(Time64MicrosecondArray, Time64Microsecond)
2528            }
2529            DataType::Time64(TimeUnit::Nanosecond) => {
2530                build_array_primitive!(Time64NanosecondArray, Time64Nanosecond)
2531            }
2532            DataType::Timestamp(TimeUnit::Second, tz) => {
2533                build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz)
2534            }
2535            DataType::Timestamp(TimeUnit::Millisecond, tz) => {
2536                build_array_primitive_tz!(
2537                    TimestampMillisecondArray,
2538                    TimestampMillisecond,
2539                    tz
2540                )
2541            }
2542            DataType::Timestamp(TimeUnit::Microsecond, tz) => {
2543                build_array_primitive_tz!(
2544                    TimestampMicrosecondArray,
2545                    TimestampMicrosecond,
2546                    tz
2547                )
2548            }
2549            DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
2550                build_array_primitive_tz!(
2551                    TimestampNanosecondArray,
2552                    TimestampNanosecond,
2553                    tz
2554                )
2555            }
2556            DataType::Duration(TimeUnit::Second) => {
2557                build_array_primitive!(DurationSecondArray, DurationSecond)
2558            }
2559            DataType::Duration(TimeUnit::Millisecond) => {
2560                build_array_primitive!(DurationMillisecondArray, DurationMillisecond)
2561            }
2562            DataType::Duration(TimeUnit::Microsecond) => {
2563                build_array_primitive!(DurationMicrosecondArray, DurationMicrosecond)
2564            }
2565            DataType::Duration(TimeUnit::Nanosecond) => {
2566                build_array_primitive!(DurationNanosecondArray, DurationNanosecond)
2567            }
2568            DataType::Interval(IntervalUnit::DayTime) => {
2569                build_array_primitive!(IntervalDayTimeArray, IntervalDayTime)
2570            }
2571            DataType::Interval(IntervalUnit::YearMonth) => {
2572                build_array_primitive!(IntervalYearMonthArray, IntervalYearMonth)
2573            }
2574            DataType::Interval(IntervalUnit::MonthDayNano) => {
2575                build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano)
2576            }
2577            DataType::FixedSizeList(_, _) => {
2578                // arrow::compute::concat does not allow inconsistent types including the size of FixedSizeList.
2579                // The length of nulls here we got is 1, so we need to resize the length of nulls to
2580                // the length of non-nulls.
2581                let mut arrays =
2582                    scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2583                let first_non_null_data_type = arrays
2584                    .iter()
2585                    .find(|sv| !sv.is_null(0))
2586                    .map(|sv| sv.data_type().to_owned());
2587                if let Some(DataType::FixedSizeList(f, l)) = first_non_null_data_type {
2588                    for array in arrays.iter_mut() {
2589                        if array.is_null(0) {
2590                            *array = Arc::new(FixedSizeListArray::new_null(
2591                                Arc::clone(&f),
2592                                l,
2593                                1,
2594                            ));
2595                        }
2596                    }
2597                }
2598                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2599                arrow::compute::concat(arrays.as_slice())?
2600            }
2601            DataType::List(_)
2602            | DataType::LargeList(_)
2603            | DataType::Map(_, _)
2604            | DataType::Struct(_)
2605            | DataType::Union(_, _) => {
2606                let arrays = scalars.map(|s| s.to_array()).collect::<Result<Vec<_>>>()?;
2607                let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
2608                arrow::compute::concat(arrays.as_slice())?
2609            }
2610            DataType::Dictionary(key_type, value_type) => {
2611                // create the values array
2612                let value_scalars = scalars
2613                    .map(|scalar| match scalar {
2614                        ScalarValue::Dictionary(inner_key_type, scalar) => {
2615                            if &inner_key_type == key_type {
2616                                Ok(*scalar)
2617                            } else {
2618                                _exec_err!("Expected inner key type of {key_type} but found: {inner_key_type}, value was ({scalar:?})")
2619                            }
2620                        }
2621                        _ => {
2622                            _exec_err!(
2623                                "Expected scalar of type {value_type} but found: {scalar} {scalar:?}"
2624                            )
2625                        }
2626                    })
2627                    .collect::<Result<Vec<_>>>()?;
2628
2629                let values = Self::iter_to_array(value_scalars)?;
2630                assert_eq!(values.data_type(), value_type.as_ref());
2631
2632                match key_type.as_ref() {
2633                    DataType::Int8 => dict_from_values::<Int8Type>(values)?,
2634                    DataType::Int16 => dict_from_values::<Int16Type>(values)?,
2635                    DataType::Int32 => dict_from_values::<Int32Type>(values)?,
2636                    DataType::Int64 => dict_from_values::<Int64Type>(values)?,
2637                    DataType::UInt8 => dict_from_values::<UInt8Type>(values)?,
2638                    DataType::UInt16 => dict_from_values::<UInt16Type>(values)?,
2639                    DataType::UInt32 => dict_from_values::<UInt32Type>(values)?,
2640                    DataType::UInt64 => dict_from_values::<UInt64Type>(values)?,
2641                    _ => unreachable!("Invalid dictionary keys type: {}", key_type),
2642                }
2643            }
2644            DataType::RunEndEncoded(run_ends_field, value_field) => {
2645                fn make_run_array<R: RunEndIndexType>(
2646                    scalars: impl IntoIterator<Item = ScalarValue>,
2647                    run_ends_field: &FieldRef,
2648                    values_field: &FieldRef,
2649                ) -> Result<ArrayRef> {
2650                    let mut scalars = scalars.into_iter();
2651
2652                    let mut run_ends = vec![];
2653                    let mut value_scalars = vec![];
2654
2655                    let mut len = R::Native::ONE;
2656                    let mut current =
2657                        if let Some(ScalarValue::RunEndEncoded(_, _, scalar)) =
2658                            scalars.next()
2659                        {
2660                            *scalar
2661                        } else {
2662                            // We are guaranteed to have one element of correct
2663                            // type because we peeked above
2664                            unreachable!()
2665                        };
2666                    for scalar in scalars {
2667                        let scalar = match scalar {
2668                            ScalarValue::RunEndEncoded(
2669                                inner_run_ends_field,
2670                                inner_value_field,
2671                                scalar,
2672                            ) if &inner_run_ends_field == run_ends_field
2673                                && &inner_value_field == values_field =>
2674                            {
2675                                *scalar
2676                            }
2677                            _ => {
2678                                return _exec_err!(
2679                                    "Expected RunEndEncoded scalar with run-ends field {run_ends_field} but got: {scalar:?}"
2680                                );
2681                            }
2682                        };
2683
2684                        // new run
2685                        if scalar != current {
2686                            run_ends.push(len);
2687                            value_scalars.push(current);
2688                            current = scalar;
2689                        }
2690
2691                        len = len.add_checked(R::Native::ONE).map_err(|_| {
2692                            DataFusionError::Execution(format!(
2693                                "Cannot construct RunArray: Overflows run-ends type {}",
2694                                run_ends_field.data_type()
2695                            ))
2696                        })?;
2697                    }
2698
2699                    run_ends.push(len);
2700                    value_scalars.push(current);
2701
2702                    let run_ends = PrimitiveArray::<R>::from_iter_values(run_ends);
2703                    let values = ScalarValue::iter_to_array(value_scalars)?;
2704
2705                    // Using ArrayDataBuilder so we can maintain the fields
2706                    let dt = DataType::RunEndEncoded(
2707                        Arc::clone(run_ends_field),
2708                        Arc::clone(values_field),
2709                    );
2710                    let builder = ArrayDataBuilder::new(dt)
2711                        .len(RunArray::logical_len(&run_ends))
2712                        .add_child_data(run_ends.to_data())
2713                        .add_child_data(values.to_data());
2714                    let run_array = RunArray::<R>::from(builder.build()?);
2715
2716                    Ok(Arc::new(run_array))
2717                }
2718
2719                match run_ends_field.data_type() {
2720                    DataType::Int16 => {
2721                        make_run_array::<Int16Type>(scalars, run_ends_field, value_field)?
2722                    }
2723                    DataType::Int32 => {
2724                        make_run_array::<Int32Type>(scalars, run_ends_field, value_field)?
2725                    }
2726                    DataType::Int64 => {
2727                        make_run_array::<Int64Type>(scalars, run_ends_field, value_field)?
2728                    }
2729                    dt => unreachable!("Invalid run-ends type: {dt}"),
2730                }
2731            }
2732            DataType::FixedSizeBinary(size) => {
2733                let array = scalars
2734                    .map(|sv| {
2735                        if let ScalarValue::FixedSizeBinary(_, v) = sv {
2736                            Ok(v)
2737                        } else {
2738                            _exec_err!(
2739                                "Inconsistent types in ScalarValue::iter_to_array. \
2740                                Expected {data_type}, got {sv:?}"
2741                            )
2742                        }
2743                    })
2744                    .collect::<Result<Vec<_>>>()?;
2745                let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
2746                    array.into_iter(),
2747                    *size,
2748                )?;
2749                Arc::new(array)
2750            }
2751            // explicitly enumerate unsupported types so newly added
2752            // types must be acknowledged, Time32 and Time64 types are
2753            // not supported if the TimeUnit is not valid (Time32 can
2754            // only be used with Second and Millisecond, Time64 only
2755            // with Microsecond and Nanosecond)
2756            DataType::Time32(TimeUnit::Microsecond)
2757            | DataType::Time32(TimeUnit::Nanosecond)
2758            | DataType::Time64(TimeUnit::Second)
2759            | DataType::Time64(TimeUnit::Millisecond)
2760            | DataType::ListView(_)
2761            | DataType::LargeListView(_) => {
2762                return _not_impl_err!(
2763                    "Unsupported creation of {:?} array from ScalarValue {:?}",
2764                    data_type,
2765                    scalars.peek()
2766                );
2767            }
2768        };
2769        Ok(array)
2770    }
2771
2772    fn iter_to_null_array(
2773        scalars: impl IntoIterator<Item = ScalarValue>,
2774    ) -> Result<ArrayRef> {
2775        let length = scalars.into_iter().try_fold(
2776            0usize,
2777            |r, element: ScalarValue| match element {
2778                ScalarValue::Null => Ok::<usize, DataFusionError>(r + 1),
2779                s => {
2780                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2781                }
2782            },
2783        )?;
2784        Ok(new_null_array(&DataType::Null, length))
2785    }
2786
2787    fn iter_to_decimal32_array(
2788        scalars: impl IntoIterator<Item = ScalarValue>,
2789        precision: u8,
2790        scale: i8,
2791    ) -> Result<Decimal32Array> {
2792        let array = scalars
2793            .into_iter()
2794            .map(|element: ScalarValue| match element {
2795                ScalarValue::Decimal32(v1, _, _) => Ok(v1),
2796                s => {
2797                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2798                }
2799            })
2800            .collect::<Result<Decimal32Array>>()?
2801            .with_precision_and_scale(precision, scale)?;
2802        Ok(array)
2803    }
2804
2805    fn iter_to_decimal64_array(
2806        scalars: impl IntoIterator<Item = ScalarValue>,
2807        precision: u8,
2808        scale: i8,
2809    ) -> Result<Decimal64Array> {
2810        let array = scalars
2811            .into_iter()
2812            .map(|element: ScalarValue| match element {
2813                ScalarValue::Decimal64(v1, _, _) => Ok(v1),
2814                s => {
2815                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2816                }
2817            })
2818            .collect::<Result<Decimal64Array>>()?
2819            .with_precision_and_scale(precision, scale)?;
2820        Ok(array)
2821    }
2822
2823    fn iter_to_decimal128_array(
2824        scalars: impl IntoIterator<Item = ScalarValue>,
2825        precision: u8,
2826        scale: i8,
2827    ) -> Result<Decimal128Array> {
2828        let array = scalars
2829            .into_iter()
2830            .map(|element: ScalarValue| match element {
2831                ScalarValue::Decimal128(v1, _, _) => Ok(v1),
2832                s => {
2833                    _internal_err!("Expected ScalarValue::Null element. Received {s:?}")
2834                }
2835            })
2836            .collect::<Result<Decimal128Array>>()?
2837            .with_precision_and_scale(precision, scale)?;
2838        Ok(array)
2839    }
2840
2841    fn iter_to_decimal256_array(
2842        scalars: impl IntoIterator<Item = ScalarValue>,
2843        precision: u8,
2844        scale: i8,
2845    ) -> Result<Decimal256Array> {
2846        let array = scalars
2847            .into_iter()
2848            .map(|element: ScalarValue| match element {
2849                ScalarValue::Decimal256(v1, _, _) => Ok(v1),
2850                s => {
2851                    _internal_err!(
2852                        "Expected ScalarValue::Decimal256 element. Received {s:?}"
2853                    )
2854                }
2855            })
2856            .collect::<Result<Decimal256Array>>()?
2857            .with_precision_and_scale(precision, scale)?;
2858        Ok(array)
2859    }
2860
2861    /// Converts `Vec<ScalarValue>` where each element has type corresponding to
2862    /// `data_type`, to a single element [`ListArray`].
2863    ///
2864    /// Example
2865    /// ```
2866    /// use arrow::array::{Int32Array, ListArray};
2867    /// use arrow::datatypes::{DataType, Int32Type};
2868    /// use datafusion_common::cast::as_list_array;
2869    /// use datafusion_common::ScalarValue;
2870    ///
2871    /// let scalars = vec![
2872    ///     ScalarValue::Int32(Some(1)),
2873    ///     ScalarValue::Int32(None),
2874    ///     ScalarValue::Int32(Some(2)),
2875    /// ];
2876    ///
2877    /// let result = ScalarValue::new_list(&scalars, &DataType::Int32, true);
2878    ///
2879    /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
2880    ///     Some(1),
2881    ///     None,
2882    ///     Some(2),
2883    /// ])]);
2884    ///
2885    /// assert_eq!(*result, expected);
2886    /// ```
2887    pub fn new_list(
2888        values: &[ScalarValue],
2889        data_type: &DataType,
2890        nullable: bool,
2891    ) -> Arc<ListArray> {
2892        let values = if values.is_empty() {
2893            new_empty_array(data_type)
2894        } else {
2895            Self::iter_to_array(values.iter().cloned()).unwrap()
2896        };
2897        Arc::new(
2898            SingleRowListArrayBuilder::new(values)
2899                .with_nullable(nullable)
2900                .build_list_array(),
2901        )
2902    }
2903
2904    /// Same as [`ScalarValue::new_list`] but with nullable set to true.
2905    pub fn new_list_nullable(
2906        values: &[ScalarValue],
2907        data_type: &DataType,
2908    ) -> Arc<ListArray> {
2909        Self::new_list(values, data_type, true)
2910    }
2911
2912    /// Create ListArray with Null with specific data type
2913    ///
2914    /// - new_null_list(i32, nullable, 1): `ListArray[NULL]`
2915    pub fn new_null_list(data_type: DataType, nullable: bool, null_len: usize) -> Self {
2916        let data_type = DataType::List(Field::new_list_field(data_type, nullable).into());
2917        Self::List(Arc::new(ListArray::from(ArrayData::new_null(
2918            &data_type, null_len,
2919        ))))
2920    }
2921
2922    /// Converts `IntoIterator<Item = ScalarValue>` where each element has type corresponding to
2923    /// `data_type`, to a [`ListArray`].
2924    ///
2925    /// Example
2926    /// ```
2927    /// use arrow::array::{Int32Array, ListArray};
2928    /// use arrow::datatypes::{DataType, Int32Type};
2929    /// use datafusion_common::cast::as_list_array;
2930    /// use datafusion_common::ScalarValue;
2931    ///
2932    /// let scalars = vec![
2933    ///     ScalarValue::Int32(Some(1)),
2934    ///     ScalarValue::Int32(None),
2935    ///     ScalarValue::Int32(Some(2)),
2936    /// ];
2937    ///
2938    /// let result =
2939    ///     ScalarValue::new_list_from_iter(scalars.into_iter(), &DataType::Int32, true);
2940    ///
2941    /// let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
2942    ///     Some(1),
2943    ///     None,
2944    ///     Some(2),
2945    /// ])]);
2946    ///
2947    /// assert_eq!(*result, expected);
2948    /// ```
2949    pub fn new_list_from_iter(
2950        values: impl IntoIterator<Item = ScalarValue> + ExactSizeIterator,
2951        data_type: &DataType,
2952        nullable: bool,
2953    ) -> Arc<ListArray> {
2954        let values = if values.len() == 0 {
2955            new_empty_array(data_type)
2956        } else {
2957            Self::iter_to_array(values).unwrap()
2958        };
2959        Arc::new(
2960            SingleRowListArrayBuilder::new(values)
2961                .with_nullable(nullable)
2962                .build_list_array(),
2963        )
2964    }
2965
2966    /// Converts `Vec<ScalarValue>` where each element has type corresponding to
2967    /// `data_type`, to a [`LargeListArray`].
2968    ///
2969    /// Example
2970    /// ```
2971    /// use arrow::array::{Int32Array, LargeListArray};
2972    /// use arrow::datatypes::{DataType, Int32Type};
2973    /// use datafusion_common::cast::as_large_list_array;
2974    /// use datafusion_common::ScalarValue;
2975    ///
2976    /// let scalars = vec![
2977    ///     ScalarValue::Int32(Some(1)),
2978    ///     ScalarValue::Int32(None),
2979    ///     ScalarValue::Int32(Some(2)),
2980    /// ];
2981    ///
2982    /// let result = ScalarValue::new_large_list(&scalars, &DataType::Int32);
2983    ///
2984    /// let expected =
2985    ///     LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
2986    ///         Some(1),
2987    ///         None,
2988    ///         Some(2),
2989    ///     ])]);
2990    ///
2991    /// assert_eq!(*result, expected);
2992    /// ```
2993    pub fn new_large_list(
2994        values: &[ScalarValue],
2995        data_type: &DataType,
2996    ) -> Arc<LargeListArray> {
2997        let values = if values.is_empty() {
2998            new_empty_array(data_type)
2999        } else {
3000            Self::iter_to_array(values.iter().cloned()).unwrap()
3001        };
3002        Arc::new(SingleRowListArrayBuilder::new(values).build_large_list_array())
3003    }
3004
3005    /// Converts a scalar value into an array of `size` rows.
3006    ///
3007    /// # Errors
3008    ///
3009    /// Errors if `self` is
3010    /// - a decimal that fails be converted to a decimal array of size
3011    /// - a `FixedSizeList` that fails to be concatenated into an array of size
3012    /// - a `List` that fails to be concatenated into an array of size
3013    /// - a `Dictionary` that fails be converted to a dictionary array of size
3014    pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
3015        Ok(match self {
3016            ScalarValue::Decimal32(Some(e), precision, scale) => Arc::new(
3017                Decimal32Array::from_value(*e, size)
3018                    .with_precision_and_scale(*precision, *scale)?,
3019            ),
3020            ScalarValue::Decimal32(None, precision, scale) => {
3021                new_null_array(&DataType::Decimal32(*precision, *scale), size)
3022            }
3023            ScalarValue::Decimal64(Some(e), precision, scale) => Arc::new(
3024                Decimal64Array::from_value(*e, size)
3025                    .with_precision_and_scale(*precision, *scale)?,
3026            ),
3027            ScalarValue::Decimal64(None, precision, scale) => {
3028                new_null_array(&DataType::Decimal64(*precision, *scale), size)
3029            }
3030            ScalarValue::Decimal128(Some(e), precision, scale) => Arc::new(
3031                Decimal128Array::from_value(*e, size)
3032                    .with_precision_and_scale(*precision, *scale)?,
3033            ),
3034            ScalarValue::Decimal128(None, precision, scale) => {
3035                new_null_array(&DataType::Decimal128(*precision, *scale), size)
3036            }
3037            ScalarValue::Decimal256(Some(e), precision, scale) => Arc::new(
3038                Decimal256Array::from_value(*e, size)
3039                    .with_precision_and_scale(*precision, *scale)?,
3040            ),
3041            ScalarValue::Decimal256(None, precision, scale) => {
3042                new_null_array(&DataType::Decimal256(*precision, *scale), size)
3043            }
3044
3045            ScalarValue::Boolean(e) => match e {
3046                None => new_null_array(&DataType::Boolean, size),
3047                Some(true) => {
3048                    Arc::new(BooleanArray::new(BooleanBuffer::new_set(size), None))
3049                        as ArrayRef
3050                }
3051                Some(false) => {
3052                    Arc::new(BooleanArray::new(BooleanBuffer::new_unset(size), None))
3053                        as ArrayRef
3054                }
3055            },
3056            ScalarValue::Float64(e) => {
3057                build_array_from_option!(Float64, Float64Array, e, size)
3058            }
3059            ScalarValue::Float32(e) => {
3060                build_array_from_option!(Float32, Float32Array, e, size)
3061            }
3062            ScalarValue::Float16(e) => {
3063                build_array_from_option!(Float16, Float16Array, e, size)
3064            }
3065            ScalarValue::Int8(e) => build_array_from_option!(Int8, Int8Array, e, size),
3066            ScalarValue::Int16(e) => build_array_from_option!(Int16, Int16Array, e, size),
3067            ScalarValue::Int32(e) => build_array_from_option!(Int32, Int32Array, e, size),
3068            ScalarValue::Int64(e) => build_array_from_option!(Int64, Int64Array, e, size),
3069            ScalarValue::UInt8(e) => build_array_from_option!(UInt8, UInt8Array, e, size),
3070            ScalarValue::UInt16(e) => {
3071                build_array_from_option!(UInt16, UInt16Array, e, size)
3072            }
3073            ScalarValue::UInt32(e) => {
3074                build_array_from_option!(UInt32, UInt32Array, e, size)
3075            }
3076            ScalarValue::UInt64(e) => {
3077                build_array_from_option!(UInt64, UInt64Array, e, size)
3078            }
3079            ScalarValue::TimestampSecond(e, tz_opt) => {
3080                build_timestamp_array_from_option!(
3081                    TimeUnit::Second,
3082                    tz_opt.clone(),
3083                    TimestampSecondArray,
3084                    e,
3085                    size
3086                )
3087            }
3088            ScalarValue::TimestampMillisecond(e, tz_opt) => {
3089                build_timestamp_array_from_option!(
3090                    TimeUnit::Millisecond,
3091                    tz_opt.clone(),
3092                    TimestampMillisecondArray,
3093                    e,
3094                    size
3095                )
3096            }
3097
3098            ScalarValue::TimestampMicrosecond(e, tz_opt) => {
3099                build_timestamp_array_from_option!(
3100                    TimeUnit::Microsecond,
3101                    tz_opt.clone(),
3102                    TimestampMicrosecondArray,
3103                    e,
3104                    size
3105                )
3106            }
3107            ScalarValue::TimestampNanosecond(e, tz_opt) => {
3108                build_timestamp_array_from_option!(
3109                    TimeUnit::Nanosecond,
3110                    tz_opt.clone(),
3111                    TimestampNanosecondArray,
3112                    e,
3113                    size
3114                )
3115            }
3116            ScalarValue::Utf8(e) => match e {
3117                Some(value) => Arc::new(StringArray::new_repeated(value, size)),
3118                None => new_null_array(&DataType::Utf8, size),
3119            },
3120            ScalarValue::Utf8View(e) => match e {
3121                Some(value) => {
3122                    let mut builder = StringViewBuilder::with_capacity(size);
3123                    builder.try_append_value_n(value, size)?;
3124                    let array = builder.finish();
3125                    Arc::new(array)
3126                }
3127                None => new_null_array(&DataType::Utf8View, size),
3128            },
3129            ScalarValue::LargeUtf8(e) => match e {
3130                Some(value) => Arc::new(LargeStringArray::new_repeated(value, size)),
3131                None => new_null_array(&DataType::LargeUtf8, size),
3132            },
3133            ScalarValue::Binary(e) => match e {
3134                Some(value) => {
3135                    Arc::new(BinaryArray::new_repeated(value.as_slice(), size))
3136                }
3137                None => new_null_array(&DataType::Binary, size),
3138            },
3139            ScalarValue::BinaryView(e) => match e {
3140                Some(value) => {
3141                    let mut builder = BinaryViewBuilder::with_capacity(size);
3142                    builder.try_append_value_n(value, size)?;
3143                    let array = builder.finish();
3144                    Arc::new(array)
3145                }
3146                None => new_null_array(&DataType::BinaryView, size),
3147            },
3148            ScalarValue::FixedSizeBinary(s, e) => match e {
3149                Some(value) => Arc::new(
3150                    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
3151                        repeat_n(Some(value.as_slice()), size),
3152                        *s,
3153                    )
3154                    .unwrap(),
3155                ),
3156                None => Arc::new(FixedSizeBinaryArray::new_null(*s, size)),
3157            },
3158            ScalarValue::LargeBinary(e) => match e {
3159                Some(value) => {
3160                    Arc::new(LargeBinaryArray::new_repeated(value.as_slice(), size))
3161                }
3162                None => new_null_array(&DataType::LargeBinary, size),
3163            },
3164            ScalarValue::List(arr) => {
3165                if size == 1 {
3166                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3167                }
3168                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3169            }
3170            ScalarValue::LargeList(arr) => {
3171                if size == 1 {
3172                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3173                }
3174                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3175            }
3176            ScalarValue::FixedSizeList(arr) => {
3177                if size == 1 {
3178                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3179                }
3180                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3181            }
3182            ScalarValue::Struct(arr) => {
3183                if size == 1 {
3184                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3185                }
3186                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3187            }
3188            ScalarValue::Map(arr) => {
3189                if size == 1 {
3190                    return Ok(Arc::clone(arr) as Arc<dyn Array>);
3191                }
3192                Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
3193            }
3194            ScalarValue::Date32(e) => {
3195                build_array_from_option!(Date32, Date32Array, e, size)
3196            }
3197            ScalarValue::Date64(e) => {
3198                build_array_from_option!(Date64, Date64Array, e, size)
3199            }
3200            ScalarValue::Time32Second(e) => {
3201                build_array_from_option!(
3202                    Time32,
3203                    TimeUnit::Second,
3204                    Time32SecondArray,
3205                    e,
3206                    size
3207                )
3208            }
3209            ScalarValue::Time32Millisecond(e) => {
3210                build_array_from_option!(
3211                    Time32,
3212                    TimeUnit::Millisecond,
3213                    Time32MillisecondArray,
3214                    e,
3215                    size
3216                )
3217            }
3218            ScalarValue::Time64Microsecond(e) => {
3219                build_array_from_option!(
3220                    Time64,
3221                    TimeUnit::Microsecond,
3222                    Time64MicrosecondArray,
3223                    e,
3224                    size
3225                )
3226            }
3227            ScalarValue::Time64Nanosecond(e) => {
3228                build_array_from_option!(
3229                    Time64,
3230                    TimeUnit::Nanosecond,
3231                    Time64NanosecondArray,
3232                    e,
3233                    size
3234                )
3235            }
3236            ScalarValue::IntervalDayTime(e) => build_array_from_option!(
3237                Interval,
3238                IntervalUnit::DayTime,
3239                IntervalDayTimeArray,
3240                e,
3241                size
3242            ),
3243            ScalarValue::IntervalYearMonth(e) => build_array_from_option!(
3244                Interval,
3245                IntervalUnit::YearMonth,
3246                IntervalYearMonthArray,
3247                e,
3248                size
3249            ),
3250            ScalarValue::IntervalMonthDayNano(e) => build_array_from_option!(
3251                Interval,
3252                IntervalUnit::MonthDayNano,
3253                IntervalMonthDayNanoArray,
3254                e,
3255                size
3256            ),
3257            ScalarValue::DurationSecond(e) => build_array_from_option!(
3258                Duration,
3259                TimeUnit::Second,
3260                DurationSecondArray,
3261                e,
3262                size
3263            ),
3264            ScalarValue::DurationMillisecond(e) => build_array_from_option!(
3265                Duration,
3266                TimeUnit::Millisecond,
3267                DurationMillisecondArray,
3268                e,
3269                size
3270            ),
3271            ScalarValue::DurationMicrosecond(e) => build_array_from_option!(
3272                Duration,
3273                TimeUnit::Microsecond,
3274                DurationMicrosecondArray,
3275                e,
3276                size
3277            ),
3278            ScalarValue::DurationNanosecond(e) => build_array_from_option!(
3279                Duration,
3280                TimeUnit::Nanosecond,
3281                DurationNanosecondArray,
3282                e,
3283                size
3284            ),
3285            ScalarValue::Union(value, fields, mode) => match value {
3286                Some((v_id, value)) => {
3287                    let mut new_fields = Vec::with_capacity(fields.len());
3288                    let mut child_arrays = Vec::<ArrayRef>::with_capacity(fields.len());
3289                    for (f_id, field) in fields.iter() {
3290                        let ar = if f_id == *v_id {
3291                            value.to_array_of_size(size)?
3292                        } else {
3293                            let dt = field.data_type();
3294                            match mode {
3295                                UnionMode::Sparse => new_null_array(dt, size),
3296                                // In a dense union, only the child with values needs to be
3297                                // allocated
3298                                UnionMode::Dense => new_null_array(dt, 0),
3299                            }
3300                        };
3301                        let field = (**field).clone();
3302                        child_arrays.push(ar);
3303                        new_fields.push(field.clone());
3304                    }
3305                    let type_ids = repeat_n(*v_id, size);
3306                    let type_ids = ScalarBuffer::<i8>::from_iter(type_ids);
3307                    let value_offsets = match mode {
3308                        UnionMode::Sparse => None,
3309                        UnionMode::Dense => Some(ScalarBuffer::from_iter(0..size as i32)),
3310                    };
3311                    let ar = UnionArray::try_new(
3312                        fields.clone(),
3313                        type_ids,
3314                        value_offsets,
3315                        child_arrays,
3316                    )
3317                    .map_err(|e| DataFusionError::ArrowError(Box::new(e), None))?;
3318                    Arc::new(ar)
3319                }
3320                None => new_null_array(&DataType::Union(fields.clone(), *mode), size),
3321            },
3322            ScalarValue::Dictionary(key_type, v) => {
3323                // values array is one element long (the value)
3324                match key_type.as_ref() {
3325                    DataType::Int8 => dict_from_scalar::<Int8Type>(v, size)?,
3326                    DataType::Int16 => dict_from_scalar::<Int16Type>(v, size)?,
3327                    DataType::Int32 => dict_from_scalar::<Int32Type>(v, size)?,
3328                    DataType::Int64 => dict_from_scalar::<Int64Type>(v, size)?,
3329                    DataType::UInt8 => dict_from_scalar::<UInt8Type>(v, size)?,
3330                    DataType::UInt16 => dict_from_scalar::<UInt16Type>(v, size)?,
3331                    DataType::UInt32 => dict_from_scalar::<UInt32Type>(v, size)?,
3332                    DataType::UInt64 => dict_from_scalar::<UInt64Type>(v, size)?,
3333                    _ => unreachable!("Invalid dictionary keys type: {}", key_type),
3334                }
3335            }
3336            ScalarValue::RunEndEncoded(run_ends_field, values_field, value) => {
3337                fn make_run_array<R: RunEndIndexType>(
3338                    run_ends_field: &Arc<Field>,
3339                    values_field: &Arc<Field>,
3340                    value: &ScalarValue,
3341                    size: usize,
3342                ) -> Result<ArrayRef> {
3343                    let size_native = R::Native::from_usize(size)
3344                        .ok_or_else(|| DataFusionError::Execution(format!("Cannot construct RunArray of size {size}: Overflows run-ends type {}", R::DATA_TYPE)))?;
3345                    let values = value.to_array_of_size(1)?;
3346                    let run_ends =
3347                        PrimitiveArray::<R>::new(vec![size_native].into(), None);
3348
3349                    // Using ArrayDataBuilder so we can maintain the fields
3350                    let dt = DataType::RunEndEncoded(
3351                        Arc::clone(run_ends_field),
3352                        Arc::clone(values_field),
3353                    );
3354                    let builder = ArrayDataBuilder::new(dt)
3355                        .len(size)
3356                        .add_child_data(run_ends.to_data())
3357                        .add_child_data(values.to_data());
3358                    let run_array = RunArray::<R>::from(builder.build()?);
3359
3360                    Ok(Arc::new(run_array))
3361                }
3362                match run_ends_field.data_type() {
3363                    DataType::Int16 => make_run_array::<Int16Type>(
3364                        run_ends_field,
3365                        values_field,
3366                        value,
3367                        size,
3368                    )?,
3369                    DataType::Int32 => make_run_array::<Int32Type>(
3370                        run_ends_field,
3371                        values_field,
3372                        value,
3373                        size,
3374                    )?,
3375                    DataType::Int64 => make_run_array::<Int64Type>(
3376                        run_ends_field,
3377                        values_field,
3378                        value,
3379                        size,
3380                    )?,
3381                    dt => unreachable!("Invalid run-ends type: {dt}"),
3382                }
3383            }
3384            ScalarValue::Null => get_or_create_cached_null_array(size),
3385        })
3386    }
3387
3388    fn get_decimal_value_from_array(
3389        array: &dyn Array,
3390        index: usize,
3391        precision: u8,
3392        scale: i8,
3393    ) -> Result<ScalarValue> {
3394        match array.data_type() {
3395            DataType::Decimal32(_, _) => {
3396                let array = as_decimal32_array(array)?;
3397                if array.is_null(index) {
3398                    Ok(ScalarValue::Decimal32(None, precision, scale))
3399                } else {
3400                    let value = array.value(index);
3401                    Ok(ScalarValue::Decimal32(Some(value), precision, scale))
3402                }
3403            }
3404            DataType::Decimal64(_, _) => {
3405                let array = as_decimal64_array(array)?;
3406                if array.is_null(index) {
3407                    Ok(ScalarValue::Decimal64(None, precision, scale))
3408                } else {
3409                    let value = array.value(index);
3410                    Ok(ScalarValue::Decimal64(Some(value), precision, scale))
3411                }
3412            }
3413            DataType::Decimal128(_, _) => {
3414                let array = as_decimal128_array(array)?;
3415                if array.is_null(index) {
3416                    Ok(ScalarValue::Decimal128(None, precision, scale))
3417                } else {
3418                    let value = array.value(index);
3419                    Ok(ScalarValue::Decimal128(Some(value), precision, scale))
3420                }
3421            }
3422            DataType::Decimal256(_, _) => {
3423                let array = as_decimal256_array(array)?;
3424                if array.is_null(index) {
3425                    Ok(ScalarValue::Decimal256(None, precision, scale))
3426                } else {
3427                    let value = array.value(index);
3428                    Ok(ScalarValue::Decimal256(Some(value), precision, scale))
3429                }
3430            }
3431            other => {
3432                unreachable!("Invalid type isn't decimal: {other:?}")
3433            }
3434        }
3435    }
3436
3437    /// Repeats the rows of `arr` `size` times, producing an array with
3438    /// `arr.len() * size` total rows.
3439    fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> {
3440        if size == 0 {
3441            return Ok(arr.slice(0, 0));
3442        }
3443
3444        // Examples: given `arr = [[A, B, C]]` and `size = 3`, `indices = [0, 0, 0]` and
3445        // the result is `[[A, B, C], [A, B, C], [A, B, C]]`.
3446        //
3447        // Given `arr = [[A, B], [C]]` and `size = 2`, `indices = [0, 1, 0, 1]` and the
3448        // result is `[[A, B], [C], [A, B], [C]]`. (But in practice, we are always called
3449        // with `arr.len() == 1`.)
3450        let n = arr.len() as u32;
3451        let indices = UInt32Array::from_iter_values((0..size).flat_map(|_| 0..n));
3452        Ok(arrow::compute::take(arr, &indices, None)?)
3453    }
3454
3455    /// Retrieve ScalarValue for each row in `array`
3456    ///
3457    /// Elements in `array` may be NULL, in which case the corresponding element in the returned vector is None.
3458    ///
3459    /// Example 1: Array (ScalarValue::Int32)
3460    /// ```
3461    /// use arrow::array::ListArray;
3462    /// use arrow::datatypes::{DataType, Int32Type};
3463    /// use datafusion_common::ScalarValue;
3464    ///
3465    /// // Equivalent to [[1,2,3], [4,5]]
3466    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
3467    ///     Some(vec![Some(1), Some(2), Some(3)]),
3468    ///     Some(vec![Some(4), Some(5)]),
3469    /// ]);
3470    ///
3471    /// // Convert the array into Scalar Values for each row
3472    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
3473    ///
3474    /// let expected = vec![
3475    ///     Some(vec![
3476    ///         ScalarValue::Int32(Some(1)),
3477    ///         ScalarValue::Int32(Some(2)),
3478    ///         ScalarValue::Int32(Some(3)),
3479    ///     ]),
3480    ///     Some(vec![
3481    ///         ScalarValue::Int32(Some(4)),
3482    ///         ScalarValue::Int32(Some(5)),
3483    ///     ]),
3484    /// ];
3485    ///
3486    /// assert_eq!(scalar_vec, expected);
3487    /// ```
3488    ///
3489    /// Example 2: Nested array (ScalarValue::List)
3490    /// ```
3491    /// use arrow::array::ListArray;
3492    /// use arrow::datatypes::{DataType, Int32Type};
3493    /// use datafusion_common::utils::SingleRowListArrayBuilder;
3494    /// use datafusion_common::ScalarValue;
3495    /// use std::sync::Arc;
3496    ///
3497    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
3498    ///     Some(vec![Some(1), Some(2), Some(3)]),
3499    ///     Some(vec![Some(4), Some(5)]),
3500    /// ]);
3501    ///
    /// // Wrap the array in another list layer, producing the nested array [ [[1,2,3], [4,5]] ]
3503    /// let list_arr = SingleRowListArrayBuilder::new(Arc::new(list_arr)).build_list_array();
3504    ///
    /// // Convert the array into ScalarValues for each row; each element is a 1D list in this example
3506    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
3507    ///
3508    /// let l1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
3509    ///     Some(1),
3510    ///     Some(2),
3511    ///     Some(3),
3512    /// ])]);
3513    /// let l2 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
3514    ///     Some(4),
3515    ///     Some(5),
3516    /// ])]);
3517    ///
3518    /// let expected = vec![Some(vec![
3519    ///     ScalarValue::List(Arc::new(l1)),
3520    ///     ScalarValue::List(Arc::new(l2)),
3521    /// ])];
3522    ///
3523    /// assert_eq!(scalar_vec, expected);
3524    /// ```
3525    ///
3526    /// Example 3: Nullable array
3527    /// ```
3528    /// use arrow::array::ListArray;
3529    /// use arrow::datatypes::{DataType, Int32Type};
3530    /// use datafusion_common::ScalarValue;
3531    ///
3532    /// let list_arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
3533    ///     Some(vec![Some(1), Some(2), Some(3)]),
3534    ///     None,
3535    ///     Some(vec![Some(4), Some(5)]),
3536    /// ]);
3537    ///
3538    /// // Convert the array into Scalar Values for each row
3539    /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap();
3540    ///
3541    /// let expected = vec![
3542    ///     Some(vec![
3543    ///         ScalarValue::Int32(Some(1)),
3544    ///         ScalarValue::Int32(Some(2)),
3545    ///         ScalarValue::Int32(Some(3)),
3546    ///     ]),
3547    ///     None,
3548    ///     Some(vec![
3549    ///         ScalarValue::Int32(Some(4)),
3550    ///         ScalarValue::Int32(Some(5)),
3551    ///     ]),
3552    /// ];
3553    ///
3554    /// assert_eq!(scalar_vec, expected);
3555    /// ```
3556    pub fn convert_array_to_scalar_vec(
3557        array: &dyn Array,
3558    ) -> Result<Vec<Option<Vec<Self>>>> {
3559        fn generic_collect<OffsetSize: OffsetSizeTrait>(
3560            array: &dyn Array,
3561        ) -> Result<Vec<Option<Vec<ScalarValue>>>> {
3562            array
3563                .as_list::<OffsetSize>()
3564                .iter()
3565                .map(|nested_array| {
3566                    nested_array
3567                        .map(|array| {
3568                            (0..array.len())
3569                                .map(|i| ScalarValue::try_from_array(&array, i))
3570                                .collect::<Result<Vec<_>>>()
3571                        })
3572                        .transpose()
3573                })
3574                .collect()
3575        }
3576
3577        match array.data_type() {
3578            DataType::List(_) => generic_collect::<i32>(array),
3579            DataType::LargeList(_) => generic_collect::<i64>(array),
3580            _ => _internal_err!(
3581                "ScalarValue::convert_array_to_scalar_vec input must be a List/LargeList type"
3582            ),
3583        }
3584    }
3585
3586    #[deprecated(
3587        since = "46.0.0",
3588        note = "This function is obsolete. Use `to_array` instead"
3589    )]
3590    pub fn raw_data(&self) -> Result<ArrayRef> {
3591        match self {
3592            ScalarValue::List(arr) => Ok(arr.to_owned()),
3593            _ => _internal_err!("ScalarValue is not a list"),
3594        }
3595    }
3596
3597    /// Converts a value in `array` at `index` into a ScalarValue
3598    pub fn try_from_array(array: &dyn Array, index: usize) -> Result<Self> {
3599        // handle NULL value
3600        if array.is_null(index) {
3601            return array.data_type().try_into();
3602        }
3603
3604        Ok(match array.data_type() {
3605            DataType::Null => ScalarValue::Null,
3606            DataType::Decimal32(precision, scale) => {
3607                ScalarValue::get_decimal_value_from_array(
3608                    array, index, *precision, *scale,
3609                )?
3610            }
3611            DataType::Decimal64(precision, scale) => {
3612                ScalarValue::get_decimal_value_from_array(
3613                    array, index, *precision, *scale,
3614                )?
3615            }
3616            DataType::Decimal128(precision, scale) => {
3617                ScalarValue::get_decimal_value_from_array(
3618                    array, index, *precision, *scale,
3619                )?
3620            }
3621            DataType::Decimal256(precision, scale) => {
3622                ScalarValue::get_decimal_value_from_array(
3623                    array, index, *precision, *scale,
3624                )?
3625            }
3626            DataType::Boolean => typed_cast!(array, index, as_boolean_array, Boolean)?,
3627            DataType::Float64 => typed_cast!(array, index, as_float64_array, Float64)?,
3628            DataType::Float32 => typed_cast!(array, index, as_float32_array, Float32)?,
3629            DataType::Float16 => typed_cast!(array, index, as_float16_array, Float16)?,
3630            DataType::UInt64 => typed_cast!(array, index, as_uint64_array, UInt64)?,
3631            DataType::UInt32 => typed_cast!(array, index, as_uint32_array, UInt32)?,
3632            DataType::UInt16 => typed_cast!(array, index, as_uint16_array, UInt16)?,
3633            DataType::UInt8 => typed_cast!(array, index, as_uint8_array, UInt8)?,
3634            DataType::Int64 => typed_cast!(array, index, as_int64_array, Int64)?,
3635            DataType::Int32 => typed_cast!(array, index, as_int32_array, Int32)?,
3636            DataType::Int16 => typed_cast!(array, index, as_int16_array, Int16)?,
3637            DataType::Int8 => typed_cast!(array, index, as_int8_array, Int8)?,
3638            DataType::Binary => typed_cast!(array, index, as_binary_array, Binary)?,
3639            DataType::LargeBinary => {
3640                typed_cast!(array, index, as_large_binary_array, LargeBinary)?
3641            }
3642            DataType::BinaryView => {
3643                typed_cast!(array, index, as_binary_view_array, BinaryView)?
3644            }
3645            DataType::Utf8 => typed_cast!(array, index, as_string_array, Utf8)?,
3646            DataType::LargeUtf8 => {
3647                typed_cast!(array, index, as_large_string_array, LargeUtf8)?
3648            }
3649            DataType::Utf8View => {
3650                typed_cast!(array, index, as_string_view_array, Utf8View)?
3651            }
3652            DataType::List(field) => {
3653                let list_array = array.as_list::<i32>();
3654                let nested_array = list_array.value(index);
3655                // Produces a single element `ListArray` with the value at `index`.
3656                SingleRowListArrayBuilder::new(nested_array)
3657                    .with_field(field)
3658                    .build_list_scalar()
3659            }
3660            DataType::LargeList(field) => {
3661                let list_array = as_large_list_array(array)?;
3662                let nested_array = list_array.value(index);
3663                // Produces a single element `LargeListArray` with the value at `index`.
3664                SingleRowListArrayBuilder::new(nested_array)
3665                    .with_field(field)
3666                    .build_large_list_scalar()
3667            }
3668            // TODO: There is no test for FixedSizeList now, add it later
3669            DataType::FixedSizeList(field, _) => {
3670                let list_array = as_fixed_size_list_array(array)?;
3671                let nested_array = list_array.value(index);
3672                // Produces a single element `FixedSizeListArray` with the value at `index`.
3673                let list_size = nested_array.len();
3674                SingleRowListArrayBuilder::new(nested_array)
3675                    .with_field(field)
3676                    .build_fixed_size_list_scalar(list_size)
3677            }
3678            DataType::Date32 => typed_cast!(array, index, as_date32_array, Date32)?,
3679            DataType::Date64 => typed_cast!(array, index, as_date64_array, Date64)?,
3680            DataType::Time32(TimeUnit::Second) => {
3681                typed_cast!(array, index, as_time32_second_array, Time32Second)?
3682            }
3683            DataType::Time32(TimeUnit::Millisecond) => {
3684                typed_cast!(array, index, as_time32_millisecond_array, Time32Millisecond)?
3685            }
3686            DataType::Time64(TimeUnit::Microsecond) => {
3687                typed_cast!(array, index, as_time64_microsecond_array, Time64Microsecond)?
3688            }
3689            DataType::Time64(TimeUnit::Nanosecond) => {
3690                typed_cast!(array, index, as_time64_nanosecond_array, Time64Nanosecond)?
3691            }
3692            DataType::Timestamp(TimeUnit::Second, tz_opt) => typed_cast_tz!(
3693                array,
3694                index,
3695                as_timestamp_second_array,
3696                TimestampSecond,
3697                tz_opt
3698            )?,
3699            DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => typed_cast_tz!(
3700                array,
3701                index,
3702                as_timestamp_millisecond_array,
3703                TimestampMillisecond,
3704                tz_opt
3705            )?,
3706            DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => typed_cast_tz!(
3707                array,
3708                index,
3709                as_timestamp_microsecond_array,
3710                TimestampMicrosecond,
3711                tz_opt
3712            )?,
3713            DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => typed_cast_tz!(
3714                array,
3715                index,
3716                as_timestamp_nanosecond_array,
3717                TimestampNanosecond,
3718                tz_opt
3719            )?,
3720            DataType::Dictionary(key_type, _) => {
3721                let (values_array, values_index) = match key_type.as_ref() {
3722                    DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
3723                    DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
3724                    DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
3725                    DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
3726                    DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
3727                    DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
3728                    DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
3729                    DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
3730                    _ => unreachable!("Invalid dictionary keys type: {}", key_type),
3731                };
3732                // look up the index in the values dictionary
3733                let value = match values_index {
3734                    Some(values_index) => {
3735                        ScalarValue::try_from_array(values_array, values_index)
3736                    }
3737                    // else entry was null, so return null
3738                    None => values_array.data_type().try_into(),
3739                }?;
3740
3741                Self::Dictionary(key_type.clone(), Box::new(value))
3742            }
3743            DataType::RunEndEncoded(run_ends_field, value_field) => {
3744                // Explicitly check length here since get_physical_index() doesn't
3745                // bound check for us
3746                if index > array.len() {
3747                    return _exec_err!(
3748                        "Index {index} out of bounds for array of length {}",
3749                        array.len()
3750                    );
3751                }
3752                let scalar = downcast_run_array!(
3753                    array => {
3754                        let index = array.get_physical_index(index);
3755                        ScalarValue::try_from_array(array.values(), index)?
3756                    },
3757                    dt => unreachable!("Invalid run-ends type: {dt}")
3758                );
3759                Self::RunEndEncoded(
3760                    Arc::clone(run_ends_field),
3761                    Arc::clone(value_field),
3762                    Box::new(scalar),
3763                )
3764            }
3765            DataType::Struct(_) => {
3766                let a = array.slice(index, 1);
3767                Self::Struct(Arc::new(a.as_struct().to_owned()))
3768            }
3769            DataType::FixedSizeBinary(_) => {
3770                let array = as_fixed_size_binary_array(array)?;
3771                let size = match array.data_type() {
3772                    DataType::FixedSizeBinary(size) => *size,
3773                    _ => unreachable!(),
3774                };
3775                ScalarValue::FixedSizeBinary(
3776                    size,
3777                    match array.is_null(index) {
3778                        true => None,
3779                        false => Some(array.value(index).into()),
3780                    },
3781                )
3782            }
3783            DataType::Interval(IntervalUnit::DayTime) => {
3784                typed_cast!(array, index, as_interval_dt_array, IntervalDayTime)?
3785            }
3786            DataType::Interval(IntervalUnit::YearMonth) => {
3787                typed_cast!(array, index, as_interval_ym_array, IntervalYearMonth)?
3788            }
3789            DataType::Interval(IntervalUnit::MonthDayNano) => {
3790                typed_cast!(array, index, as_interval_mdn_array, IntervalMonthDayNano)?
3791            }
3792
3793            DataType::Duration(TimeUnit::Second) => {
3794                typed_cast!(array, index, as_duration_second_array, DurationSecond)?
3795            }
3796            DataType::Duration(TimeUnit::Millisecond) => typed_cast!(
3797                array,
3798                index,
3799                as_duration_millisecond_array,
3800                DurationMillisecond
3801            )?,
3802            DataType::Duration(TimeUnit::Microsecond) => typed_cast!(
3803                array,
3804                index,
3805                as_duration_microsecond_array,
3806                DurationMicrosecond
3807            )?,
3808            DataType::Duration(TimeUnit::Nanosecond) => typed_cast!(
3809                array,
3810                index,
3811                as_duration_nanosecond_array,
3812                DurationNanosecond
3813            )?,
3814            DataType::Map(_, _) => {
3815                let a = array.slice(index, 1);
3816                Self::Map(Arc::new(a.as_map().to_owned()))
3817            }
3818            DataType::Union(fields, mode) => {
3819                let array = as_union_array(array)?;
3820                let ti = array.type_id(index);
3821                let index = array.value_offset(index);
3822                let value = ScalarValue::try_from_array(array.child(ti), index)?;
3823                ScalarValue::Union(Some((ti, Box::new(value))), fields.clone(), *mode)
3824            }
3825            other => {
3826                return _not_impl_err!(
3827                    "Can't create a scalar from array of type \"{other:?}\""
3828                );
3829            }
3830        })
3831    }
3832
3833    /// Try to parse `value` into a ScalarValue of type `target_type`
3834    pub fn try_from_string(value: String, target_type: &DataType) -> Result<Self> {
3835        ScalarValue::from(value).cast_to(target_type)
3836    }
3837
3838    /// Returns the Some(`&str`) representation of `ScalarValue` of logical string type
3839    ///
3840    /// Returns `None` if this `ScalarValue` is not a logical string type or the
3841    /// `ScalarValue` represents the `NULL` value.
3842    ///
3843    /// Note you can use [`Option::flatten`] to check for non null logical
3844    /// strings.
3845    ///
3846    /// For example, [`ScalarValue::Utf8`], [`ScalarValue::LargeUtf8`], and
3847    /// [`ScalarValue::Dictionary`] with a logical string value and store
3848    /// strings and can be accessed as `&str` using this method.
3849    ///
3850    /// # Example: logical strings
3851    /// ```
3852    /// # use datafusion_common::ScalarValue;
3853    /// /// non strings return None
3854    /// let scalar = ScalarValue::from(42);
3855    /// assert_eq!(scalar.try_as_str(), None);
3856    /// // Non null logical string returns Some(Some(&str))
3857    /// let scalar = ScalarValue::from("hello");
3858    /// assert_eq!(scalar.try_as_str(), Some(Some("hello")));
3859    /// // Null logical string returns Some(None)
3860    /// let scalar = ScalarValue::Utf8(None);
3861    /// assert_eq!(scalar.try_as_str(), Some(None));
3862    /// ```
3863    ///
3864    /// # Example: use [`Option::flatten`] to check for non-null logical strings
3865    /// ```
3866    /// # use datafusion_common::ScalarValue;
3867    /// // Non null logical string returns Some(Some(&str))
3868    /// let scalar = ScalarValue::from("hello");
3869    /// assert_eq!(scalar.try_as_str().flatten(), Some("hello"));
3870    /// ```
3871    pub fn try_as_str(&self) -> Option<Option<&str>> {
3872        let v = match self {
3873            ScalarValue::Utf8(v) => v,
3874            ScalarValue::LargeUtf8(v) => v,
3875            ScalarValue::Utf8View(v) => v,
3876            ScalarValue::Dictionary(_, v) => return v.try_as_str(),
3877            ScalarValue::RunEndEncoded(_, _, v) => return v.try_as_str(),
3878            _ => return None,
3879        };
3880        Some(v.as_ref().map(|v| v.as_str()))
3881    }
3882
3883    /// Try to cast this value to a ScalarValue of type `data_type`
3884    pub fn cast_to(&self, target_type: &DataType) -> Result<Self> {
3885        self.cast_to_with_options(target_type, &DEFAULT_CAST_OPTIONS)
3886    }
3887
3888    /// Try to cast this value to a ScalarValue of type `data_type` with [`CastOptions`]
3889    pub fn cast_to_with_options(
3890        &self,
3891        target_type: &DataType,
3892        cast_options: &CastOptions<'static>,
3893    ) -> Result<Self> {
3894        let source_type = self.data_type();
3895        if let Some(multiplier) = date_to_timestamp_multiplier(&source_type, target_type)
3896            && let Some(value) = self.date_scalar_value_as_i64()
3897        {
3898            ensure_timestamp_in_bounds(value, multiplier, &source_type, target_type)?;
3899        }
3900
3901        let scalar_array = self.to_array()?;
3902
3903        // For struct types, use name-based casting logic that matches fields by name
3904        // and recursively casts nested structs. The field name wrapper is arbitrary
3905        // since cast_column only uses the DataType::Struct field definitions inside.
3906        let cast_arr = match target_type {
3907            DataType::Struct(_) => {
3908                // Field name is unused; only the struct's inner field names matter
3909                let target_field = Field::new("_", target_type.clone(), true);
3910                crate::nested_struct::cast_column(
3911                    &scalar_array,
3912                    &target_field,
3913                    cast_options,
3914                )?
3915            }
3916            _ => cast_with_options(&scalar_array, target_type, cast_options)?,
3917        };
3918
3919        ScalarValue::try_from_array(&cast_arr, 0)
3920    }
3921
3922    fn date_scalar_value_as_i64(&self) -> Option<i64> {
3923        match self {
3924            ScalarValue::Date32(Some(value)) => Some(i64::from(*value)),
3925            ScalarValue::Date64(Some(value)) => Some(*value),
3926            _ => None,
3927        }
3928    }
3929
3930    fn eq_array_decimal32(
3931        array: &ArrayRef,
3932        index: usize,
3933        value: Option<&i32>,
3934        precision: u8,
3935        scale: i8,
3936    ) -> Result<bool> {
3937        let array = as_decimal32_array(array)?;
3938        if array.precision() != precision || array.scale() != scale {
3939            return Ok(false);
3940        }
3941        let is_null = array.is_null(index);
3942        if let Some(v) = value {
3943            Ok(!array.is_null(index) && array.value(index) == *v)
3944        } else {
3945            Ok(is_null)
3946        }
3947    }
3948
3949    fn eq_array_decimal64(
3950        array: &ArrayRef,
3951        index: usize,
3952        value: Option<&i64>,
3953        precision: u8,
3954        scale: i8,
3955    ) -> Result<bool> {
3956        let array = as_decimal64_array(array)?;
3957        if array.precision() != precision || array.scale() != scale {
3958            return Ok(false);
3959        }
3960        let is_null = array.is_null(index);
3961        if let Some(v) = value {
3962            Ok(!array.is_null(index) && array.value(index) == *v)
3963        } else {
3964            Ok(is_null)
3965        }
3966    }
3967
3968    fn eq_array_decimal(
3969        array: &ArrayRef,
3970        index: usize,
3971        value: Option<&i128>,
3972        precision: u8,
3973        scale: i8,
3974    ) -> Result<bool> {
3975        let array = as_decimal128_array(array)?;
3976        if array.precision() != precision || array.scale() != scale {
3977            return Ok(false);
3978        }
3979        let is_null = array.is_null(index);
3980        if let Some(v) = value {
3981            Ok(!array.is_null(index) && array.value(index) == *v)
3982        } else {
3983            Ok(is_null)
3984        }
3985    }
3986
3987    fn eq_array_decimal256(
3988        array: &ArrayRef,
3989        index: usize,
3990        value: Option<&i256>,
3991        precision: u8,
3992        scale: i8,
3993    ) -> Result<bool> {
3994        let array = as_decimal256_array(array)?;
3995        if array.precision() != precision || array.scale() != scale {
3996            return Ok(false);
3997        }
3998        let is_null = array.is_null(index);
3999        if let Some(v) = value {
4000            Ok(!array.is_null(index) && array.value(index) == *v)
4001        } else {
4002            Ok(is_null)
4003        }
4004    }
4005
4006    /// Compares a single row of array @ index for equality with self,
4007    /// in an optimized fashion.
4008    ///
4009    /// This method implements an optimized version of:
4010    ///
4011    /// ```text
4012    ///     let arr_scalar = Self::try_from_array(array, index).unwrap();
4013    ///     arr_scalar.eq(self)
4014    /// ```
4015    ///
4016    /// *Performance note*: the arrow compute kernels should be
4017    /// preferred over this function if at all possible as they can be
4018    /// vectorized and are generally much faster.
4019    ///
4020    /// This function has a few narrow use cases such as hash table key
4021    /// comparisons where comparing a single row at a time is necessary.
4022    ///
4023    /// # Errors
4024    ///
4025    /// Errors if
4026    /// - it fails to downcast `array` to the data type of `self`
4027    /// - `self` is a `Struct`
4028    ///
4029    /// # Panics
4030    ///
4031    /// Panics if `self` is a dictionary with invalid key type
4032    #[inline]
4033    pub fn eq_array(&self, array: &ArrayRef, index: usize) -> Result<bool> {
4034        Ok(match self {
4035            ScalarValue::Decimal32(v, precision, scale) => {
4036                ScalarValue::eq_array_decimal32(
4037                    array,
4038                    index,
4039                    v.as_ref(),
4040                    *precision,
4041                    *scale,
4042                )?
4043            }
4044            ScalarValue::Decimal64(v, precision, scale) => {
4045                ScalarValue::eq_array_decimal64(
4046                    array,
4047                    index,
4048                    v.as_ref(),
4049                    *precision,
4050                    *scale,
4051                )?
4052            }
4053            ScalarValue::Decimal128(v, precision, scale) => {
4054                ScalarValue::eq_array_decimal(
4055                    array,
4056                    index,
4057                    v.as_ref(),
4058                    *precision,
4059                    *scale,
4060                )?
4061            }
4062            ScalarValue::Decimal256(v, precision, scale) => {
4063                ScalarValue::eq_array_decimal256(
4064                    array,
4065                    index,
4066                    v.as_ref(),
4067                    *precision,
4068                    *scale,
4069                )?
4070            }
4071            ScalarValue::Boolean(val) => {
4072                eq_array_primitive!(array, index, as_boolean_array, val)?
4073            }
4074            ScalarValue::Float16(val) => {
4075                eq_array_primitive!(array, index, as_float16_array, val)?
4076            }
4077            ScalarValue::Float32(val) => {
4078                eq_array_primitive!(array, index, as_float32_array, val)?
4079            }
4080            ScalarValue::Float64(val) => {
4081                eq_array_primitive!(array, index, as_float64_array, val)?
4082            }
4083            ScalarValue::Int8(val) => {
4084                eq_array_primitive!(array, index, as_int8_array, val)?
4085            }
4086            ScalarValue::Int16(val) => {
4087                eq_array_primitive!(array, index, as_int16_array, val)?
4088            }
4089            ScalarValue::Int32(val) => {
4090                eq_array_primitive!(array, index, as_int32_array, val)?
4091            }
4092            ScalarValue::Int64(val) => {
4093                eq_array_primitive!(array, index, as_int64_array, val)?
4094            }
4095            ScalarValue::UInt8(val) => {
4096                eq_array_primitive!(array, index, as_uint8_array, val)?
4097            }
4098            ScalarValue::UInt16(val) => {
4099                eq_array_primitive!(array, index, as_uint16_array, val)?
4100            }
4101            ScalarValue::UInt32(val) => {
4102                eq_array_primitive!(array, index, as_uint32_array, val)?
4103            }
4104            ScalarValue::UInt64(val) => {
4105                eq_array_primitive!(array, index, as_uint64_array, val)?
4106            }
4107            ScalarValue::Utf8(val) => {
4108                eq_array_primitive!(array, index, as_string_array, val)?
4109            }
4110            ScalarValue::Utf8View(val) => {
4111                eq_array_primitive!(array, index, as_string_view_array, val)?
4112            }
4113            ScalarValue::LargeUtf8(val) => {
4114                eq_array_primitive!(array, index, as_large_string_array, val)?
4115            }
4116            ScalarValue::Binary(val) => {
4117                eq_array_primitive!(array, index, as_binary_array, val)?
4118            }
4119            ScalarValue::BinaryView(val) => {
4120                eq_array_primitive!(array, index, as_binary_view_array, val)?
4121            }
4122            ScalarValue::FixedSizeBinary(_, val) => {
4123                eq_array_primitive!(array, index, as_fixed_size_binary_array, val)?
4124            }
4125            ScalarValue::LargeBinary(val) => {
4126                eq_array_primitive!(array, index, as_large_binary_array, val)?
4127            }
4128            ScalarValue::List(arr) => {
4129                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4130            }
4131            ScalarValue::LargeList(arr) => {
4132                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4133            }
4134            ScalarValue::FixedSizeList(arr) => {
4135                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4136            }
4137            ScalarValue::Struct(arr) => {
4138                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4139            }
4140            ScalarValue::Map(arr) => {
4141                Self::eq_array_list(&(arr.to_owned() as ArrayRef), array, index)
4142            }
4143            ScalarValue::Date32(val) => {
4144                eq_array_primitive!(array, index, as_date32_array, val)?
4145            }
4146            ScalarValue::Date64(val) => {
4147                eq_array_primitive!(array, index, as_date64_array, val)?
4148            }
4149            ScalarValue::Time32Second(val) => {
4150                eq_array_primitive!(array, index, as_time32_second_array, val)?
4151            }
4152            ScalarValue::Time32Millisecond(val) => {
4153                eq_array_primitive!(array, index, as_time32_millisecond_array, val)?
4154            }
4155            ScalarValue::Time64Microsecond(val) => {
4156                eq_array_primitive!(array, index, as_time64_microsecond_array, val)?
4157            }
4158            ScalarValue::Time64Nanosecond(val) => {
4159                eq_array_primitive!(array, index, as_time64_nanosecond_array, val)?
4160            }
4161            ScalarValue::TimestampSecond(val, _) => {
4162                eq_array_primitive!(array, index, as_timestamp_second_array, val)?
4163            }
4164            ScalarValue::TimestampMillisecond(val, _) => {
4165                eq_array_primitive!(array, index, as_timestamp_millisecond_array, val)?
4166            }
4167            ScalarValue::TimestampMicrosecond(val, _) => {
4168                eq_array_primitive!(array, index, as_timestamp_microsecond_array, val)?
4169            }
4170            ScalarValue::TimestampNanosecond(val, _) => {
4171                eq_array_primitive!(array, index, as_timestamp_nanosecond_array, val)?
4172            }
4173            ScalarValue::IntervalYearMonth(val) => {
4174                eq_array_primitive!(array, index, as_interval_ym_array, val)?
4175            }
4176            ScalarValue::IntervalDayTime(val) => {
4177                eq_array_primitive!(array, index, as_interval_dt_array, val)?
4178            }
4179            ScalarValue::IntervalMonthDayNano(val) => {
4180                eq_array_primitive!(array, index, as_interval_mdn_array, val)?
4181            }
4182            ScalarValue::DurationSecond(val) => {
4183                eq_array_primitive!(array, index, as_duration_second_array, val)?
4184            }
4185            ScalarValue::DurationMillisecond(val) => {
4186                eq_array_primitive!(array, index, as_duration_millisecond_array, val)?
4187            }
4188            ScalarValue::DurationMicrosecond(val) => {
4189                eq_array_primitive!(array, index, as_duration_microsecond_array, val)?
4190            }
4191            ScalarValue::DurationNanosecond(val) => {
4192                eq_array_primitive!(array, index, as_duration_nanosecond_array, val)?
4193            }
4194            ScalarValue::Union(value, _, _) => {
4195                let array = as_union_array(array)?;
4196                let ti = array.type_id(index);
4197                let index = array.value_offset(index);
4198                if let Some((ti_v, value)) = value {
4199                    ti_v == &ti && value.eq_array(array.child(ti), index)?
4200                } else {
4201                    array.child(ti).is_null(index)
4202                }
4203            }
4204            ScalarValue::Dictionary(key_type, v) => {
4205                let (values_array, values_index) = match key_type.as_ref() {
4206                    DataType::Int8 => get_dict_value::<Int8Type>(array, index)?,
4207                    DataType::Int16 => get_dict_value::<Int16Type>(array, index)?,
4208                    DataType::Int32 => get_dict_value::<Int32Type>(array, index)?,
4209                    DataType::Int64 => get_dict_value::<Int64Type>(array, index)?,
4210                    DataType::UInt8 => get_dict_value::<UInt8Type>(array, index)?,
4211                    DataType::UInt16 => get_dict_value::<UInt16Type>(array, index)?,
4212                    DataType::UInt32 => get_dict_value::<UInt32Type>(array, index)?,
4213                    DataType::UInt64 => get_dict_value::<UInt64Type>(array, index)?,
4214                    _ => unreachable!("Invalid dictionary keys type: {}", key_type),
4215                };
4216                // was the value in the array non null?
4217                match values_index {
4218                    Some(values_index) => v.eq_array(values_array, values_index)?,
4219                    None => v.is_null(),
4220                }
4221            }
4222            ScalarValue::RunEndEncoded(run_ends_field, _, value) => {
4223                // Explicitly check length here since get_physical_index() doesn't
4224                // bound check for us
4225                if index > array.len() {
4226                    return _exec_err!(
4227                        "Index {index} out of bounds for array of length {}",
4228                        array.len()
4229                    );
4230                }
4231                match run_ends_field.data_type() {
4232                    DataType::Int16 => {
4233                        let array = as_run_array::<Int16Type>(array)?;
4234                        let index = array.get_physical_index(index);
4235                        value.eq_array(array.values(), index)?
4236                    }
4237                    DataType::Int32 => {
4238                        let array = as_run_array::<Int32Type>(array)?;
4239                        let index = array.get_physical_index(index);
4240                        value.eq_array(array.values(), index)?
4241                    }
4242                    DataType::Int64 => {
4243                        let array = as_run_array::<Int64Type>(array)?;
4244                        let index = array.get_physical_index(index);
4245                        value.eq_array(array.values(), index)?
4246                    }
4247                    dt => unreachable!("Invalid run-ends type: {dt}"),
4248                }
4249            }
4250            ScalarValue::Null => array.is_null(index),
4251        })
4252    }
4253
4254    fn eq_array_list(arr1: &ArrayRef, arr2: &ArrayRef, index: usize) -> bool {
4255        let right = arr2.slice(index, 1);
4256        arr1 == &right
4257    }
4258
4259    /// Compare `self` with `other` and return an `Ordering`.
4260    ///
4261    /// This is the same as [`PartialOrd`] except that it returns
4262    /// `Err` if the values cannot be compared, e.g., they have incompatible data types.
4263    pub fn try_cmp(&self, other: &Self) -> Result<Ordering> {
4264        self.partial_cmp(other).ok_or_else(|| {
4265            _internal_datafusion_err!("Uncomparable values: {self:?}, {other:?}")
4266        })
4267    }
4268
4269    /// Estimate size if bytes including `Self`. For values with internal containers such as `String`
4270    /// includes the allocated size (`capacity`) rather than the current length (`len`)
4271    pub fn size(&self) -> usize {
4272        size_of_val(self)
4273            + match self {
4274                ScalarValue::Null
4275                | ScalarValue::Boolean(_)
4276                | ScalarValue::Float16(_)
4277                | ScalarValue::Float32(_)
4278                | ScalarValue::Float64(_)
4279                | ScalarValue::Decimal32(_, _, _)
4280                | ScalarValue::Decimal64(_, _, _)
4281                | ScalarValue::Decimal128(_, _, _)
4282                | ScalarValue::Decimal256(_, _, _)
4283                | ScalarValue::Int8(_)
4284                | ScalarValue::Int16(_)
4285                | ScalarValue::Int32(_)
4286                | ScalarValue::Int64(_)
4287                | ScalarValue::UInt8(_)
4288                | ScalarValue::UInt16(_)
4289                | ScalarValue::UInt32(_)
4290                | ScalarValue::UInt64(_)
4291                | ScalarValue::Date32(_)
4292                | ScalarValue::Date64(_)
4293                | ScalarValue::Time32Second(_)
4294                | ScalarValue::Time32Millisecond(_)
4295                | ScalarValue::Time64Microsecond(_)
4296                | ScalarValue::Time64Nanosecond(_)
4297                | ScalarValue::IntervalYearMonth(_)
4298                | ScalarValue::IntervalDayTime(_)
4299                | ScalarValue::IntervalMonthDayNano(_)
4300                | ScalarValue::DurationSecond(_)
4301                | ScalarValue::DurationMillisecond(_)
4302                | ScalarValue::DurationMicrosecond(_)
4303                | ScalarValue::DurationNanosecond(_) => 0,
4304                ScalarValue::Utf8(s)
4305                | ScalarValue::LargeUtf8(s)
4306                | ScalarValue::Utf8View(s) => {
4307                    s.as_ref().map(|s| s.capacity()).unwrap_or_default()
4308                }
4309                ScalarValue::TimestampSecond(_, s)
4310                | ScalarValue::TimestampMillisecond(_, s)
4311                | ScalarValue::TimestampMicrosecond(_, s)
4312                | ScalarValue::TimestampNanosecond(_, s) => {
4313                    s.as_ref().map(|s| s.len()).unwrap_or_default()
4314                }
4315                ScalarValue::Binary(b)
4316                | ScalarValue::FixedSizeBinary(_, b)
4317                | ScalarValue::LargeBinary(b)
4318                | ScalarValue::BinaryView(b) => {
4319                    b.as_ref().map(|b| b.capacity()).unwrap_or_default()
4320                }
4321                ScalarValue::List(arr) => arr.get_array_memory_size(),
4322                ScalarValue::LargeList(arr) => arr.get_array_memory_size(),
4323                ScalarValue::FixedSizeList(arr) => arr.get_array_memory_size(),
4324                ScalarValue::Struct(arr) => arr.get_array_memory_size(),
4325                ScalarValue::Map(arr) => arr.get_array_memory_size(),
4326                ScalarValue::Union(vals, fields, _mode) => {
4327                    vals.as_ref()
4328                        .map(|(_id, sv)| sv.size() - size_of_val(sv))
4329                        .unwrap_or_default()
4330                        // `fields` is boxed, so it is NOT already included in `self`
4331                        + size_of_val(fields)
4332                        + (size_of::<Field>() * fields.len())
4333                        + fields.iter().map(|(_idx, field)| field.size() - size_of_val(field)).sum::<usize>()
4334                }
4335                ScalarValue::Dictionary(dt, sv) => {
4336                    // `dt` and `sv` are boxed, so they are NOT already included in `self`
4337                    dt.size() + sv.size()
4338                }
4339                ScalarValue::RunEndEncoded(rf, vf, v) => rf.size() + vf.size() + v.size(),
4340            }
4341    }
4342
4343    /// Estimates [size](Self::size) of [`Vec`] in bytes.
4344    ///
4345    /// Includes the size of the [`Vec`] container itself.
4346    pub fn size_of_vec(vec: &Vec<Self>) -> usize {
4347        size_of_val(vec)
4348            + (size_of::<ScalarValue>() * vec.capacity())
4349            + vec
4350                .iter()
4351                .map(|sv| sv.size() - size_of_val(sv))
4352                .sum::<usize>()
4353    }
4354
4355    /// Estimates [size](Self::size) of [`VecDeque`] in bytes.
4356    ///
4357    /// Includes the size of the [`VecDeque`] container itself.
4358    pub fn size_of_vec_deque(vec_deque: &VecDeque<Self>) -> usize {
4359        size_of_val(vec_deque)
4360            + (size_of::<ScalarValue>() * vec_deque.capacity())
4361            + vec_deque
4362                .iter()
4363                .map(|sv| sv.size() - size_of_val(sv))
4364                .sum::<usize>()
4365    }
4366
4367    /// Estimates [size](Self::size) of [`HashSet`] in bytes.
4368    ///
4369    /// Includes the size of the [`HashSet`] container itself.
4370    pub fn size_of_hashset<S>(set: &HashSet<Self, S>) -> usize {
4371        size_of_val(set)
4372            + (size_of::<ScalarValue>() * set.capacity())
4373            + set
4374                .iter()
4375                .map(|sv| sv.size() - size_of_val(sv))
4376                .sum::<usize>()
4377    }
4378
4379    /// Compacts the allocation referenced by `self` to the minimum, copying the data if
4380    /// necessary.
4381    ///
4382    /// This can be relevant when `self` is a list or contains a list as a nested value, as
4383    /// a single list holds an Arc to its entire original array buffer.
4384    pub fn compact(&mut self) {
4385        match self {
4386            ScalarValue::Null
4387            | ScalarValue::Boolean(_)
4388            | ScalarValue::Float16(_)
4389            | ScalarValue::Float32(_)
4390            | ScalarValue::Float64(_)
4391            | ScalarValue::Decimal32(_, _, _)
4392            | ScalarValue::Decimal64(_, _, _)
4393            | ScalarValue::Decimal128(_, _, _)
4394            | ScalarValue::Decimal256(_, _, _)
4395            | ScalarValue::Int8(_)
4396            | ScalarValue::Int16(_)
4397            | ScalarValue::Int32(_)
4398            | ScalarValue::Int64(_)
4399            | ScalarValue::UInt8(_)
4400            | ScalarValue::UInt16(_)
4401            | ScalarValue::UInt32(_)
4402            | ScalarValue::UInt64(_)
4403            | ScalarValue::Date32(_)
4404            | ScalarValue::Date64(_)
4405            | ScalarValue::Time32Second(_)
4406            | ScalarValue::Time32Millisecond(_)
4407            | ScalarValue::Time64Microsecond(_)
4408            | ScalarValue::Time64Nanosecond(_)
4409            | ScalarValue::IntervalYearMonth(_)
4410            | ScalarValue::IntervalDayTime(_)
4411            | ScalarValue::IntervalMonthDayNano(_)
4412            | ScalarValue::DurationSecond(_)
4413            | ScalarValue::DurationMillisecond(_)
4414            | ScalarValue::DurationMicrosecond(_)
4415            | ScalarValue::DurationNanosecond(_)
4416            | ScalarValue::Utf8(_)
4417            | ScalarValue::LargeUtf8(_)
4418            | ScalarValue::Utf8View(_)
4419            | ScalarValue::TimestampSecond(_, _)
4420            | ScalarValue::TimestampMillisecond(_, _)
4421            | ScalarValue::TimestampMicrosecond(_, _)
4422            | ScalarValue::TimestampNanosecond(_, _)
4423            | ScalarValue::Binary(_)
4424            | ScalarValue::FixedSizeBinary(_, _)
4425            | ScalarValue::LargeBinary(_)
4426            | ScalarValue::BinaryView(_) => (),
4427            ScalarValue::FixedSizeList(arr) => {
4428                let array = copy_array_data(&arr.to_data());
4429                *Arc::make_mut(arr) = FixedSizeListArray::from(array);
4430            }
4431            ScalarValue::List(arr) => {
4432                let array = copy_array_data(&arr.to_data());
4433                *Arc::make_mut(arr) = ListArray::from(array);
4434            }
4435            ScalarValue::LargeList(arr) => {
4436                let array = copy_array_data(&arr.to_data());
4437                *Arc::make_mut(arr) = LargeListArray::from(array)
4438            }
4439            ScalarValue::Struct(arr) => {
4440                let array = copy_array_data(&arr.to_data());
4441                *Arc::make_mut(arr) = StructArray::from(array);
4442            }
4443            ScalarValue::Map(arr) => {
4444                let array = copy_array_data(&arr.to_data());
4445                *Arc::make_mut(arr) = MapArray::from(array);
4446            }
4447            ScalarValue::Union(val, _, _) => {
4448                if let Some((_, value)) = val.as_mut() {
4449                    value.compact();
4450                }
4451            }
4452            ScalarValue::Dictionary(_, value) => {
4453                value.compact();
4454            }
4455            ScalarValue::RunEndEncoded(_, _, value) => {
4456                value.compact();
4457            }
4458        }
4459    }
4460
4461    /// Compacts ([ScalarValue::compact]) the current [ScalarValue] and returns it.
4462    pub fn compacted(mut self) -> Self {
4463        self.compact();
4464        self
4465    }
4466
4467    /// Returns the minimum value for the given numeric `DataType`.
4468    ///
4469    /// This function returns the smallest representable value for numeric
4470    /// and temporal data types. For non-numeric types, it returns `None`.
4471    ///
4472    /// # Supported Types
4473    ///
4474    /// - **Integer types**: `i8::MIN`, `i16::MIN`, etc.
4475    /// - **Unsigned types**: Always 0 (`u8::MIN`, `u16::MIN`, etc.)
4476    /// - **Float types**: Negative infinity (IEEE 754)
4477    /// - **Decimal types**: Smallest value based on precision
4478    /// - **Temporal types**: Minimum timestamp/date values
4479    /// - **Time types**: 0 (midnight)
4480    /// - **Duration types**: `i64::MIN`
4481    pub fn min(datatype: &DataType) -> Option<ScalarValue> {
4482        match datatype {
4483            DataType::Int8 => Some(ScalarValue::Int8(Some(i8::MIN))),
4484            DataType::Int16 => Some(ScalarValue::Int16(Some(i16::MIN))),
4485            DataType::Int32 => Some(ScalarValue::Int32(Some(i32::MIN))),
4486            DataType::Int64 => Some(ScalarValue::Int64(Some(i64::MIN))),
4487            DataType::UInt8 => Some(ScalarValue::UInt8(Some(u8::MIN))),
4488            DataType::UInt16 => Some(ScalarValue::UInt16(Some(u16::MIN))),
4489            DataType::UInt32 => Some(ScalarValue::UInt32(Some(u32::MIN))),
4490            DataType::UInt64 => Some(ScalarValue::UInt64(Some(u64::MIN))),
4491            DataType::Float16 => Some(ScalarValue::Float16(Some(f16::NEG_INFINITY))),
4492            DataType::Float32 => Some(ScalarValue::Float32(Some(f32::NEG_INFINITY))),
4493            DataType::Float64 => Some(ScalarValue::Float64(Some(f64::NEG_INFINITY))),
4494            DataType::Decimal128(precision, scale) => {
4495                // For decimal, min is -10^(precision-scale) + 10^(-scale)
4496                // But for simplicity, we use the minimum i128 value that fits the precision
4497                let max_digits = 10_i128.pow(*precision as u32) - 1;
4498                Some(ScalarValue::Decimal128(
4499                    Some(-max_digits),
4500                    *precision,
4501                    *scale,
4502                ))
4503            }
4504            DataType::Decimal256(precision, scale) => {
4505                // Similar to Decimal128 but with i256
4506                // For now, use a large negative value
4507                let max_digits = i256::from_i128(10_i128)
4508                    .checked_pow(*precision as u32)
4509                    .and_then(|v| v.checked_sub(i256::from_i128(1)))
4510                    .unwrap_or(i256::MAX);
4511                Some(ScalarValue::Decimal256(
4512                    Some(max_digits.neg_wrapping()),
4513                    *precision,
4514                    *scale,
4515                ))
4516            }
4517            DataType::Date32 => Some(ScalarValue::Date32(Some(i32::MIN))),
4518            DataType::Date64 => Some(ScalarValue::Date64(Some(i64::MIN))),
4519            DataType::Time32(TimeUnit::Second) => {
4520                Some(ScalarValue::Time32Second(Some(0)))
4521            }
4522            DataType::Time32(TimeUnit::Millisecond) => {
4523                Some(ScalarValue::Time32Millisecond(Some(0)))
4524            }
4525            DataType::Time64(TimeUnit::Microsecond) => {
4526                Some(ScalarValue::Time64Microsecond(Some(0)))
4527            }
4528            DataType::Time64(TimeUnit::Nanosecond) => {
4529                Some(ScalarValue::Time64Nanosecond(Some(0)))
4530            }
4531            DataType::Timestamp(unit, tz) => match unit {
4532                TimeUnit::Second => {
4533                    Some(ScalarValue::TimestampSecond(Some(i64::MIN), tz.clone()))
4534                }
4535                TimeUnit::Millisecond => Some(ScalarValue::TimestampMillisecond(
4536                    Some(i64::MIN),
4537                    tz.clone(),
4538                )),
4539                TimeUnit::Microsecond => Some(ScalarValue::TimestampMicrosecond(
4540                    Some(i64::MIN),
4541                    tz.clone(),
4542                )),
4543                TimeUnit::Nanosecond => {
4544                    Some(ScalarValue::TimestampNanosecond(Some(i64::MIN), tz.clone()))
4545                }
4546            },
4547            DataType::Duration(unit) => match unit {
4548                TimeUnit::Second => Some(ScalarValue::DurationSecond(Some(i64::MIN))),
4549                TimeUnit::Millisecond => {
4550                    Some(ScalarValue::DurationMillisecond(Some(i64::MIN)))
4551                }
4552                TimeUnit::Microsecond => {
4553                    Some(ScalarValue::DurationMicrosecond(Some(i64::MIN)))
4554                }
4555                TimeUnit::Nanosecond => {
4556                    Some(ScalarValue::DurationNanosecond(Some(i64::MIN)))
4557                }
4558            },
4559            _ => None,
4560        }
4561    }
4562
4563    /// Returns the maximum value for the given numeric `DataType`.
4564    ///
4565    /// This function returns the largest representable value for numeric
4566    /// and temporal data types. For non-numeric types, it returns `None`.
4567    ///
4568    /// # Supported Types
4569    ///
4570    /// - **Integer types**: `i8::MAX`, `i16::MAX`, etc.
4571    /// - **Unsigned types**: `u8::MAX`, `u16::MAX`, etc.
4572    /// - **Float types**: Positive infinity (IEEE 754)
4573    /// - **Decimal types**: Largest value based on precision
4574    /// - **Temporal types**: Maximum timestamp/date values
4575    /// - **Time types**: Maximum time in the day (1 day - 1 unit)
4576    /// - **Duration types**: `i64::MAX`
4577    pub fn max(datatype: &DataType) -> Option<ScalarValue> {
4578        match datatype {
4579            DataType::Int8 => Some(ScalarValue::Int8(Some(i8::MAX))),
4580            DataType::Int16 => Some(ScalarValue::Int16(Some(i16::MAX))),
4581            DataType::Int32 => Some(ScalarValue::Int32(Some(i32::MAX))),
4582            DataType::Int64 => Some(ScalarValue::Int64(Some(i64::MAX))),
4583            DataType::UInt8 => Some(ScalarValue::UInt8(Some(u8::MAX))),
4584            DataType::UInt16 => Some(ScalarValue::UInt16(Some(u16::MAX))),
4585            DataType::UInt32 => Some(ScalarValue::UInt32(Some(u32::MAX))),
4586            DataType::UInt64 => Some(ScalarValue::UInt64(Some(u64::MAX))),
4587            DataType::Float16 => Some(ScalarValue::Float16(Some(f16::INFINITY))),
4588            DataType::Float32 => Some(ScalarValue::Float32(Some(f32::INFINITY))),
4589            DataType::Float64 => Some(ScalarValue::Float64(Some(f64::INFINITY))),
4590            DataType::Decimal128(precision, scale) => {
4591                // For decimal, max is 10^(precision-scale) - 10^(-scale)
4592                // But for simplicity, we use the maximum i128 value that fits the precision
4593                let max_digits = 10_i128.pow(*precision as u32) - 1;
4594                Some(ScalarValue::Decimal128(
4595                    Some(max_digits),
4596                    *precision,
4597                    *scale,
4598                ))
4599            }
4600            DataType::Decimal256(precision, scale) => {
4601                // Similar to Decimal128 but with i256
4602                let max_digits = i256::from_i128(10_i128)
4603                    .checked_pow(*precision as u32)
4604                    .and_then(|v| v.checked_sub(i256::from_i128(1)))
4605                    .unwrap_or(i256::MAX);
4606                Some(ScalarValue::Decimal256(
4607                    Some(max_digits),
4608                    *precision,
4609                    *scale,
4610                ))
4611            }
4612            DataType::Date32 => Some(ScalarValue::Date32(Some(i32::MAX))),
4613            DataType::Date64 => Some(ScalarValue::Date64(Some(i64::MAX))),
4614            DataType::Time32(TimeUnit::Second) => {
4615                // 86399 seconds = 23:59:59
4616                Some(ScalarValue::Time32Second(Some(86_399)))
4617            }
4618            DataType::Time32(TimeUnit::Millisecond) => {
4619                // 86_399_999 milliseconds = 23:59:59.999
4620                Some(ScalarValue::Time32Millisecond(Some(86_399_999)))
4621            }
4622            DataType::Time64(TimeUnit::Microsecond) => {
4623                // 86_399_999_999 microseconds = 23:59:59.999999
4624                Some(ScalarValue::Time64Microsecond(Some(86_399_999_999)))
4625            }
4626            DataType::Time64(TimeUnit::Nanosecond) => {
4627                // 86_399_999_999_999 nanoseconds = 23:59:59.999999999
4628                Some(ScalarValue::Time64Nanosecond(Some(86_399_999_999_999)))
4629            }
4630            DataType::Timestamp(unit, tz) => match unit {
4631                TimeUnit::Second => {
4632                    Some(ScalarValue::TimestampSecond(Some(i64::MAX), tz.clone()))
4633                }
4634                TimeUnit::Millisecond => Some(ScalarValue::TimestampMillisecond(
4635                    Some(i64::MAX),
4636                    tz.clone(),
4637                )),
4638                TimeUnit::Microsecond => Some(ScalarValue::TimestampMicrosecond(
4639                    Some(i64::MAX),
4640                    tz.clone(),
4641                )),
4642                TimeUnit::Nanosecond => {
4643                    Some(ScalarValue::TimestampNanosecond(Some(i64::MAX), tz.clone()))
4644                }
4645            },
4646            DataType::Duration(unit) => match unit {
4647                TimeUnit::Second => Some(ScalarValue::DurationSecond(Some(i64::MAX))),
4648                TimeUnit::Millisecond => {
4649                    Some(ScalarValue::DurationMillisecond(Some(i64::MAX)))
4650                }
4651                TimeUnit::Microsecond => {
4652                    Some(ScalarValue::DurationMicrosecond(Some(i64::MAX)))
4653                }
4654                TimeUnit::Nanosecond => {
4655                    Some(ScalarValue::DurationNanosecond(Some(i64::MAX)))
4656                }
4657            },
4658            _ => None,
4659        }
4660    }
4661
4662    /// A thin wrapper on Arrow's validation that throws internal error if validation
4663    /// fails.
4664    fn validate_decimal_or_internal_err<T: DecimalType>(
4665        precision: u8,
4666        scale: i8,
4667    ) -> Result<()> {
4668        validate_decimal_precision_and_scale::<T>(precision, scale).map_err(|err| {
4669            _internal_datafusion_err!(
4670                "Decimal precision/scale invariant violated \
4671                 (precision={precision}, scale={scale}): {err}"
4672            )
4673        })
4674    }
4675}
4676
4677/// Compacts the data of an `ArrayData` into a new `ArrayData`.
4678///
4679/// This is useful when you want to minimize the memory footprint of an
4680/// `ArrayData`. For example, the value returned by [`Array::slice`] still
4681/// points at the same underlying data buffers as the original array, which may
4682/// hold many more values. Calling `copy_array_data` on the sliced array will
4683/// create a new, smaller, `ArrayData` that only contains the data for the
4684/// sliced array.
4685///
4686/// # Example
4687/// ```
4688/// # use arrow::array::{make_array, Array, Int32Array};
4689/// use datafusion_common::scalar::copy_array_data;
4690/// let array = Int32Array::from_iter_values(0..8192);
4691/// // Take only the first 2 elements
4692/// let sliced_array = array.slice(0, 2);
4693/// // The memory footprint of `sliced_array` is close to 8192 * 4 bytes
4694/// assert_eq!(32864, sliced_array.get_array_memory_size());
4695/// // however, we can copy the data to a new `ArrayData`
4696/// let new_array = make_array(copy_array_data(&sliced_array.into_data()));
4697/// // The memory footprint of `new_array` is now only 2 * 4 bytes
4698/// // and overhead:
4699/// assert_eq!(160, new_array.get_array_memory_size());
4700/// ```
4701///
4702/// See also [`ScalarValue::compact`] which applies to `ScalarValue` instances
4703/// as necessary.
4704pub fn copy_array_data(src_data: &ArrayData) -> ArrayData {
4705    let mut copy = MutableArrayData::new(vec![&src_data], true, src_data.len());
4706    copy.extend(0, 0, src_data.len());
4707    copy.freeze()
4708}
4709
/// Generates the `From<$ty>` and `From<Option<$ty>>` conversions into the
/// `ScalarValue::$scalar` variant.
macro_rules! impl_scalar {
    ($ty:ty, $scalar:tt) => {
        impl From<Option<$ty>> for ScalarValue {
            fn from(value: Option<$ty>) -> Self {
                ScalarValue::$scalar(value)
            }
        }

        impl From<$ty> for ScalarValue {
            fn from(value: $ty) -> Self {
                // A bare value is the non-null case of the optional conversion.
                Self::from(Some(value))
            }
        }
    };
}
4725
4726impl_scalar!(f64, Float64);
4727impl_scalar!(f32, Float32);
4728impl_scalar!(f16, Float16);
4729impl_scalar!(i8, Int8);
4730impl_scalar!(i16, Int16);
4731impl_scalar!(i32, Int32);
4732impl_scalar!(i64, Int64);
4733impl_scalar!(bool, Boolean);
4734impl_scalar!(u8, UInt8);
4735impl_scalar!(u16, UInt16);
4736impl_scalar!(u32, UInt32);
4737impl_scalar!(u64, UInt64);
4738
4739impl From<&str> for ScalarValue {
4740    fn from(value: &str) -> Self {
4741        Some(value).into()
4742    }
4743}
4744
4745impl From<Option<&str>> for ScalarValue {
4746    fn from(value: Option<&str>) -> Self {
4747        let value = value.map(|s| s.to_string());
4748        value.into()
4749    }
4750}
4751
4752/// Wrapper to create ScalarValue::Struct for convenience
4753impl From<Vec<(&str, ScalarValue)>> for ScalarValue {
4754    fn from(value: Vec<(&str, ScalarValue)>) -> Self {
4755        value
4756            .into_iter()
4757            .fold(ScalarStructBuilder::new(), |builder, (name, value)| {
4758                builder.with_name_and_scalar(name, value)
4759            })
4760            .build()
4761            .unwrap()
4762    }
4763}
4764
4765impl FromStr for ScalarValue {
4766    type Err = Infallible;
4767
4768    fn from_str(s: &str) -> Result<Self, Self::Err> {
4769        Ok(s.into())
4770    }
4771}
4772
4773impl From<String> for ScalarValue {
4774    fn from(value: String) -> Self {
4775        Some(value).into()
4776    }
4777}
4778
4779impl From<Option<String>> for ScalarValue {
4780    fn from(value: Option<String>) -> Self {
4781        ScalarValue::Utf8(value)
4782    }
4783}
4784
/// Generates `TryFrom<ScalarValue> for $NATIVE`.
///
/// The conversion succeeds only for a non-null `ScalarValue::$SCALAR`; any
/// other variant, or a null value, produces an internal error.
macro_rules! impl_try_from {
    ($SCALAR:ident, $NATIVE:ident) => {
        impl TryFrom<ScalarValue> for $NATIVE {
            type Error = DataFusionError;

            fn try_from(value: ScalarValue) -> Result<Self> {
                if let ScalarValue::$SCALAR(Some(inner_value)) = value {
                    return Ok(inner_value);
                }
                _internal_err!(
                    "Cannot convert {:?} to {}",
                    value,
                    std::any::type_name::<Self>()
                )
            }
        }
    };
}
4803
4804impl_try_from!(Int8, i8);
4805impl_try_from!(Int16, i16);
4806
4807// special implementation for i32 because of Date32 and Time32
4808impl TryFrom<ScalarValue> for i32 {
4809    type Error = DataFusionError;
4810
4811    fn try_from(value: ScalarValue) -> Result<Self> {
4812        match value {
4813            ScalarValue::Int32(Some(inner_value))
4814            | ScalarValue::Date32(Some(inner_value))
4815            | ScalarValue::Time32Second(Some(inner_value))
4816            | ScalarValue::Time32Millisecond(Some(inner_value)) => Ok(inner_value),
4817            _ => _internal_err!(
4818                "Cannot convert {:?} to {}",
4819                value,
4820                std::any::type_name::<Self>()
4821            ),
4822        }
4823    }
4824}
4825
4826// special implementation for i64 because of Date64, Time64 and Timestamp
4827impl TryFrom<ScalarValue> for i64 {
4828    type Error = DataFusionError;
4829
4830    fn try_from(value: ScalarValue) -> Result<Self> {
4831        match value {
4832            ScalarValue::Int64(Some(inner_value))
4833            | ScalarValue::Date64(Some(inner_value))
4834            | ScalarValue::Time64Microsecond(Some(inner_value))
4835            | ScalarValue::Time64Nanosecond(Some(inner_value))
4836            | ScalarValue::TimestampNanosecond(Some(inner_value), _)
4837            | ScalarValue::TimestampMicrosecond(Some(inner_value), _)
4838            | ScalarValue::TimestampMillisecond(Some(inner_value), _)
4839            | ScalarValue::TimestampSecond(Some(inner_value), _) => Ok(inner_value),
4840            _ => _internal_err!(
4841                "Cannot convert {:?} to {}",
4842                value,
4843                std::any::type_name::<Self>()
4844            ),
4845        }
4846    }
4847}
4848
4849// special implementation for i128 because of Decimal128
4850impl TryFrom<ScalarValue> for i128 {
4851    type Error = DataFusionError;
4852
4853    fn try_from(value: ScalarValue) -> Result<Self> {
4854        match value {
4855            ScalarValue::Decimal128(Some(inner_value), _, _) => Ok(inner_value),
4856            _ => _internal_err!(
4857                "Cannot convert {:?} to {}",
4858                value,
4859                std::any::type_name::<Self>()
4860            ),
4861        }
4862    }
4863}
4864
4865// special implementation for i256 because of Decimal128
4866impl TryFrom<ScalarValue> for i256 {
4867    type Error = DataFusionError;
4868
4869    fn try_from(value: ScalarValue) -> Result<Self> {
4870        match value {
4871            ScalarValue::Decimal256(Some(inner_value), _, _) => Ok(inner_value),
4872            _ => _internal_err!(
4873                "Cannot convert {:?} to {}",
4874                value,
4875                std::any::type_name::<Self>()
4876            ),
4877        }
4878    }
4879}
4880
// Each `impl_try_from!` call pairs a `ScalarValue` variant name with a native
// Rust type; the macro itself is defined outside this section of the file.
// Unsigned integers
impl_try_from!(UInt8, u8);
impl_try_from!(UInt16, u16);
impl_try_from!(UInt32, u32);
impl_try_from!(UInt64, u64);
// Floating point
impl_try_from!(Float16, f16);
impl_try_from!(Float32, f32);
impl_try_from!(Float64, f64);
// Boolean
impl_try_from!(Boolean, bool);
4889
4890impl TryFrom<DataType> for ScalarValue {
4891    type Error = DataFusionError;
4892
4893    /// Create a Null instance of ScalarValue for this datatype
4894    fn try_from(datatype: DataType) -> Result<Self> {
4895        (&datatype).try_into()
4896    }
4897}
4898
4899impl TryFrom<&DataType> for ScalarValue {
4900    type Error = DataFusionError;
4901
4902    /// Create a Null instance of ScalarValue for this datatype
4903    fn try_from(data_type: &DataType) -> Result<Self> {
4904        Self::try_new_null(data_type)
4905    }
4906}
4907
/// Writes the `Display` form of the value held by an `Option` to `$F`, or the
/// literal `NULL` when the option is `None`. Evaluates to the `write!` result.
macro_rules! format_option {
    ($F:expr, $EXPR:expr) => {{
        if let Some(inner) = $EXPR {
            write!($F, "{inner}")
        } else {
            write!($F, "NULL")
        }
    }};
}
4916
4917// Implement Display trait for ScalarValue
4918//
4919// # Panics
4920//
4921// Panics if there is an error when creating a visual representation of columns via `arrow::util::pretty`
4922impl fmt::Display for ScalarValue {
4923    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
4924        match self {
4925            ScalarValue::Decimal32(v, p, s) => {
4926                write!(f, "{v:?},{p:?},{s:?}")?;
4927            }
4928            ScalarValue::Decimal64(v, p, s) => {
4929                write!(f, "{v:?},{p:?},{s:?}")?;
4930            }
4931            ScalarValue::Decimal128(v, p, s) => {
4932                write!(f, "{v:?},{p:?},{s:?}")?;
4933            }
4934            ScalarValue::Decimal256(v, p, s) => {
4935                write!(f, "{v:?},{p:?},{s:?}")?;
4936            }
4937            ScalarValue::Boolean(e) => format_option!(f, e)?,
4938            ScalarValue::Float16(e) => format_option!(f, e)?,
4939            ScalarValue::Float32(e) => format_option!(f, e)?,
4940            ScalarValue::Float64(e) => format_option!(f, e)?,
4941            ScalarValue::Int8(e) => format_option!(f, e)?,
4942            ScalarValue::Int16(e) => format_option!(f, e)?,
4943            ScalarValue::Int32(e) => format_option!(f, e)?,
4944            ScalarValue::Int64(e) => format_option!(f, e)?,
4945            ScalarValue::UInt8(e) => format_option!(f, e)?,
4946            ScalarValue::UInt16(e) => format_option!(f, e)?,
4947            ScalarValue::UInt32(e) => format_option!(f, e)?,
4948            ScalarValue::UInt64(e) => format_option!(f, e)?,
4949            ScalarValue::TimestampSecond(e, _) => format_option!(f, e)?,
4950            ScalarValue::TimestampMillisecond(e, _) => format_option!(f, e)?,
4951            ScalarValue::TimestampMicrosecond(e, _) => format_option!(f, e)?,
4952            ScalarValue::TimestampNanosecond(e, _) => format_option!(f, e)?,
4953            ScalarValue::Utf8(e)
4954            | ScalarValue::LargeUtf8(e)
4955            | ScalarValue::Utf8View(e) => format_option!(f, e)?,
4956            ScalarValue::Binary(e)
4957            | ScalarValue::FixedSizeBinary(_, e)
4958            | ScalarValue::LargeBinary(e)
4959            | ScalarValue::BinaryView(e) => match e {
4960                Some(bytes) => {
4961                    // print up to first 10 bytes, with trailing ... if needed
4962                    for b in bytes.iter().take(10) {
4963                        write!(f, "{b:02X}")?;
4964                    }
4965                    if bytes.len() > 10 {
4966                        write!(f, "...")?;
4967                    }
4968                }
4969                None => write!(f, "NULL")?,
4970            },
4971            ScalarValue::List(arr) => fmt_list(arr.as_ref(), f)?,
4972            ScalarValue::LargeList(arr) => fmt_list(arr.as_ref(), f)?,
4973            ScalarValue::FixedSizeList(arr) => fmt_list(arr.as_ref(), f)?,
4974            ScalarValue::Date32(e) => format_option!(
4975                f,
4976                e.map(|v| {
4977                    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
4978                    match epoch.checked_add_signed(Duration::try_days(v as i64).unwrap())
4979                    {
4980                        Some(date) => date.to_string(),
4981                        None => "".to_string(),
4982                    }
4983                })
4984            )?,
4985            ScalarValue::Date64(e) => format_option!(
4986                f,
4987                e.map(|v| {
4988                    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
4989                    match epoch.checked_add_signed(Duration::try_milliseconds(v).unwrap())
4990                    {
4991                        Some(date) => date.to_string(),
4992                        None => "".to_string(),
4993                    }
4994                })
4995            )?,
4996            ScalarValue::Time32Second(e) => format_option!(f, e)?,
4997            ScalarValue::Time32Millisecond(e) => format_option!(f, e)?,
4998            ScalarValue::Time64Microsecond(e) => format_option!(f, e)?,
4999            ScalarValue::Time64Nanosecond(e) => format_option!(f, e)?,
5000            ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
5001            ScalarValue::IntervalMonthDayNano(e) => {
5002                format_option!(f, e.map(|v| format!("{v:?}")))?
5003            }
5004            ScalarValue::IntervalDayTime(e) => {
5005                format_option!(f, e.map(|v| format!("{v:?}")))?;
5006            }
5007            ScalarValue::DurationSecond(e) => format_option!(f, e)?,
5008            ScalarValue::DurationMillisecond(e) => format_option!(f, e)?,
5009            ScalarValue::DurationMicrosecond(e) => format_option!(f, e)?,
5010            ScalarValue::DurationNanosecond(e) => format_option!(f, e)?,
5011            ScalarValue::Struct(struct_arr) => {
5012                // ScalarValue Struct should always have a single element
5013                assert_eq!(struct_arr.len(), 1);
5014
5015                if struct_arr.null_count() == struct_arr.len() {
5016                    write!(f, "NULL")?;
5017                    return Ok(());
5018                }
5019
5020                let columns = struct_arr.columns();
5021                let fields = struct_arr.fields();
5022                let nulls = struct_arr.nulls();
5023
5024                write!(
5025                    f,
5026                    "{{{}}}",
5027                    columns
5028                        .iter()
5029                        .zip(fields.iter())
5030                        .map(|(column, field)| {
5031                            if nulls.is_some_and(|b| b.is_null(0)) {
5032                                format!("{}:NULL", field.name())
5033                            } else if let DataType::Struct(_) = field.data_type() {
5034                                let sv = ScalarValue::Struct(Arc::new(
5035                                    column.as_struct().to_owned(),
5036                                ));
5037                                format!("{}:{sv}", field.name())
5038                            } else {
5039                                let sv = array_value_to_string(column, 0).unwrap();
5040                                format!("{}:{sv}", field.name())
5041                            }
5042                        })
5043                        .collect::<Vec<_>>()
5044                        .join(",")
5045                )?
5046            }
5047            ScalarValue::Map(map_arr) => {
5048                if map_arr.null_count() == map_arr.len() {
5049                    write!(f, "NULL")?;
5050                    return Ok(());
5051                }
5052
5053                write!(
5054                    f,
5055                    "[{}]",
5056                    map_arr
5057                        .iter()
5058                        .map(|struct_array| {
5059                            if let Some(arr) = struct_array {
5060                                let mut buffer = VecDeque::new();
5061                                for i in 0..arr.len() {
5062                                    let key =
5063                                        array_value_to_string(arr.column(0), i).unwrap();
5064                                    let value =
5065                                        array_value_to_string(arr.column(1), i).unwrap();
5066                                    buffer.push_back(format!("{key}:{value}"));
5067                                }
5068                                format!(
5069                                    "{{{}}}",
5070                                    buffer
5071                                        .into_iter()
5072                                        .collect::<Vec<_>>()
5073                                        .join(",")
5074                                        .as_str()
5075                                )
5076                            } else {
5077                                "NULL".to_string()
5078                            }
5079                        })
5080                        .collect::<Vec<_>>()
5081                        .join(",")
5082                )?
5083            }
5084            ScalarValue::Union(val, _fields, _mode) => match val {
5085                Some((id, val)) => write!(f, "{id}:{val}")?,
5086                None => write!(f, "NULL")?,
5087            },
5088            ScalarValue::Dictionary(_k, v) => write!(f, "{v}")?,
5089            ScalarValue::RunEndEncoded(_, _, v) => write!(f, "{v}")?,
5090            ScalarValue::Null => write!(f, "NULL")?,
5091        };
5092        Ok(())
5093    }
5094}
5095
5096fn fmt_list(arr: &dyn Array, f: &mut fmt::Formatter) -> fmt::Result {
5097    // ScalarValue List, LargeList, FixedSizeList should always have a single element
5098    assert_eq!(arr.len(), 1);
5099    let options = FormatOptions::default().with_display_error(true);
5100    let formatter = ArrayFormatter::try_new(arr, &options).unwrap();
5101    let value_formatter = formatter.value(0);
5102    write!(f, "{value_formatter}")
5103}
5104
/// Writes `data` as comma-separated decimal byte values, e.g. `[1, 2, 3]`
/// becomes `1,2,3`. An empty slice writes nothing.
fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result {
    let Some((head, tail)) = data.split_first() else {
        return Ok(());
    };
    write!(f, "{head}")?;
    // Every byte after the first is preceded by its separator
    tail.iter().try_for_each(|byte| write!(f, ",{byte}"))
}
5116
5117impl fmt::Debug for ScalarValue {
5118    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
5119        match self {
5120            ScalarValue::Decimal32(_, _, _) => write!(f, "Decimal32({self})"),
5121            ScalarValue::Decimal64(_, _, _) => write!(f, "Decimal64({self})"),
5122            ScalarValue::Decimal128(_, _, _) => write!(f, "Decimal128({self})"),
5123            ScalarValue::Decimal256(_, _, _) => write!(f, "Decimal256({self})"),
5124            ScalarValue::Boolean(_) => write!(f, "Boolean({self})"),
5125            ScalarValue::Float16(_) => write!(f, "Float16({self})"),
5126            ScalarValue::Float32(_) => write!(f, "Float32({self})"),
5127            ScalarValue::Float64(_) => write!(f, "Float64({self})"),
5128            ScalarValue::Int8(_) => write!(f, "Int8({self})"),
5129            ScalarValue::Int16(_) => write!(f, "Int16({self})"),
5130            ScalarValue::Int32(_) => write!(f, "Int32({self})"),
5131            ScalarValue::Int64(_) => write!(f, "Int64({self})"),
5132            ScalarValue::UInt8(_) => write!(f, "UInt8({self})"),
5133            ScalarValue::UInt16(_) => write!(f, "UInt16({self})"),
5134            ScalarValue::UInt32(_) => write!(f, "UInt32({self})"),
5135            ScalarValue::UInt64(_) => write!(f, "UInt64({self})"),
5136            ScalarValue::TimestampSecond(_, tz_opt) => {
5137                write!(f, "TimestampSecond({self}, {tz_opt:?})")
5138            }
5139            ScalarValue::TimestampMillisecond(_, tz_opt) => {
5140                write!(f, "TimestampMillisecond({self}, {tz_opt:?})")
5141            }
5142            ScalarValue::TimestampMicrosecond(_, tz_opt) => {
5143                write!(f, "TimestampMicrosecond({self}, {tz_opt:?})")
5144            }
5145            ScalarValue::TimestampNanosecond(_, tz_opt) => {
5146                write!(f, "TimestampNanosecond({self}, {tz_opt:?})")
5147            }
5148            ScalarValue::Utf8(None) => write!(f, "Utf8({self})"),
5149            ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{self}\")"),
5150            ScalarValue::Utf8View(None) => write!(f, "Utf8View({self})"),
5151            ScalarValue::Utf8View(Some(_)) => write!(f, "Utf8View(\"{self}\")"),
5152            ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({self})"),
5153            ScalarValue::LargeUtf8(Some(_)) => write!(f, "LargeUtf8(\"{self}\")"),
5154            ScalarValue::Binary(None) => write!(f, "Binary({self})"),
5155            ScalarValue::Binary(Some(b)) => {
5156                write!(f, "Binary(\"")?;
5157                fmt_binary(b.as_slice(), f)?;
5158                write!(f, "\")")
5159            }
5160            ScalarValue::BinaryView(None) => write!(f, "BinaryView({self})"),
5161            ScalarValue::BinaryView(Some(b)) => {
5162                write!(f, "BinaryView(\"")?;
5163                fmt_binary(b.as_slice(), f)?;
5164                write!(f, "\")")
5165            }
5166            ScalarValue::FixedSizeBinary(size, None) => {
5167                write!(f, "FixedSizeBinary({size}, {self})")
5168            }
5169            ScalarValue::FixedSizeBinary(size, Some(b)) => {
5170                write!(f, "FixedSizeBinary({size}, \"")?;
5171                fmt_binary(b.as_slice(), f)?;
5172                write!(f, "\")")
5173            }
5174            ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({self})"),
5175            ScalarValue::LargeBinary(Some(b)) => {
5176                write!(f, "LargeBinary(\"")?;
5177                fmt_binary(b.as_slice(), f)?;
5178                write!(f, "\")")
5179            }
5180            ScalarValue::FixedSizeList(_) => write!(f, "FixedSizeList({self})"),
5181            ScalarValue::List(_) => write!(f, "List({self})"),
5182            ScalarValue::LargeList(_) => write!(f, "LargeList({self})"),
5183            ScalarValue::Struct(struct_arr) => {
5184                // ScalarValue Struct should always have a single element
5185                assert_eq!(struct_arr.len(), 1);
5186
5187                let columns = struct_arr.columns();
5188                let fields = struct_arr.fields();
5189
5190                write!(
5191                    f,
5192                    "Struct({{{}}})",
5193                    columns
5194                        .iter()
5195                        .zip(fields.iter())
5196                        .map(|(column, field)| {
5197                            let sv = array_value_to_string(column, 0).unwrap();
5198                            let name = field.name();
5199                            format!("{name}:{sv}")
5200                        })
5201                        .collect::<Vec<_>>()
5202                        .join(",")
5203                )
5204            }
5205            ScalarValue::Map(map_arr) => {
5206                write!(
5207                    f,
5208                    "Map([{}])",
5209                    map_arr
5210                        .iter()
5211                        .map(|struct_array| {
5212                            if let Some(arr) = struct_array {
5213                                let buffer: Vec<String> = (0..arr.len())
5214                                    .map(|i| {
5215                                        let key = array_value_to_string(arr.column(0), i)
5216                                            .unwrap();
5217                                        let value =
5218                                            array_value_to_string(arr.column(1), i)
5219                                                .unwrap();
5220                                        format!("{key:?}:{value:?}")
5221                                    })
5222                                    .collect();
5223                                format!("{{{}}}", buffer.join(","))
5224                            } else {
5225                                "NULL".to_string()
5226                            }
5227                        })
5228                        .collect::<Vec<_>>()
5229                        .join(",")
5230                )
5231            }
5232            ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"),
5233            ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"),
5234            ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{self}\")"),
5235            ScalarValue::Time32Millisecond(_) => {
5236                write!(f, "Time32Millisecond(\"{self}\")")
5237            }
5238            ScalarValue::Time64Microsecond(_) => {
5239                write!(f, "Time64Microsecond(\"{self}\")")
5240            }
5241            ScalarValue::Time64Nanosecond(_) => {
5242                write!(f, "Time64Nanosecond(\"{self}\")")
5243            }
5244            ScalarValue::IntervalDayTime(_) => {
5245                write!(f, "IntervalDayTime(\"{self}\")")
5246            }
5247            ScalarValue::IntervalYearMonth(_) => {
5248                write!(f, "IntervalYearMonth(\"{self}\")")
5249            }
5250            ScalarValue::IntervalMonthDayNano(_) => {
5251                write!(f, "IntervalMonthDayNano(\"{self}\")")
5252            }
5253            ScalarValue::DurationSecond(_) => write!(f, "DurationSecond(\"{self}\")"),
5254            ScalarValue::DurationMillisecond(_) => {
5255                write!(f, "DurationMillisecond(\"{self}\")")
5256            }
5257            ScalarValue::DurationMicrosecond(_) => {
5258                write!(f, "DurationMicrosecond(\"{self}\")")
5259            }
5260            ScalarValue::DurationNanosecond(_) => {
5261                write!(f, "DurationNanosecond(\"{self}\")")
5262            }
5263            ScalarValue::Union(val, _fields, _mode) => match val {
5264                Some((id, val)) => write!(f, "Union {id}:{val}"),
5265                None => write!(f, "Union(NULL)"),
5266            },
5267            ScalarValue::Dictionary(k, v) => write!(f, "Dictionary({k:?}, {v:?})"),
5268            ScalarValue::RunEndEncoded(rf, vf, v) => {
5269                write!(f, "RunEndEncoded({rf:?}, {vf:?}, {v:?})")
5270            }
5271            ScalarValue::Null => write!(f, "NULL"),
5272        }
5273    }
5274}
5275
5276/// Trait used to map a NativeType to a ScalarValue
5277pub trait ScalarType<T: ArrowNativeType> {
5278    /// returns a scalar from an optional T
5279    fn scalar(r: Option<T>) -> ScalarValue;
5280}
5281
impl ScalarType<f32> for Float32Type {
    /// Wraps an optional `f32` in `ScalarValue::Float32`
    fn scalar(r: Option<f32>) -> ScalarValue {
        ScalarValue::Float32(r)
    }
}
5287
impl ScalarType<i64> for TimestampSecondType {
    /// Wraps an optional `i64` in `ScalarValue::TimestampSecond`, leaving the
    /// variant's second field as `None`
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampSecond(r, None)
    }
}
5293
impl ScalarType<i64> for TimestampMillisecondType {
    /// Wraps an optional `i64` in `ScalarValue::TimestampMillisecond`, leaving
    /// the variant's second field as `None`
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampMillisecond(r, None)
    }
}
5299
impl ScalarType<i64> for TimestampMicrosecondType {
    /// Wraps an optional `i64` in `ScalarValue::TimestampMicrosecond`, leaving
    /// the variant's second field as `None`
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampMicrosecond(r, None)
    }
}
5305
impl ScalarType<i64> for TimestampNanosecondType {
    /// Wraps an optional `i64` in `ScalarValue::TimestampNanosecond`, leaving
    /// the variant's second field as `None`
    fn scalar(r: Option<i64>) -> ScalarValue {
        ScalarValue::TimestampNanosecond(r, None)
    }
}
5311
impl ScalarType<i32> for Date32Type {
    /// Wraps an optional `i32` in `ScalarValue::Date32`
    fn scalar(r: Option<i32>) -> ScalarValue {
        ScalarValue::Date32(r)
    }
}
5317
5318#[cfg(test)]
5319mod tests {
5320    use std::sync::Arc;
5321
5322    use super::*;
5323    use crate::cast::{as_list_array, as_map_array, as_struct_array};
5324    use crate::test_util::batches_to_string;
5325    use arrow::array::{
5326        FixedSizeListBuilder, Int32Builder, LargeListBuilder, ListBuilder, MapBuilder,
5327        NullArray, NullBufferBuilder, OffsetSizeTrait, PrimitiveBuilder, RecordBatch,
5328        StringBuilder, StringDictionaryBuilder, StructBuilder, UnionBuilder,
5329    };
5330    use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer};
5331    use arrow::compute::{is_null, kernels};
5332    use arrow::datatypes::{
5333        ArrowNumericType, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, Fields,
5334        Float64Type, TimeUnit,
5335    };
5336    use arrow::error::ArrowError;
5337    use arrow::util::pretty::pretty_format_columns;
5338    use chrono::NaiveDate;
5339    use insta::assert_snapshot;
5340    use rand::Rng;
5341
5342    #[test]
5343    fn test_scalar_value_from_for_map() {
5344        let string_builder = StringBuilder::new();
5345        let int_builder = Int32Builder::with_capacity(4);
5346        let mut builder = MapBuilder::new(None, string_builder, int_builder);
5347        builder.keys().append_value("joe");
5348        builder.values().append_value(1);
5349        builder.append(true).unwrap();
5350
5351        builder.keys().append_value("blogs");
5352        builder.values().append_value(2);
5353        builder.keys().append_value("foo");
5354        builder.values().append_value(4);
5355        builder.append(true).unwrap();
5356        builder.append(true).unwrap();
5357        builder.append(false).unwrap();
5358
5359        let expected = builder.finish();
5360
5361        let sv = ScalarValue::Map(Arc::new(expected.clone()));
5362        let map_arr = sv.to_array().unwrap();
5363        let actual = as_map_array(&map_arr).unwrap();
5364        assert_eq!(actual, &expected);
5365    }
5366
5367    #[test]
5368    fn test_format_timestamp_type_for_error_and_bounds() {
5369        // format helper
5370        let ts_ns = format_timestamp_type_for_error(&DataType::Timestamp(
5371            TimeUnit::Nanosecond,
5372            None,
5373        ));
5374        assert_eq!(ts_ns, "Timestamp(ns)");
5375
5376        let ts_us = format_timestamp_type_for_error(&DataType::Timestamp(
5377            TimeUnit::Microsecond,
5378            None,
5379        ));
5380        assert_eq!(ts_us, "Timestamp(us)");
5381
5382        // ensure_timestamp_in_bounds: Date32 non-overflow
5383        let ok = ensure_timestamp_in_bounds(
5384            1000,
5385            NANOS_PER_DAY,
5386            &DataType::Date32,
5387            &DataType::Timestamp(TimeUnit::Nanosecond, None),
5388        );
5389        assert!(ok.is_ok());
5390
5391        // Date32 overflow -- known large day value (9999-12-31 -> 2932896)
5392        let err = ensure_timestamp_in_bounds(
5393            2932896,
5394            NANOS_PER_DAY,
5395            &DataType::Date32,
5396            &DataType::Timestamp(TimeUnit::Nanosecond, None),
5397        );
5398        assert!(err.is_err());
5399        let msg = err.unwrap_err().to_string();
5400        assert!(msg.contains("Cannot cast Date32 value 2932896 to Timestamp(ns): converted value exceeds the representable i64 range"));
5401
5402        // Date64 overflow for ns (millis * 1_000_000)
5403        let overflow_millis: i64 = (i64::MAX / NANOS_PER_MILLISECOND) + 1;
5404        let err2 = ensure_timestamp_in_bounds(
5405            overflow_millis,
5406            NANOS_PER_MILLISECOND,
5407            &DataType::Date64,
5408            &DataType::Timestamp(TimeUnit::Nanosecond, None),
5409        );
5410        assert!(err2.is_err());
5411    }
5412
5413    #[test]
5414    fn test_scalar_value_from_for_struct() {
5415        let boolean = Arc::new(BooleanArray::from(vec![false]));
5416        let int = Arc::new(Int32Array::from(vec![42]));
5417
5418        let expected = StructArray::from(vec![
5419            (
5420                Arc::new(Field::new("b", DataType::Boolean, false)),
5421                Arc::clone(&boolean) as ArrayRef,
5422            ),
5423            (
5424                Arc::new(Field::new("c", DataType::Int32, false)),
5425                Arc::clone(&int) as ArrayRef,
5426            ),
5427        ]);
5428
5429        let sv = ScalarStructBuilder::new()
5430            .with_array(Field::new("b", DataType::Boolean, false), boolean)
5431            .with_array(Field::new("c", DataType::Int32, false), int)
5432            .build()
5433            .unwrap();
5434
5435        let struct_arr = sv.to_array().unwrap();
5436        let actual = as_struct_array(&struct_arr).unwrap();
5437        assert_eq!(actual, &expected);
5438    }
5439
5440    #[test]
5441    #[should_panic(
5442        expected = "InvalidArgumentError(\"Incorrect array length for StructArray field \\\"bool\\\", expected 1 got 4\")"
5443    )]
5444    fn test_scalar_value_from_for_struct_should_panic() {
5445        let _ = ScalarStructBuilder::new()
5446            .with_array(
5447                Field::new("bool", DataType::Boolean, false),
5448                Arc::new(BooleanArray::from(vec![false, true, false, false])),
5449            )
5450            .with_array(
5451                Field::new("i32", DataType::Int32, false),
5452                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
5453            )
5454            .build()
5455            .unwrap();
5456    }
5457
5458    #[test]
5459    fn test_to_array_of_size_for_nested() {
5460        // Struct
5461        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
5462        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
5463
5464        let struct_array = StructArray::from(vec![
5465            (
5466                Arc::new(Field::new("b", DataType::Boolean, false)),
5467                Arc::clone(&boolean) as ArrayRef,
5468            ),
5469            (
5470                Arc::new(Field::new("c", DataType::Int32, false)),
5471                Arc::clone(&int) as ArrayRef,
5472            ),
5473        ]);
5474        let sv = ScalarValue::Struct(Arc::new(struct_array));
5475        let actual_arr = sv.to_array_of_size(2).unwrap();
5476
5477        let boolean = Arc::new(BooleanArray::from(vec![
5478            false, false, true, true, false, false, true, true,
5479        ]));
5480        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31, 42, 28, 19, 31]));
5481
5482        let struct_array = StructArray::from(vec![
5483            (
5484                Arc::new(Field::new("b", DataType::Boolean, false)),
5485                Arc::clone(&boolean) as ArrayRef,
5486            ),
5487            (
5488                Arc::new(Field::new("c", DataType::Int32, false)),
5489                Arc::clone(&int) as ArrayRef,
5490            ),
5491        ]);
5492
5493        let actual = as_struct_array(&actual_arr).unwrap();
5494        assert_eq!(actual, &struct_array);
5495
5496        // List
5497        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
5498            Some(1),
5499            None,
5500            Some(2),
5501        ])]);
5502
5503        let sv = ScalarValue::List(Arc::new(arr));
5504        let actual_arr = sv
5505            .to_array_of_size(2)
5506            .expect("Failed to convert to array of size");
5507        let actual_list_arr = actual_arr.as_list::<i32>();
5508
5509        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5510            Some(vec![Some(1), None, Some(2)]),
5511            Some(vec![Some(1), None, Some(2)]),
5512        ]);
5513
5514        assert_eq!(&arr, actual_list_arr);
5515    }
5516
5517    #[test]
5518    fn test_to_array_of_size_for_fsl() {
5519        let values = Int32Array::from_iter([Some(1), None, Some(2)]);
5520        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
5521        let arr = FixedSizeListArray::new(Arc::clone(&field), 3, Arc::new(values), None);
5522        let sv = ScalarValue::FixedSizeList(Arc::new(arr));
5523        let actual_arr = sv
5524            .to_array_of_size(2)
5525            .expect("Failed to convert to array of size");
5526
5527        let expected_values =
5528            Int32Array::from_iter([Some(1), None, Some(2), Some(1), None, Some(2)]);
5529        let expected_arr =
5530            FixedSizeListArray::new(field, 3, Arc::new(expected_values), None);
5531
5532        assert_eq!(
5533            &expected_arr,
5534            as_fixed_size_list_array(actual_arr.as_ref()).unwrap()
5535        );
5536
5537        let empty_array = sv
5538            .to_array_of_size(0)
5539            .expect("Failed to convert to empty array");
5540
5541        assert_eq!(empty_array.len(), 0);
5542    }
5543
5544    #[test]
5545    fn test_to_array_of_size_list_size_one() {
5546        // size=1 takes the fast path (Arc::clone)
5547        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
5548            Some(10),
5549            Some(20),
5550        ])]);
5551        let sv = ScalarValue::List(Arc::new(arr.clone()));
5552        let result = sv.to_array_of_size(1).unwrap();
5553        assert_eq!(result.as_list::<i32>(), &arr);
5554    }
5555
5556    #[test]
5557    fn test_to_array_of_size_list_empty_inner() {
5558        // A list scalar containing an empty list: [[]]
5559        let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![])]);
5560        let sv = ScalarValue::List(Arc::new(arr));
5561        let result = sv.to_array_of_size(3).unwrap();
5562        let result_list = result.as_list::<i32>();
5563        assert_eq!(result_list.len(), 3);
5564        for i in 0..3 {
5565            assert_eq!(result_list.value(i).len(), 0);
5566        }
5567    }
5568
5569    #[test]
5570    fn test_to_array_of_size_large_list() {
5571        let arr =
5572            LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
5573                Some(100),
5574                Some(200),
5575            ])]);
5576        let sv = ScalarValue::LargeList(Arc::new(arr));
5577        let result = sv.to_array_of_size(3).unwrap();
5578        let expected = LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5579            Some(vec![Some(100), Some(200)]),
5580            Some(vec![Some(100), Some(200)]),
5581            Some(vec![Some(100), Some(200)]),
5582        ]);
5583        assert_eq!(result.as_list::<i64>(), &expected);
5584    }
5585
5586    #[test]
5587    fn test_list_to_array_of_size_multi_row() {
5588        // Call list_to_array_of_size directly with arr.len() > 1
5589        let arr = Int32Array::from(vec![Some(10), None, Some(30)]);
5590        let result = ScalarValue::list_to_array_of_size(&arr, 3).unwrap();
5591        let result = result.as_primitive::<Int32Type>();
5592        assert_eq!(
5593            result.iter().collect::<Vec<_>>(),
5594            vec![
5595                Some(10),
5596                None,
5597                Some(30),
5598                Some(10),
5599                None,
5600                Some(30),
5601                Some(10),
5602                None,
5603                Some(30),
5604            ]
5605        );
5606    }
5607
5608    #[test]
5609    fn test_to_array_of_size_null_list() {
5610        let dt = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
5611        let sv = ScalarValue::try_from(&dt).unwrap();
5612        let result = sv.to_array_of_size(3).unwrap();
5613        assert_eq!(result.len(), 3);
5614        assert_eq!(result.null_count(), 3);
5615    }
5616
5617    /// See https://github.com/apache/datafusion/issues/18870
5618    #[test]
5619    fn test_to_array_of_size_for_none_fsb() {
5620        let sv = ScalarValue::FixedSizeBinary(5, None);
5621        let result = sv
5622            .to_array_of_size(2)
5623            .expect("Failed to convert to array of size");
5624        assert_eq!(result.len(), 2);
5625        assert_eq!(result.null_count(), 2);
5626        assert_eq!(result.as_fixed_size_binary().values().len(), 10);
5627    }
5628
5629    #[test]
5630    fn test_list_to_array_string() {
5631        let scalars = vec![
5632            ScalarValue::from("rust"),
5633            ScalarValue::from("arrow"),
5634            ScalarValue::from("data-fusion"),
5635        ];
5636
5637        let result = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8);
5638
5639        let expected = single_row_list_array(vec!["rust", "arrow", "data-fusion"]);
5640        assert_eq!(*result, expected);
5641    }
5642
5643    fn single_row_list_array(items: Vec<&str>) -> ListArray {
5644        SingleRowListArrayBuilder::new(Arc::new(StringArray::from(items)))
5645            .build_list_array()
5646    }
5647
5648    fn build_list<O: OffsetSizeTrait>(
5649        values: Vec<Option<Vec<Option<i64>>>>,
5650    ) -> Vec<ScalarValue> {
5651        values
5652            .into_iter()
5653            .map(|v| {
5654                let arr = if v.is_some() {
5655                    Arc::new(
5656                        GenericListArray::<O>::from_iter_primitive::<Int64Type, _, _>(
5657                            vec![v],
5658                        ),
5659                    )
5660                } else if O::IS_LARGE {
5661                    new_null_array(
5662                        &DataType::LargeList(Arc::new(Field::new_list_field(
5663                            DataType::Int64,
5664                            true,
5665                        ))),
5666                        1,
5667                    )
5668                } else {
5669                    new_null_array(
5670                        &DataType::List(Arc::new(Field::new_list_field(
5671                            DataType::Int64,
5672                            true,
5673                        ))),
5674                        1,
5675                    )
5676                };
5677
5678                if O::IS_LARGE {
5679                    ScalarValue::LargeList(arr.as_list::<i64>().to_owned().into())
5680                } else {
5681                    ScalarValue::List(arr.as_list::<i32>().to_owned().into())
5682                }
5683            })
5684            .collect()
5685    }
5686
5687    #[test]
5688    fn test_iter_to_array_fixed_size_list() {
5689        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
5690        let f1 = Arc::new(FixedSizeListArray::new(
5691            Arc::clone(&field),
5692            3,
5693            Arc::new(Int32Array::from(vec![1, 2, 3])),
5694            None,
5695        ));
5696        let f2 = Arc::new(FixedSizeListArray::new(
5697            Arc::clone(&field),
5698            3,
5699            Arc::new(Int32Array::from(vec![4, 5, 6])),
5700            None,
5701        ));
5702        let f_nulls = Arc::new(FixedSizeListArray::new_null(field, 1, 1));
5703
5704        let scalars = vec![
5705            ScalarValue::FixedSizeList(Arc::clone(&f_nulls)),
5706            ScalarValue::FixedSizeList(f1),
5707            ScalarValue::FixedSizeList(f2),
5708            ScalarValue::FixedSizeList(f_nulls),
5709        ];
5710
5711        let array = ScalarValue::iter_to_array(scalars).unwrap();
5712
5713        let expected = FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
5714            vec![
5715                None,
5716                Some(vec![Some(1), Some(2), Some(3)]),
5717                Some(vec![Some(4), Some(5), Some(6)]),
5718                None,
5719            ],
5720            3,
5721        );
5722        assert_eq!(array.as_ref(), &expected);
5723    }
5724
5725    #[test]
5726    fn test_iter_to_array_struct() {
5727        let s1 = StructArray::from(vec![
5728            (
5729                Arc::new(Field::new("A", DataType::Boolean, false)),
5730                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
5731            ),
5732            (
5733                Arc::new(Field::new("B", DataType::Int32, false)),
5734                Arc::new(Int32Array::from(vec![42])) as ArrayRef,
5735            ),
5736        ]);
5737
5738        let s2 = StructArray::from(vec![
5739            (
5740                Arc::new(Field::new("A", DataType::Boolean, false)),
5741                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
5742            ),
5743            (
5744                Arc::new(Field::new("B", DataType::Int32, false)),
5745                Arc::new(Int32Array::from(vec![42])) as ArrayRef,
5746            ),
5747        ]);
5748
5749        let scalars = vec![
5750            ScalarValue::Struct(Arc::new(s1)),
5751            ScalarValue::Struct(Arc::new(s2)),
5752        ];
5753
5754        let array = ScalarValue::iter_to_array(scalars).unwrap();
5755
5756        let expected = StructArray::from(vec![
5757            (
5758                Arc::new(Field::new("A", DataType::Boolean, false)),
5759                Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
5760            ),
5761            (
5762                Arc::new(Field::new("B", DataType::Int32, false)),
5763                Arc::new(Int32Array::from(vec![42, 42])) as ArrayRef,
5764            ),
5765        ]);
5766        assert_eq!(array.as_ref(), &expected);
5767    }
5768
5769    #[test]
5770    fn test_iter_to_array_struct_with_nulls() {
5771        // non-null
5772        let s1 = StructArray::from((
5773            vec![
5774                (
5775                    Arc::new(Field::new("A", DataType::Int32, false)),
5776                    Arc::new(Int32Array::from(vec![1])) as ArrayRef,
5777                ),
5778                (
5779                    Arc::new(Field::new("B", DataType::Int64, false)),
5780                    Arc::new(Int64Array::from(vec![2])) as ArrayRef,
5781                ),
5782            ],
5783            // Present the null mask, 1 is non-null, 0 is null
5784            Buffer::from(&[1]),
5785        ));
5786
5787        // null
5788        let s2 = StructArray::from((
5789            vec![
5790                (
5791                    Arc::new(Field::new("A", DataType::Int32, false)),
5792                    Arc::new(Int32Array::from(vec![3])) as ArrayRef,
5793                ),
5794                (
5795                    Arc::new(Field::new("B", DataType::Int64, false)),
5796                    Arc::new(Int64Array::from(vec![4])) as ArrayRef,
5797                ),
5798            ],
5799            Buffer::from(&[0]),
5800        ));
5801
5802        let scalars = vec![
5803            ScalarValue::Struct(Arc::new(s1)),
5804            ScalarValue::Struct(Arc::new(s2)),
5805        ];
5806
5807        let array = ScalarValue::iter_to_array(scalars).unwrap();
5808        let struct_array = array.as_struct();
5809        assert!(struct_array.is_valid(0));
5810        assert!(struct_array.is_null(1));
5811    }
5812
5813    #[test]
5814    fn iter_to_array_primitive_test() {
5815        // List[[1,2,3]], List[null], List[[4,5]]
5816        let scalars = build_list::<i32>(vec![
5817            Some(vec![Some(1), Some(2), Some(3)]),
5818            None,
5819            Some(vec![Some(4), Some(5)]),
5820        ]);
5821
5822        let array = ScalarValue::iter_to_array(scalars).unwrap();
5823        let list_array = as_list_array(&array).unwrap();
5824        // List[[1,2,3], null, [4,5]]
5825        let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
5826            Some(vec![Some(1), Some(2), Some(3)]),
5827            None,
5828            Some(vec![Some(4), Some(5)]),
5829        ]);
5830        assert_eq!(list_array, &expected);
5831
5832        let scalars = build_list::<i64>(vec![
5833            Some(vec![Some(1), Some(2), Some(3)]),
5834            None,
5835            Some(vec![Some(4), Some(5)]),
5836        ]);
5837
5838        let array = ScalarValue::iter_to_array(scalars).unwrap();
5839        let list_array = as_large_list_array(&array).unwrap();
5840        let expected = LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![
5841            Some(vec![Some(1), Some(2), Some(3)]),
5842            None,
5843            Some(vec![Some(4), Some(5)]),
5844        ]);
5845        assert_eq!(list_array, &expected);
5846    }
5847
5848    #[test]
5849    fn iter_to_array_string_test() {
5850        let arr1 = single_row_list_array(vec!["foo", "bar", "baz"]);
5851        let arr2 = single_row_list_array(vec!["rust", "world"]);
5852
5853        let scalars = vec![
5854            ScalarValue::List(Arc::new(arr1)),
5855            ScalarValue::List(Arc::new(arr2)),
5856        ];
5857
5858        let array = ScalarValue::iter_to_array(scalars).unwrap();
5859        let result = array.as_list::<i32>();
5860
5861        // build expected array
5862        let string_builder = StringBuilder::with_capacity(5, 25);
5863        let mut list_of_string_builder = ListBuilder::new(string_builder);
5864
5865        list_of_string_builder.values().append_value("foo");
5866        list_of_string_builder.values().append_value("bar");
5867        list_of_string_builder.values().append_value("baz");
5868        list_of_string_builder.append(true);
5869
5870        list_of_string_builder.values().append_value("rust");
5871        list_of_string_builder.values().append_value("world");
5872        list_of_string_builder.append(true);
5873        let expected = list_of_string_builder.finish();
5874
5875        assert_eq!(result, &expected);
5876    }
5877
5878    #[test]
5879    fn test_list_scalar_eq_to_array() {
5880        let list_array: ArrayRef =
5881            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5882                Some(vec![Some(0), Some(1), Some(2)]),
5883                None,
5884                Some(vec![None, Some(5)]),
5885            ]));
5886
5887        let fsl_array: ArrayRef =
5888            Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5889                Some(vec![Some(0), Some(1), Some(2)]),
5890                None,
5891                Some(vec![Some(3), None, Some(5)]),
5892            ]));
5893
5894        for arr in [list_array, fsl_array] {
5895            for i in 0..arr.len() {
5896                let scalar =
5897                    ScalarValue::List(arr.slice(i, 1).as_list::<i32>().to_owned().into());
5898                assert!(scalar.eq_array(&arr, i).unwrap());
5899            }
5900        }
5901    }
5902
5903    #[test]
5904    fn test_eq_array_err_message() {
5905        assert_starts_with(
5906            ScalarValue::Utf8(Some("123".to_string()))
5907                .eq_array(&(Arc::new(Int32Array::from(vec![123])) as ArrayRef), 0)
5908                .unwrap_err()
5909                .message(),
5910            "could not cast array of type Int32 to arrow_array::array::byte_array::GenericByteArray<arrow_array::types::GenericStringType<i32>>",
5911        );
5912    }
5913
5914    #[test]
5915    fn scalar_add_trait_test() -> Result<()> {
5916        let float_value = ScalarValue::Float64(Some(123.));
5917        let float_value_2 = ScalarValue::Float64(Some(123.));
5918        assert_eq!(
5919            (float_value.add(&float_value_2))?,
5920            ScalarValue::Float64(Some(246.))
5921        );
5922        assert_eq!(
5923            (float_value.add(float_value_2))?,
5924            ScalarValue::Float64(Some(246.))
5925        );
5926        Ok(())
5927    }
5928
5929    #[test]
5930    fn scalar_sub_trait_test() -> Result<()> {
5931        let float_value = ScalarValue::Float64(Some(123.));
5932        let float_value_2 = ScalarValue::Float64(Some(123.));
5933        assert_eq!(
5934            float_value.sub(&float_value_2)?,
5935            ScalarValue::Float64(Some(0.))
5936        );
5937        assert_eq!(
5938            float_value.sub(float_value_2)?,
5939            ScalarValue::Float64(Some(0.))
5940        );
5941        Ok(())
5942    }
5943
5944    #[test]
5945    fn scalar_sub_trait_int32_test() -> Result<()> {
5946        let int_value = ScalarValue::Int32(Some(42));
5947        let int_value_2 = ScalarValue::Int32(Some(100));
5948        assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int32(Some(-58)));
5949        assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int32(Some(58)));
5950        Ok(())
5951    }
5952
5953    #[test]
5954    fn scalar_sub_trait_int32_overflow_test() {
5955        let int_value = ScalarValue::Int32(Some(i32::MAX));
5956        let int_value_2 = ScalarValue::Int32(Some(i32::MIN));
5957        let err = int_value
5958            .sub_checked(&int_value_2)
5959            .unwrap_err()
5960            .strip_backtrace();
5961        assert_eq!(
5962            err,
5963            "Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
5964        )
5965    }
5966
5967    #[test]
5968    fn scalar_sub_trait_int64_test() -> Result<()> {
5969        let int_value = ScalarValue::Int64(Some(42));
5970        let int_value_2 = ScalarValue::Int64(Some(100));
5971        assert_eq!(int_value.sub(&int_value_2)?, ScalarValue::Int64(Some(-58)));
5972        assert_eq!(int_value_2.sub(int_value)?, ScalarValue::Int64(Some(58)));
5973        Ok(())
5974    }
5975
5976    #[test]
5977    fn scalar_sub_trait_int64_overflow_test() {
5978        let int_value = ScalarValue::Int64(Some(i64::MAX));
5979        let int_value_2 = ScalarValue::Int64(Some(i64::MIN));
5980        let err = int_value
5981            .sub_checked(&int_value_2)
5982            .unwrap_err()
5983            .strip_backtrace();
5984        assert_eq!(
5985            err,
5986            "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808"
5987        )
5988    }
5989
5990    #[test]
5991    fn scalar_add_overflow_test() -> Result<()> {
5992        check_scalar_add_overflow::<Int8Type>(
5993            ScalarValue::Int8(Some(i8::MAX)),
5994            ScalarValue::Int8(Some(i8::MAX)),
5995        );
5996        check_scalar_add_overflow::<UInt8Type>(
5997            ScalarValue::UInt8(Some(u8::MAX)),
5998            ScalarValue::UInt8(Some(u8::MAX)),
5999        );
6000        check_scalar_add_overflow::<Int16Type>(
6001            ScalarValue::Int16(Some(i16::MAX)),
6002            ScalarValue::Int16(Some(i16::MAX)),
6003        );
6004        check_scalar_add_overflow::<UInt16Type>(
6005            ScalarValue::UInt16(Some(u16::MAX)),
6006            ScalarValue::UInt16(Some(u16::MAX)),
6007        );
6008        check_scalar_add_overflow::<Int32Type>(
6009            ScalarValue::Int32(Some(i32::MAX)),
6010            ScalarValue::Int32(Some(i32::MAX)),
6011        );
6012        check_scalar_add_overflow::<UInt32Type>(
6013            ScalarValue::UInt32(Some(u32::MAX)),
6014            ScalarValue::UInt32(Some(u32::MAX)),
6015        );
6016        check_scalar_add_overflow::<Int64Type>(
6017            ScalarValue::Int64(Some(i64::MAX)),
6018            ScalarValue::Int64(Some(i64::MAX)),
6019        );
6020        check_scalar_add_overflow::<UInt64Type>(
6021            ScalarValue::UInt64(Some(u64::MAX)),
6022            ScalarValue::UInt64(Some(u64::MAX)),
6023        );
6024
6025        Ok(())
6026    }
6027
6028    // Verifies that ScalarValue has the same behavior with compute kernel when it overflows.
6029    fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
6030    where
6031        T: ArrowNumericType,
6032    {
6033        let scalar_result = left.add_checked(&right);
6034
6035        let left_array = left.to_array().expect("Failed to convert to array");
6036        let right_array = right.to_array().expect("Failed to convert to array");
6037        let arrow_left_array = left_array.as_primitive::<T>();
6038        let arrow_right_array = right_array.as_primitive::<T>();
6039        let arrow_result = add(arrow_left_array, arrow_right_array);
6040
6041        assert_eq!(scalar_result.is_ok(), arrow_result.is_ok());
6042    }
6043
6044    #[test]
6045    fn test_interval_add_timestamp() -> Result<()> {
6046        let interval = ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
6047            months: 1,
6048            days: 2,
6049            nanoseconds: 3,
6050        }));
6051        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
6052        let result = interval.add(&timestamp)?;
6053        let expect = timestamp.add(&interval)?;
6054        assert_eq!(result, expect);
6055
6056        let interval = ScalarValue::IntervalYearMonth(Some(123));
6057        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
6058        let result = interval.add(&timestamp)?;
6059        let expect = timestamp.add(&interval)?;
6060        assert_eq!(result, expect);
6061
6062        let interval = ScalarValue::IntervalDayTime(Some(IntervalDayTime {
6063            days: 1,
6064            milliseconds: 23,
6065        }));
6066        let timestamp = ScalarValue::TimestampNanosecond(Some(123), None);
6067        let result = interval.add(&timestamp)?;
6068        let expect = timestamp.add(&interval)?;
6069        assert_eq!(result, expect);
6070        Ok(())
6071    }
6072
6073    #[test]
6074    fn test_try_cmp() {
6075        assert_eq!(
6076            ScalarValue::try_cmp(
6077                &ScalarValue::Int32(Some(1)),
6078                &ScalarValue::Int32(Some(2))
6079            )
6080            .unwrap(),
6081            Ordering::Less
6082        );
6083        assert_eq!(
6084            ScalarValue::try_cmp(&ScalarValue::Int32(None), &ScalarValue::Int32(Some(2)))
6085                .unwrap(),
6086            Ordering::Less
6087        );
6088        assert_starts_with(
6089            ScalarValue::try_cmp(
6090                &ScalarValue::Int32(Some(1)),
6091                &ScalarValue::Int64(Some(2)),
6092            )
6093            .unwrap_err()
6094            .message(),
6095            "Uncomparable values: Int32(1), Int64(2)",
6096        );
6097    }
6098
6099    #[test]
6100    fn scalar_decimal_test() -> Result<()> {
6101        let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
6102        assert_eq!(DataType::Decimal128(10, 1), decimal_value.data_type());
6103        let try_into_value: i128 = decimal_value.clone().try_into().unwrap();
6104        assert_eq!(123_i128, try_into_value);
6105        assert!(!decimal_value.is_null());
6106        let neg_decimal_value = decimal_value.arithmetic_negate()?;
6107        match neg_decimal_value {
6108            ScalarValue::Decimal128(v, _, _) => {
6109                assert_eq!(-123, v.unwrap());
6110            }
6111            _ => {
6112                unreachable!();
6113            }
6114        }
6115
6116        // decimal scalar to array
6117        let array = decimal_value
6118            .to_array()
6119            .expect("Failed to convert to array");
6120        let array = as_decimal128_array(&array)?;
6121        assert_eq!(1, array.len());
6122        assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
6123        assert_eq!(123i128, array.value(0));
6124
6125        // decimal scalar to array with size
6126        let array = decimal_value
6127            .to_array_of_size(10)
6128            .expect("Failed to convert to array of size");
6129        let array_decimal = as_decimal128_array(&array)?;
6130        assert_eq!(10, array.len());
6131        assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
6132        assert_eq!(123i128, array_decimal.value(0));
6133        assert_eq!(123i128, array_decimal.value(9));
6134        // test eq array
6135        assert!(
6136            decimal_value
6137                .eq_array(&array, 1)
6138                .expect("Failed to compare arrays")
6139        );
6140        assert!(
6141            decimal_value
6142                .eq_array(&array, 5)
6143                .expect("Failed to compare arrays")
6144        );
6145        // test try from array
6146        assert_eq!(
6147            decimal_value,
6148            ScalarValue::try_from_array(&array, 5).unwrap()
6149        );
6150
6151        assert_eq!(
6152            decimal_value,
6153            ScalarValue::try_new_decimal128(123, 10, 1).unwrap()
6154        );
6155
6156        // test compare
6157        let left = ScalarValue::Decimal128(Some(123), 10, 2);
6158        let right = ScalarValue::Decimal128(Some(124), 10, 2);
6159        assert!(!left.eq(&right));
6160        let result = left < right;
6161        assert!(result);
6162        let result = left <= right;
6163        assert!(result);
6164        let right = ScalarValue::Decimal128(Some(124), 10, 3);
6165        // make sure that two decimals with diff datatype can't be compared.
6166        let result = left.partial_cmp(&right);
6167        assert_eq!(None, result);
6168
6169        let decimal_vec = vec![
6170            ScalarValue::Decimal128(Some(1), 10, 2),
6171            ScalarValue::Decimal128(Some(2), 10, 2),
6172            ScalarValue::Decimal128(Some(3), 10, 2),
6173        ];
6174        // convert the vec to decimal array and check the result
6175        let array = ScalarValue::iter_to_array(decimal_vec).unwrap();
6176        assert_eq!(3, array.len());
6177        assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());
6178
6179        let decimal_vec = vec![
6180            ScalarValue::Decimal128(Some(1), 10, 2),
6181            ScalarValue::Decimal128(Some(2), 10, 2),
6182            ScalarValue::Decimal128(Some(3), 10, 2),
6183            ScalarValue::Decimal128(None, 10, 2),
6184        ];
6185        let array = ScalarValue::iter_to_array(decimal_vec).unwrap();
6186        assert_eq!(4, array.len());
6187        assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());
6188
6189        assert!(
6190            ScalarValue::try_new_decimal128(1, 10, 2)
6191                .unwrap()
6192                .eq_array(&array, 0)
6193                .expect("Failed to compare arrays")
6194        );
6195        assert!(
6196            ScalarValue::try_new_decimal128(2, 10, 2)
6197                .unwrap()
6198                .eq_array(&array, 1)
6199                .expect("Failed to compare arrays")
6200        );
6201        assert!(
6202            ScalarValue::try_new_decimal128(3, 10, 2)
6203                .unwrap()
6204                .eq_array(&array, 2)
6205                .expect("Failed to compare arrays")
6206        );
6207        assert_eq!(
6208            ScalarValue::Decimal128(None, 10, 2),
6209            ScalarValue::try_from_array(&array, 3).unwrap()
6210        );
6211
6212        Ok(())
6213    }
6214
6215    #[test]
6216    fn test_new_one_decimal128() {
6217        assert_eq!(
6218            ScalarValue::new_one(&DataType::Decimal128(5, 0)).unwrap(),
6219            ScalarValue::Decimal128(Some(1), 5, 0)
6220        );
6221        assert_eq!(
6222            ScalarValue::new_one(&DataType::Decimal128(5, 1)).unwrap(),
6223            ScalarValue::Decimal128(Some(10), 5, 1)
6224        );
6225        assert_eq!(
6226            ScalarValue::new_one(&DataType::Decimal128(5, 2)).unwrap(),
6227            ScalarValue::Decimal128(Some(100), 5, 2)
6228        );
6229        // More precision
6230        assert_eq!(
6231            ScalarValue::new_one(&DataType::Decimal128(7, 2)).unwrap(),
6232            ScalarValue::Decimal128(Some(100), 7, 2)
6233        );
6234        // No negative scale
6235        assert!(ScalarValue::new_one(&DataType::Decimal128(5, -1)).is_err());
6236        // Invalid combination
6237        assert!(ScalarValue::new_one(&DataType::Decimal128(0, 2)).is_err());
6238        assert!(ScalarValue::new_one(&DataType::Decimal128(5, 7)).is_err());
6239    }
6240
6241    #[test]
6242    fn test_new_one_decimal256() {
6243        assert_eq!(
6244            ScalarValue::new_one(&DataType::Decimal256(5, 0)).unwrap(),
6245            ScalarValue::Decimal256(Some(1.into()), 5, 0)
6246        );
6247        assert_eq!(
6248            ScalarValue::new_one(&DataType::Decimal256(5, 1)).unwrap(),
6249            ScalarValue::Decimal256(Some(10.into()), 5, 1)
6250        );
6251        assert_eq!(
6252            ScalarValue::new_one(&DataType::Decimal256(5, 2)).unwrap(),
6253            ScalarValue::Decimal256(Some(100.into()), 5, 2)
6254        );
6255        // More precision
6256        assert_eq!(
6257            ScalarValue::new_one(&DataType::Decimal256(7, 2)).unwrap(),
6258            ScalarValue::Decimal256(Some(100.into()), 7, 2)
6259        );
6260        // No negative scale
6261        assert!(ScalarValue::new_one(&DataType::Decimal256(5, -1)).is_err());
6262        // Invalid combination
6263        assert!(ScalarValue::new_one(&DataType::Decimal256(0, 2)).is_err());
6264        assert!(ScalarValue::new_one(&DataType::Decimal256(5, 7)).is_err());
6265    }
6266
6267    #[test]
6268    fn test_new_ten_decimal128() {
6269        assert_eq!(
6270            ScalarValue::new_ten(&DataType::Decimal128(5, 1)).unwrap(),
6271            ScalarValue::Decimal128(Some(100), 5, 1)
6272        );
6273        assert_eq!(
6274            ScalarValue::new_ten(&DataType::Decimal128(5, 2)).unwrap(),
6275            ScalarValue::Decimal128(Some(1000), 5, 2)
6276        );
6277        // More precision
6278        assert_eq!(
6279            ScalarValue::new_ten(&DataType::Decimal128(7, 2)).unwrap(),
6280            ScalarValue::Decimal128(Some(1000), 7, 2)
6281        );
6282        // No negative scale
6283        assert!(ScalarValue::new_ten(&DataType::Decimal128(5, -1)).is_err());
6284        // Invalid combination
6285        assert!(ScalarValue::new_ten(&DataType::Decimal128(0, 2)).is_err());
6286        assert!(ScalarValue::new_ten(&DataType::Decimal128(5, 7)).is_err());
6287    }
6288
6289    #[test]
6290    fn test_new_ten_decimal256() {
6291        assert_eq!(
6292            ScalarValue::new_ten(&DataType::Decimal256(5, 1)).unwrap(),
6293            ScalarValue::Decimal256(Some(100.into()), 5, 1)
6294        );
6295        assert_eq!(
6296            ScalarValue::new_ten(&DataType::Decimal256(5, 2)).unwrap(),
6297            ScalarValue::Decimal256(Some(1000.into()), 5, 2)
6298        );
6299        // More precision
6300        assert_eq!(
6301            ScalarValue::new_ten(&DataType::Decimal256(7, 2)).unwrap(),
6302            ScalarValue::Decimal256(Some(1000.into()), 7, 2)
6303        );
6304        // No negative scale
6305        assert!(ScalarValue::new_ten(&DataType::Decimal256(5, -1)).is_err());
6306        // Invalid combination
6307        assert!(ScalarValue::new_ten(&DataType::Decimal256(0, 2)).is_err());
6308        assert!(ScalarValue::new_ten(&DataType::Decimal256(5, 7)).is_err());
6309    }
6310
6311    #[test]
6312    fn test_new_negative_one_decimal128() {
6313        assert_eq!(
6314            ScalarValue::new_negative_one(&DataType::Decimal128(5, 0)).unwrap(),
6315            ScalarValue::Decimal128(Some(-1), 5, 0)
6316        );
6317        assert_eq!(
6318            ScalarValue::new_negative_one(&DataType::Decimal128(5, 2)).unwrap(),
6319            ScalarValue::Decimal128(Some(-100), 5, 2)
6320        );
6321    }
6322
6323    #[test]
6324    fn test_list_partial_cmp() {
6325        let a =
6326            ScalarValue::List(Arc::new(
6327                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6328                    Some(1),
6329                    Some(2),
6330                    Some(3),
6331                ])]),
6332            ));
6333        let b =
6334            ScalarValue::List(Arc::new(
6335                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6336                    Some(1),
6337                    Some(2),
6338                    Some(3),
6339                ])]),
6340            ));
6341        assert_eq!(a.partial_cmp(&b), Some(Ordering::Equal));
6342
6343        let a =
6344            ScalarValue::List(Arc::new(
6345                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6346                    Some(10),
6347                    Some(2),
6348                    Some(3),
6349                ])]),
6350            ));
6351        let b =
6352            ScalarValue::List(Arc::new(
6353                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6354                    Some(1),
6355                    Some(2),
6356                    Some(30),
6357                ])]),
6358            ));
6359        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
6360
6361        let a =
6362            ScalarValue::List(Arc::new(
6363                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6364                    Some(10),
6365                    Some(2),
6366                    Some(3),
6367                ])]),
6368            ));
6369        let b =
6370            ScalarValue::List(Arc::new(
6371                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6372                    Some(10),
6373                    Some(2),
6374                    Some(30),
6375                ])]),
6376            ));
6377        assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
6378
6379        let a =
6380            ScalarValue::List(Arc::new(
6381                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6382                    Some(1),
6383                    Some(2),
6384                    Some(3),
6385                ])]),
6386            ));
6387        let b =
6388            ScalarValue::List(Arc::new(
6389                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6390                    Some(2),
6391                    Some(3),
6392                ])]),
6393            ));
6394        assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
6395
6396        let a =
6397            ScalarValue::List(Arc::new(
6398                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6399                    Some(2),
6400                    Some(3),
6401                    Some(4),
6402                ])]),
6403            ));
6404        let b =
6405            ScalarValue::List(Arc::new(
6406                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6407                    Some(1),
6408                    Some(2),
6409                ])]),
6410            ));
6411        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
6412
6413        let a =
6414            ScalarValue::List(Arc::new(
6415                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6416                    Some(1),
6417                    Some(2),
6418                    Some(3),
6419                ])]),
6420            ));
6421        let b =
6422            ScalarValue::List(Arc::new(
6423                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6424                    Some(1),
6425                    Some(2),
6426                ])]),
6427            ));
6428        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
6429
6430        let a =
6431            ScalarValue::List(Arc::new(
6432                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6433                    None,
6434                    Some(2),
6435                    Some(3),
6436                ])]),
6437            ));
6438        let b =
6439            ScalarValue::List(Arc::new(
6440                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![Some(vec![
6441                    Some(1),
6442                    Some(2),
6443                    Some(3),
6444                ])]),
6445            ));
6446        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
6447
6448        let a = ScalarValue::LargeList(Arc::new(LargeListArray::from_iter_primitive::<
6449            Int64Type,
6450            _,
6451            _,
6452        >(vec![Some(vec![
6453            None,
6454            Some(2),
6455            Some(3),
6456        ])])));
6457        let b = ScalarValue::LargeList(Arc::new(LargeListArray::from_iter_primitive::<
6458            Int64Type,
6459            _,
6460            _,
6461        >(vec![Some(vec![
6462            Some(1),
6463            Some(2),
6464            Some(3),
6465        ])])));
6466        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
6467
6468        let a = ScalarValue::FixedSizeList(Arc::new(
6469            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
6470                vec![Some(vec![None, Some(2), Some(3)])],
6471                3,
6472            ),
6473        ));
6474        let b = ScalarValue::FixedSizeList(Arc::new(
6475            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(
6476                vec![Some(vec![Some(1), Some(2), Some(3)])],
6477                3,
6478            ),
6479        ));
6480        assert_eq!(a.partial_cmp(&b), Some(Ordering::Greater));
6481    }
6482
6483    #[test]
6484    fn scalar_value_to_array_u64() -> Result<()> {
6485        let value = ScalarValue::UInt64(Some(13u64));
6486        let array = value.to_array().expect("Failed to convert to array");
6487        let array = as_uint64_array(&array)?;
6488        assert_eq!(array.len(), 1);
6489        assert!(!array.is_null(0));
6490        assert_eq!(array.value(0), 13);
6491
6492        let value = ScalarValue::UInt64(None);
6493        let array = value.to_array().expect("Failed to convert to array");
6494        let array = as_uint64_array(&array)?;
6495        assert_eq!(array.len(), 1);
6496        assert!(array.is_null(0));
6497        Ok(())
6498    }
6499
6500    #[test]
6501    fn scalar_value_to_array_u32() -> Result<()> {
6502        let value = ScalarValue::UInt32(Some(13u32));
6503        let array = value.to_array().expect("Failed to convert to array");
6504        let array = as_uint32_array(&array)?;
6505        assert_eq!(array.len(), 1);
6506        assert!(!array.is_null(0));
6507        assert_eq!(array.value(0), 13);
6508
6509        let value = ScalarValue::UInt32(None);
6510        let array = value.to_array().expect("Failed to convert to array");
6511        let array = as_uint32_array(&array)?;
6512        assert_eq!(array.len(), 1);
6513        assert!(array.is_null(0));
6514        Ok(())
6515    }
6516
6517    #[test]
6518    fn scalar_list_null_to_array() {
6519        let list_array = ScalarValue::new_list_nullable(&[], &DataType::UInt64);
6520
6521        assert_eq!(list_array.len(), 1);
6522        assert_eq!(list_array.values().len(), 0);
6523    }
6524
6525    #[test]
6526    fn scalar_large_list_null_to_array() {
6527        let list_array = ScalarValue::new_large_list(&[], &DataType::UInt64);
6528
6529        assert_eq!(list_array.len(), 1);
6530        assert_eq!(list_array.values().len(), 0);
6531    }
6532
6533    #[test]
6534    fn scalar_list_to_array() -> Result<()> {
6535        let values = vec![
6536            ScalarValue::UInt64(Some(100)),
6537            ScalarValue::UInt64(None),
6538            ScalarValue::UInt64(Some(101)),
6539        ];
6540        let list_array = ScalarValue::new_list_nullable(&values, &DataType::UInt64);
6541        assert_eq!(list_array.len(), 1);
6542        assert_eq!(list_array.values().len(), 3);
6543
6544        let prim_array_ref = list_array.value(0);
6545        let prim_array = as_uint64_array(&prim_array_ref)?;
6546        assert_eq!(prim_array.len(), 3);
6547        assert_eq!(prim_array.value(0), 100);
6548        assert!(prim_array.is_null(1));
6549        assert_eq!(prim_array.value(2), 101);
6550        Ok(())
6551    }
6552
6553    #[test]
6554    fn scalar_large_list_to_array() -> Result<()> {
6555        let values = vec![
6556            ScalarValue::UInt64(Some(100)),
6557            ScalarValue::UInt64(None),
6558            ScalarValue::UInt64(Some(101)),
6559        ];
6560        let list_array = ScalarValue::new_large_list(&values, &DataType::UInt64);
6561        assert_eq!(list_array.len(), 1);
6562        assert_eq!(list_array.values().len(), 3);
6563
6564        let prim_array_ref = list_array.value(0);
6565        let prim_array = as_uint64_array(&prim_array_ref)?;
6566        assert_eq!(prim_array.len(), 3);
6567        assert_eq!(prim_array.value(0), 100);
6568        assert!(prim_array.is_null(1));
6569        assert_eq!(prim_array.value(2), 101);
6570        Ok(())
6571    }
6572
6573    /// Creates array directly and via ScalarValue and ensures they are the same
6574    macro_rules! check_scalar_iter {
6575        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
6576            let scalars: Vec<_> =
6577                $INPUT.iter().map(|v| ScalarValue::$SCALAR_T(*v)).collect();
6578
6579            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
6580
6581            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
6582
6583            assert_eq!(&array, &expected);
6584        }};
6585    }
6586
6587    /// Creates array directly and via ScalarValue and ensures they are the same
6588    /// but for variants that carry a timezone field.
6589    macro_rules! check_scalar_iter_tz {
6590        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
6591            let scalars: Vec<_> = $INPUT
6592                .iter()
6593                .map(|v| ScalarValue::$SCALAR_T(*v, None))
6594                .collect();
6595
6596            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
6597
6598            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
6599
6600            assert_eq!(&array, &expected);
6601        }};
6602    }
6603
6604    /// Creates array directly and via ScalarValue and ensures they
6605    /// are the same, for string  arrays
6606    macro_rules! check_scalar_iter_string {
6607        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
6608            let scalars: Vec<_> = $INPUT
6609                .iter()
6610                .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_string())))
6611                .collect();
6612
6613            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
6614
6615            let expected: ArrayRef = Arc::new($ARRAYTYPE::from($INPUT));
6616
6617            assert_eq!(&array, &expected);
6618        }};
6619    }
6620
6621    /// Creates array directly and via ScalarValue and ensures they
6622    /// are the same, for binary arrays
6623    macro_rules! check_scalar_iter_binary {
6624        ($SCALAR_T:ident, $ARRAYTYPE:ident, $INPUT:expr) => {{
6625            let scalars: Vec<_> = $INPUT
6626                .iter()
6627                .map(|v| ScalarValue::$SCALAR_T(v.map(|v| v.to_vec())))
6628                .collect();
6629
6630            let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap();
6631
6632            let expected: $ARRAYTYPE =
6633                $INPUT.iter().map(|v| v.map(|v| v.to_vec())).collect();
6634
6635            let expected: ArrayRef = Arc::new(expected);
6636
6637            assert_eq!(&array, &expected);
6638        }};
6639    }
6640
6641    #[test]
6642    fn scalar_iter_to_array_boolean() {
6643        check_scalar_iter!(Boolean, BooleanArray, vec![Some(true), None, Some(false)]);
6644        check_scalar_iter!(Float32, Float32Array, vec![Some(1.9), None, Some(-2.1)]);
6645        check_scalar_iter!(Float64, Float64Array, vec![Some(1.9), None, Some(-2.1)]);
6646
6647        check_scalar_iter!(Int8, Int8Array, vec![Some(1), None, Some(3)]);
6648        check_scalar_iter!(Int16, Int16Array, vec![Some(1), None, Some(3)]);
6649        check_scalar_iter!(Int32, Int32Array, vec![Some(1), None, Some(3)]);
6650        check_scalar_iter!(Int64, Int64Array, vec![Some(1), None, Some(3)]);
6651
6652        check_scalar_iter!(UInt8, UInt8Array, vec![Some(1), None, Some(3)]);
6653        check_scalar_iter!(UInt16, UInt16Array, vec![Some(1), None, Some(3)]);
6654        check_scalar_iter!(UInt32, UInt32Array, vec![Some(1), None, Some(3)]);
6655        check_scalar_iter!(UInt64, UInt64Array, vec![Some(1), None, Some(3)]);
6656
6657        check_scalar_iter_tz!(
6658            TimestampSecond,
6659            TimestampSecondArray,
6660            vec![Some(1), None, Some(3)]
6661        );
6662        check_scalar_iter_tz!(
6663            TimestampMillisecond,
6664            TimestampMillisecondArray,
6665            vec![Some(1), None, Some(3)]
6666        );
6667        check_scalar_iter_tz!(
6668            TimestampMicrosecond,
6669            TimestampMicrosecondArray,
6670            vec![Some(1), None, Some(3)]
6671        );
6672        check_scalar_iter_tz!(
6673            TimestampNanosecond,
6674            TimestampNanosecondArray,
6675            vec![Some(1), None, Some(3)]
6676        );
6677
6678        check_scalar_iter_string!(
6679            Utf8,
6680            StringArray,
6681            vec![Some("foo"), None, Some("bar")]
6682        );
6683        check_scalar_iter_string!(
6684            LargeUtf8,
6685            LargeStringArray,
6686            vec![Some("foo"), None, Some("bar")]
6687        );
6688        check_scalar_iter_binary!(
6689            Binary,
6690            BinaryArray,
6691            [Some(b"foo"), None, Some(b"bar")]
6692        );
6693        check_scalar_iter_binary!(
6694            LargeBinary,
6695            LargeBinaryArray,
6696            [Some(b"foo"), None, Some(b"bar")]
6697        );
6698    }
6699
6700    #[test]
6701    fn scalar_iter_to_array_empty() {
6702        let scalars = vec![] as Vec<ScalarValue>;
6703
6704        let result = ScalarValue::iter_to_array(scalars).unwrap_err();
6705        assert!(
6706            result
6707                .to_string()
6708                .contains("Empty iterator passed to ScalarValue::iter_to_array"),
6709            "{}",
6710            result
6711        );
6712    }
6713
6714    #[test]
6715    fn scalar_iter_to_dictionary() {
6716        fn make_val(v: Option<String>) -> ScalarValue {
6717            let key_type = DataType::Int32;
6718            let value = ScalarValue::Utf8(v);
6719            ScalarValue::Dictionary(Box::new(key_type), Box::new(value))
6720        }
6721
6722        let scalars = [
6723            make_val(Some("Foo".into())),
6724            make_val(None),
6725            make_val(Some("Bar".into())),
6726        ];
6727
6728        let array = ScalarValue::iter_to_array(scalars).unwrap();
6729        let array = as_dictionary_array::<Int32Type>(&array).unwrap();
6730        let values_array = as_string_array(array.values()).unwrap();
6731
6732        let values = array
6733            .keys_iter()
6734            .map(|k| {
6735                k.map(|k| {
6736                    assert!(values_array.is_valid(k));
6737                    values_array.value(k)
6738                })
6739            })
6740            .collect::<Vec<_>>();
6741
6742        let expected = vec![Some("Foo"), None, Some("Bar")];
6743        assert_eq!(values, expected);
6744    }
6745
6746    #[test]
6747    fn scalar_iter_to_array_mismatched_types() {
6748        use ScalarValue::*;
6749        // If the scalar values are not all the correct type, error here
6750        let scalars = [Boolean(Some(true)), Int32(Some(5))];
6751
6752        let result = ScalarValue::iter_to_array(scalars).unwrap_err();
6753        assert!(result.to_string().contains("Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)"),
6754                "{}", result);
6755    }
6756
6757    #[test]
6758    fn scalar_try_from_array_null() {
6759        let array = vec![Some(33), None].into_iter().collect::<Int64Array>();
6760        let array: ArrayRef = Arc::new(array);
6761
6762        assert_eq!(
6763            ScalarValue::Int64(Some(33)),
6764            ScalarValue::try_from_array(&array, 0).unwrap()
6765        );
6766        assert_eq!(
6767            ScalarValue::Int64(None),
6768            ScalarValue::try_from_array(&array, 1).unwrap()
6769        );
6770    }
6771
6772    #[test]
6773    fn scalar_try_from_array_list_array_null() {
6774        let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
6775            Some(vec![Some(1), Some(2)]),
6776            None,
6777        ]);
6778
6779        let non_null_list_scalar = ScalarValue::try_from_array(&list, 0).unwrap();
6780        let null_list_scalar = ScalarValue::try_from_array(&list, 1).unwrap();
6781
6782        let data_type =
6783            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
6784
6785        assert_eq!(non_null_list_scalar.data_type(), data_type);
6786        assert_eq!(null_list_scalar.data_type(), data_type);
6787    }
6788
6789    #[test]
6790    fn scalar_try_from_list_datatypes() {
6791        let inner_field = Arc::new(Field::new_list_field(DataType::Int32, true));
6792
6793        // Test for List
6794        let data_type = &DataType::List(Arc::clone(&inner_field));
6795        let scalar: ScalarValue = data_type.try_into().unwrap();
6796        let expected = ScalarValue::List(
6797            new_null_array(data_type, 1)
6798                .as_list::<i32>()
6799                .to_owned()
6800                .into(),
6801        );
6802        assert_eq!(expected, scalar);
6803        assert!(expected.is_null());
6804
6805        // Test for LargeList
6806        let data_type = &DataType::LargeList(Arc::clone(&inner_field));
6807        let scalar: ScalarValue = data_type.try_into().unwrap();
6808        let expected = ScalarValue::LargeList(
6809            new_null_array(data_type, 1)
6810                .as_list::<i64>()
6811                .to_owned()
6812                .into(),
6813        );
6814        assert_eq!(expected, scalar);
6815        assert!(expected.is_null());
6816
6817        // Test for FixedSizeList(5)
6818        let data_type = &DataType::FixedSizeList(Arc::clone(&inner_field), 5);
6819        let scalar: ScalarValue = data_type.try_into().unwrap();
6820        let expected = ScalarValue::FixedSizeList(
6821            new_null_array(data_type, 1)
6822                .as_fixed_size_list()
6823                .to_owned()
6824                .into(),
6825        );
6826        assert_eq!(expected, scalar);
6827        assert!(expected.is_null());
6828    }
6829
6830    #[test]
6831    fn scalar_try_from_list_of_list() {
6832        let data_type = DataType::List(Arc::new(Field::new_list_field(
6833            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6834            true,
6835        )));
6836        let data_type = &data_type;
6837        let scalar: ScalarValue = data_type.try_into().unwrap();
6838
6839        let expected = ScalarValue::List(
6840            new_null_array(
6841                &DataType::List(Arc::new(Field::new_list_field(
6842                    DataType::List(Arc::new(Field::new_list_field(
6843                        DataType::Int32,
6844                        true,
6845                    ))),
6846                    true,
6847                ))),
6848                1,
6849            )
6850            .as_list::<i32>()
6851            .to_owned()
6852            .into(),
6853        );
6854
6855        assert_eq!(expected, scalar)
6856    }
6857
6858    #[test]
6859    fn scalar_try_from_not_equal_list_nested_list() {
6860        let list_data_type =
6861            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
6862        let data_type = &list_data_type;
6863        let list_scalar: ScalarValue = data_type.try_into().unwrap();
6864
6865        let nested_list_data_type = DataType::List(Arc::new(Field::new_list_field(
6866            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
6867            true,
6868        )));
6869        let data_type = &nested_list_data_type;
6870        let nested_list_scalar: ScalarValue = data_type.try_into().unwrap();
6871
6872        assert_ne!(list_scalar, nested_list_scalar);
6873    }
6874
6875    #[test]
6876    fn scalar_try_from_dict_datatype() {
6877        let data_type =
6878            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
6879        let data_type = &data_type;
6880        let expected = ScalarValue::Dictionary(
6881            Box::new(DataType::Int8),
6882            Box::new(ScalarValue::Utf8(None)),
6883        );
6884        assert_eq!(expected, data_type.try_into().unwrap())
6885    }
6886
6887    #[test]
6888    fn size_of_scalar() {
6889        // Since ScalarValues are used in a non trivial number of places,
6890        // making it larger means significant more memory consumption
6891        // per distinct value.
6892        //
6893        // Thus this test ensures that no code change makes ScalarValue larger
6894        //
6895        // The alignment requirements differ across architectures and
6896        // thus the size of the enum appears to as well
6897
6898        // The value may also change depending on rust version
6899        assert_eq!(size_of::<ScalarValue>(), 64);
6900    }
6901
6902    #[test]
6903    fn memory_size() {
6904        let sv = ScalarValue::Binary(Some(Vec::with_capacity(10)));
6905        assert_eq!(sv.size(), size_of::<ScalarValue>() + 10,);
6906        let sv_size = sv.size();
6907
6908        let mut v = Vec::with_capacity(10);
6909        // do NOT clone `sv` here because this may shrink the vector capacity
6910        v.push(sv);
6911        assert_eq!(v.capacity(), 10);
6912        assert_eq!(
6913            ScalarValue::size_of_vec(&v),
6914            size_of::<Vec<ScalarValue>>() + (9 * size_of::<ScalarValue>()) + sv_size,
6915        );
6916
6917        let mut s = HashSet::with_capacity(0);
6918        // do NOT clone `sv` here because this may shrink the vector capacity
6919        s.insert(v.pop().unwrap());
6920        // hashsets may easily grow during insert, so capacity is dynamic
6921        let s_capacity = s.capacity();
6922        assert_eq!(
6923            ScalarValue::size_of_hashset(&s),
6924            size_of::<HashSet<ScalarValue>>()
6925                + ((s_capacity - 1) * size_of::<ScalarValue>())
6926                + sv_size,
6927        );
6928    }
6929
6930    #[test]
6931    fn scalar_eq_array() {
6932        // Validate that eq_array has the same semantics as ScalarValue::eq
6933        macro_rules! make_typed_vec {
6934            ($INPUT:expr, $TYPE:ident) => {{
6935                $INPUT
6936                    .iter()
6937                    .map(|v| v.map(|v| v as $TYPE))
6938                    .collect::<Vec<_>>()
6939            }};
6940        }
6941
6942        let bool_vals = [Some(true), None, Some(false)];
6943        let f32_vals = [Some(-1.0), None, Some(1.0)];
6944        let f64_vals = make_typed_vec!(f32_vals, f64);
6945
6946        let i8_vals = [Some(-1), None, Some(1)];
6947        let i16_vals = make_typed_vec!(i8_vals, i16);
6948        let i32_vals = make_typed_vec!(i8_vals, i32);
6949        let i64_vals = make_typed_vec!(i8_vals, i64);
6950
6951        let u8_vals = [Some(0), None, Some(1)];
6952        let u16_vals = make_typed_vec!(u8_vals, u16);
6953        let u32_vals = make_typed_vec!(u8_vals, u32);
6954        let u64_vals = make_typed_vec!(u8_vals, u64);
6955
6956        let str_vals = [Some("foo"), None, Some("bar")];
6957
6958        let interval_dt_vals = [
6959            Some(IntervalDayTime::MINUS_ONE),
6960            None,
6961            Some(IntervalDayTime::ONE),
6962        ];
6963        let interval_mdn_vals = [
6964            Some(IntervalMonthDayNano::MINUS_ONE),
6965            None,
6966            Some(IntervalMonthDayNano::ONE),
6967        ];
6968
6969        /// Test each value in `scalar` with the corresponding element
6970        /// at `array`. Assumes each element is unique (aka not equal
6971        /// with all other indexes)
6972        #[derive(Debug)]
6973        struct TestCase {
6974            array: ArrayRef,
6975            scalars: Vec<ScalarValue>,
6976        }
6977
6978        /// Create a test case for casing the input to the specified array type
6979        macro_rules! make_test_case {
6980            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
6981                TestCase {
6982                    array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
6983                    scalars: $INPUT.iter().map(|v| ScalarValue::$SCALAR_TY(*v)).collect(),
6984                }
6985            }};
6986
6987            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{
6988                let tz = $TZ;
6989                TestCase {
6990                    array: Arc::new($INPUT.iter().collect::<$ARRAY_TY>()),
6991                    scalars: $INPUT
6992                        .iter()
6993                        .map(|v| ScalarValue::$SCALAR_TY(*v, tz.clone()))
6994                        .collect(),
6995                }
6996            }};
6997        }
6998
6999        macro_rules! make_str_test_case {
7000            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
7001                TestCase {
7002                    array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
7003                    scalars: $INPUT
7004                        .iter()
7005                        .map(|v| ScalarValue::$SCALAR_TY(v.map(|v| v.to_string())))
7006                        .collect(),
7007                }
7008            }};
7009        }
7010
7011        macro_rules! make_binary_test_case {
7012            ($INPUT:expr, $ARRAY_TY:ident, $SCALAR_TY:ident) => {{
7013                TestCase {
7014                    array: Arc::new($INPUT.iter().cloned().collect::<$ARRAY_TY>()),
7015                    scalars: $INPUT
7016                        .iter()
7017                        .map(|v| {
7018                            ScalarValue::$SCALAR_TY(v.map(|v| v.as_bytes().to_vec()))
7019                        })
7020                        .collect(),
7021                }
7022            }};
7023        }
7024
7025        /// create a test case for DictionaryArray<$INDEX_TY>
7026        macro_rules! make_str_dict_test_case {
7027            ($INPUT:expr, $INDEX_TY:ident) => {{
7028                TestCase {
7029                    array: Arc::new(
7030                        $INPUT
7031                            .iter()
7032                            .cloned()
7033                            .collect::<DictionaryArray<$INDEX_TY>>(),
7034                    ),
7035                    scalars: $INPUT
7036                        .iter()
7037                        .map(|v| {
7038                            ScalarValue::Dictionary(
7039                                Box::new($INDEX_TY::DATA_TYPE),
7040                                Box::new(ScalarValue::Utf8(v.map(|v| v.to_string()))),
7041                            )
7042                        })
7043                        .collect(),
7044                }
7045            }};
7046        }
7047
7048        let cases = vec![
7049            make_test_case!(bool_vals, BooleanArray, Boolean),
7050            make_test_case!(f32_vals, Float32Array, Float32),
7051            make_test_case!(f64_vals, Float64Array, Float64),
7052            make_test_case!(i8_vals, Int8Array, Int8),
7053            make_test_case!(i16_vals, Int16Array, Int16),
7054            make_test_case!(i32_vals, Int32Array, Int32),
7055            make_test_case!(i64_vals, Int64Array, Int64),
7056            make_test_case!(u8_vals, UInt8Array, UInt8),
7057            make_test_case!(u16_vals, UInt16Array, UInt16),
7058            make_test_case!(u32_vals, UInt32Array, UInt32),
7059            make_test_case!(u64_vals, UInt64Array, UInt64),
7060            make_str_test_case!(str_vals, StringArray, Utf8),
7061            make_str_test_case!(str_vals, LargeStringArray, LargeUtf8),
7062            make_binary_test_case!(str_vals, BinaryArray, Binary),
7063            make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary),
7064            make_test_case!(i32_vals, Date32Array, Date32),
7065            make_test_case!(i64_vals, Date64Array, Date64),
7066            make_test_case!(i32_vals, Time32SecondArray, Time32Second),
7067            make_test_case!(i32_vals, Time32MillisecondArray, Time32Millisecond),
7068            make_test_case!(i64_vals, Time64MicrosecondArray, Time64Microsecond),
7069            make_test_case!(i64_vals, Time64NanosecondArray, Time64Nanosecond),
7070            make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None),
7071            make_test_case!(
7072                i64_vals,
7073                TimestampSecondArray,
7074                TimestampSecond,
7075                Some("UTC".into())
7076            ),
7077            make_test_case!(
7078                i64_vals,
7079                TimestampMillisecondArray,
7080                TimestampMillisecond,
7081                None
7082            ),
7083            make_test_case!(
7084                i64_vals,
7085                TimestampMillisecondArray,
7086                TimestampMillisecond,
7087                Some("UTC".into())
7088            ),
7089            make_test_case!(
7090                i64_vals,
7091                TimestampMicrosecondArray,
7092                TimestampMicrosecond,
7093                None
7094            ),
7095            make_test_case!(
7096                i64_vals,
7097                TimestampMicrosecondArray,
7098                TimestampMicrosecond,
7099                Some("UTC".into())
7100            ),
7101            make_test_case!(
7102                i64_vals,
7103                TimestampNanosecondArray,
7104                TimestampNanosecond,
7105                None
7106            ),
7107            make_test_case!(
7108                i64_vals,
7109                TimestampNanosecondArray,
7110                TimestampNanosecond,
7111                Some("UTC".into())
7112            ),
7113            make_test_case!(i32_vals, IntervalYearMonthArray, IntervalYearMonth),
7114            make_test_case!(interval_dt_vals, IntervalDayTimeArray, IntervalDayTime),
7115            make_test_case!(
7116                interval_mdn_vals,
7117                IntervalMonthDayNanoArray,
7118                IntervalMonthDayNano
7119            ),
7120            make_str_dict_test_case!(str_vals, Int8Type),
7121            make_str_dict_test_case!(str_vals, Int16Type),
7122            make_str_dict_test_case!(str_vals, Int32Type),
7123            make_str_dict_test_case!(str_vals, Int64Type),
7124            make_str_dict_test_case!(str_vals, UInt8Type),
7125            make_str_dict_test_case!(str_vals, UInt16Type),
7126            make_str_dict_test_case!(str_vals, UInt32Type),
7127            make_str_dict_test_case!(str_vals, UInt64Type),
7128        ];
7129
7130        for case in cases {
7131            println!("**** Test Case *****");
7132            let TestCase { array, scalars } = case;
7133            println!("Input array type: {}", array.data_type());
7134            println!("Input scalars: {scalars:#?}");
7135            assert_eq!(array.len(), scalars.len());
7136
7137            for (index, scalar) in scalars.into_iter().enumerate() {
7138                assert!(
7139                    scalar
7140                        .eq_array(&array, index)
7141                        .expect("Failed to compare arrays"),
7142                    "Expected {scalar:?} to be equal to {array:?} at index {index}"
7143                );
7144
7145                // test that all other elements are *not* equal
7146                for other_index in 0..array.len() {
7147                    if index != other_index {
7148                        assert!(
7149                            !scalar
7150                                .eq_array(&array, other_index)
7151                                .expect("Failed to compare arrays"),
7152                            "Expected {scalar:?} to be NOT equal to {array:?} at index {other_index}"
7153                        );
7154                    }
7155                }
7156            }
7157        }
7158    }
7159
7160    #[test]
7161    fn scalar_partial_ordering() {
7162        use ScalarValue::*;
7163
7164        assert_eq!(
7165            Int64(Some(33)).partial_cmp(&Int64(Some(0))),
7166            Some(Ordering::Greater)
7167        );
7168        assert_eq!(
7169            Int64(Some(0)).partial_cmp(&Int64(Some(33))),
7170            Some(Ordering::Less)
7171        );
7172        assert_eq!(
7173            Int64(Some(33)).partial_cmp(&Int64(Some(33))),
7174            Some(Ordering::Equal)
7175        );
7176        // For different data type, `partial_cmp` returns None.
7177        assert_eq!(Int64(Some(33)).partial_cmp(&Int32(Some(33))), None);
7178        assert_eq!(Int32(Some(33)).partial_cmp(&Int64(Some(33))), None);
7179
7180        assert_eq!(
7181            ScalarValue::from(vec![
7182                ("A", ScalarValue::from(1.0)),
7183                ("B", ScalarValue::from("Z")),
7184            ])
7185            .partial_cmp(&ScalarValue::from(vec![
7186                ("A", ScalarValue::from(2.0)),
7187                ("B", ScalarValue::from("A")),
7188            ])),
7189            Some(Ordering::Less)
7190        );
7191
7192        // For different struct fields, `partial_cmp` returns None.
7193        assert_eq!(
7194            ScalarValue::from(vec![
7195                ("A", ScalarValue::from(1.0)),
7196                ("B", ScalarValue::from("Z")),
7197            ])
7198            .partial_cmp(&ScalarValue::from(vec![
7199                ("a", ScalarValue::from(2.0)),
7200                ("b", ScalarValue::from("A")),
7201            ])),
7202            None
7203        );
7204    }
7205
7206    #[test]
7207    fn test_scalar_value_from_string() {
7208        let scalar = ScalarValue::from("foo");
7209        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
7210        let scalar = ScalarValue::from("foo".to_string());
7211        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
7212        let scalar = ScalarValue::from_str("foo").unwrap();
7213        assert_eq!(scalar, ScalarValue::Utf8(Some("foo".to_string())));
7214    }
7215
7216    #[test]
7217    fn test_scalar_struct() {
7218        let field_a = Arc::new(Field::new("A", DataType::Int32, false));
7219        let field_b = Arc::new(Field::new("B", DataType::Boolean, false));
7220        let field_c = Arc::new(Field::new("C", DataType::Utf8, false));
7221
7222        let field_e = Arc::new(Field::new("e", DataType::Int16, false));
7223        let field_f = Arc::new(Field::new("f", DataType::Int64, false));
7224        let field_d = Arc::new(Field::new(
7225            "D",
7226            DataType::Struct(vec![Arc::clone(&field_e), Arc::clone(&field_f)].into()),
7227            false,
7228        ));
7229
7230        let struct_array = StructArray::from(vec![
7231            (
7232                Arc::clone(&field_e),
7233                Arc::new(Int16Array::from(vec![2])) as ArrayRef,
7234            ),
7235            (
7236                Arc::clone(&field_f),
7237                Arc::new(Int64Array::from(vec![3])) as ArrayRef,
7238            ),
7239        ]);
7240
7241        let struct_array = StructArray::from(vec![
7242            (
7243                Arc::clone(&field_a),
7244                Arc::new(Int32Array::from(vec![23])) as ArrayRef,
7245            ),
7246            (
7247                Arc::clone(&field_b),
7248                Arc::new(BooleanArray::from(vec![false])) as ArrayRef,
7249            ),
7250            (
7251                Arc::clone(&field_c),
7252                Arc::new(StringArray::from(vec!["Hello"])) as ArrayRef,
7253            ),
7254            (Arc::clone(&field_d), Arc::new(struct_array) as ArrayRef),
7255        ]);
7256        let scalar = ScalarValue::Struct(Arc::new(struct_array));
7257
7258        let array = scalar
7259            .to_array_of_size(2)
7260            .expect("Failed to convert to array of size");
7261
7262        let expected = Arc::new(StructArray::from(vec![
7263            (
7264                Arc::clone(&field_a),
7265                Arc::new(Int32Array::from(vec![23, 23])) as ArrayRef,
7266            ),
7267            (
7268                Arc::clone(&field_b),
7269                Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef,
7270            ),
7271            (
7272                Arc::clone(&field_c),
7273                Arc::new(StringArray::from(vec!["Hello", "Hello"])) as ArrayRef,
7274            ),
7275            (
7276                Arc::clone(&field_d),
7277                Arc::new(StructArray::from(vec![
7278                    (
7279                        Arc::clone(&field_e),
7280                        Arc::new(Int16Array::from(vec![2, 2])) as ArrayRef,
7281                    ),
7282                    (
7283                        Arc::clone(&field_f),
7284                        Arc::new(Int64Array::from(vec![3, 3])) as ArrayRef,
7285                    ),
7286                ])) as ArrayRef,
7287            ),
7288        ])) as ArrayRef;
7289
7290        assert_eq!(&array, &expected);
7291
7292        // Construct from second element of ArrayRef
7293        let constructed = ScalarValue::try_from_array(&expected, 1).unwrap();
7294        assert_eq!(constructed, scalar);
7295
7296        // None version
7297        let none_scalar = ScalarValue::try_from(array.data_type()).unwrap();
7298        assert!(none_scalar.is_null());
7299        assert_eq!(
7300            format!("{none_scalar:?}"),
7301            String::from("Struct({A:,B:,C:,D:})")
7302        );
7303
7304        // Construct with convenience From<Vec<(&str, ScalarValue)>>
7305        let constructed = ScalarValue::from(vec![
7306            ("A", ScalarValue::from(23)),
7307            ("B", ScalarValue::from(false)),
7308            ("C", ScalarValue::from("Hello")),
7309            (
7310                "D",
7311                ScalarValue::from(vec![
7312                    ("e", ScalarValue::from(2i16)),
7313                    ("f", ScalarValue::from(3i64)),
7314                ]),
7315            ),
7316        ]);
7317        assert_eq!(constructed, scalar);
7318
7319        // Build Array from Vec of structs
7320        let scalars = vec![
7321            ScalarValue::from(vec![
7322                ("A", ScalarValue::from(23)),
7323                ("B", ScalarValue::from(false)),
7324                ("C", ScalarValue::from("Hello")),
7325                (
7326                    "D",
7327                    ScalarValue::from(vec![
7328                        ("e", ScalarValue::from(2i16)),
7329                        ("f", ScalarValue::from(3i64)),
7330                    ]),
7331                ),
7332            ]),
7333            ScalarValue::from(vec![
7334                ("A", ScalarValue::from(7)),
7335                ("B", ScalarValue::from(true)),
7336                ("C", ScalarValue::from("World")),
7337                (
7338                    "D",
7339                    ScalarValue::from(vec![
7340                        ("e", ScalarValue::from(4i16)),
7341                        ("f", ScalarValue::from(5i64)),
7342                    ]),
7343                ),
7344            ]),
7345            ScalarValue::from(vec![
7346                ("A", ScalarValue::from(-1000)),
7347                ("B", ScalarValue::from(true)),
7348                ("C", ScalarValue::from("!!!!!")),
7349                (
7350                    "D",
7351                    ScalarValue::from(vec![
7352                        ("e", ScalarValue::from(6i16)),
7353                        ("f", ScalarValue::from(7i64)),
7354                    ]),
7355                ),
7356            ]),
7357        ];
7358        let array = ScalarValue::iter_to_array(scalars).unwrap();
7359
7360        let expected = Arc::new(StructArray::from(vec![
7361            (
7362                Arc::clone(&field_a),
7363                Arc::new(Int32Array::from(vec![23, 7, -1000])) as ArrayRef,
7364            ),
7365            (
7366                Arc::clone(&field_b),
7367                Arc::new(BooleanArray::from(vec![false, true, true])) as ArrayRef,
7368            ),
7369            (
7370                Arc::clone(&field_c),
7371                Arc::new(StringArray::from(vec!["Hello", "World", "!!!!!"])) as ArrayRef,
7372            ),
7373            (
7374                Arc::clone(&field_d),
7375                Arc::new(StructArray::from(vec![
7376                    (
7377                        Arc::clone(&field_e),
7378                        Arc::new(Int16Array::from(vec![2, 4, 6])) as ArrayRef,
7379                    ),
7380                    (
7381                        Arc::clone(&field_f),
7382                        Arc::new(Int64Array::from(vec![3, 5, 7])) as ArrayRef,
7383                    ),
7384                ])) as ArrayRef,
7385            ),
7386        ])) as ArrayRef;
7387
7388        assert_eq!(&array, &expected);
7389    }
7390
7391    #[test]
7392    fn round_trip() {
7393        // Each array type should be able to round tripped through a scalar
7394        let cases: Vec<ArrayRef> = vec![
7395            // int
7396            Arc::new(Int8Array::from(vec![Some(1), None, Some(3)])),
7397            Arc::new(Int16Array::from(vec![Some(1), None, Some(3)])),
7398            Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])),
7399            Arc::new(Int64Array::from(vec![Some(1), None, Some(3)])),
7400            Arc::new(UInt8Array::from(vec![Some(1), None, Some(3)])),
7401            Arc::new(UInt16Array::from(vec![Some(1), None, Some(3)])),
7402            Arc::new(UInt32Array::from(vec![Some(1), None, Some(3)])),
7403            Arc::new(UInt64Array::from(vec![Some(1), None, Some(3)])),
7404            // bool
7405            Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)])),
7406            // float
7407            Arc::new(Float32Array::from(vec![Some(1.0), None, Some(3.0)])),
7408            Arc::new(Float64Array::from(vec![Some(1.0), None, Some(3.0)])),
7409            // string array
7410            Arc::new(StringArray::from(vec![Some("foo"), None, Some("bar")])),
7411            Arc::new(LargeStringArray::from(vec![Some("foo"), None, Some("bar")])),
7412            Arc::new(StringViewArray::from(vec![Some("foo"), None, Some("bar")])),
7413            // string dictionary
7414            {
7415                let mut builder = StringDictionaryBuilder::<Int32Type>::new();
7416                builder.append("foo").unwrap();
7417                builder.append_null();
7418                builder.append("bar").unwrap();
7419                Arc::new(builder.finish())
7420            },
7421            // binary array
7422            Arc::new(BinaryArray::from_iter(vec![
7423                Some(b"foo"),
7424                None,
7425                Some(b"bar"),
7426            ])),
7427            Arc::new(LargeBinaryArray::from_iter(vec![
7428                Some(b"foo"),
7429                None,
7430                Some(b"bar"),
7431            ])),
7432            Arc::new(BinaryViewArray::from_iter(vec![
7433                Some(b"foo"),
7434                None,
7435                Some(b"bar"),
7436            ])),
7437            // timestamp
7438            Arc::new(TimestampSecondArray::from(vec![Some(1), None, Some(3)])),
7439            Arc::new(TimestampMillisecondArray::from(vec![
7440                Some(1),
7441                None,
7442                Some(3),
7443            ])),
7444            Arc::new(TimestampMicrosecondArray::from(vec![
7445                Some(1),
7446                None,
7447                Some(3),
7448            ])),
7449            Arc::new(TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])),
7450            // timestamp with timezone
7451            Arc::new(
7452                TimestampSecondArray::from(vec![Some(1), None, Some(3)])
7453                    .with_timezone_opt(Some("UTC")),
7454            ),
7455            Arc::new(
7456                TimestampMillisecondArray::from(vec![Some(1), None, Some(3)])
7457                    .with_timezone_opt(Some("UTC")),
7458            ),
7459            Arc::new(
7460                TimestampMicrosecondArray::from(vec![Some(1), None, Some(3)])
7461                    .with_timezone_opt(Some("UTC")),
7462            ),
7463            Arc::new(
7464                TimestampNanosecondArray::from(vec![Some(1), None, Some(3)])
7465                    .with_timezone_opt(Some("UTC")),
7466            ),
7467            // date
7468            Arc::new(Date32Array::from(vec![Some(1), None, Some(3)])),
7469            Arc::new(Date64Array::from(vec![Some(1), None, Some(3)])),
7470            // time
7471            Arc::new(Time32SecondArray::from(vec![Some(1), None, Some(3)])),
7472            Arc::new(Time32MillisecondArray::from(vec![Some(1), None, Some(3)])),
7473            Arc::new(Time64MicrosecondArray::from(vec![Some(1), None, Some(3)])),
7474            Arc::new(Time64NanosecondArray::from(vec![Some(1), None, Some(3)])),
7475            // null array
7476            Arc::new(NullArray::new(3)),
7477            // dense union
7478            {
7479                let mut builder = UnionBuilder::new_dense();
7480                builder.append::<Int32Type>("a", 1).unwrap();
7481                builder.append::<Float64Type>("b", 3.4).unwrap();
7482                Arc::new(builder.build().unwrap())
7483            },
7484            // sparse union
7485            {
7486                let mut builder = UnionBuilder::new_sparse();
7487                builder.append::<Int32Type>("a", 1).unwrap();
7488                builder.append::<Float64Type>("b", 3.4).unwrap();
7489                Arc::new(builder.build().unwrap())
7490            },
7491            // list array
7492            {
7493                let values_builder = StringBuilder::new();
7494                let mut builder = ListBuilder::new(values_builder);
7495                // [A, B]
7496                builder.values().append_value("A");
7497                builder.values().append_value("B");
7498                builder.append(true);
7499                // [ ] (empty list)
7500                builder.append(true);
7501                // Null
7502                builder.values().append_value("?"); // irrelevant
7503                builder.append(false);
7504                Arc::new(builder.finish())
7505            },
7506            // large list array
7507            {
7508                let values_builder = StringBuilder::new();
7509                let mut builder = LargeListBuilder::new(values_builder);
7510                // [A, B]
7511                builder.values().append_value("A");
7512                builder.values().append_value("B");
7513                builder.append(true);
7514                // [ ] (empty list)
7515                builder.append(true);
7516                // Null
7517                builder.append(false);
7518                Arc::new(builder.finish())
7519            },
7520            // fixed size list array
7521            {
7522                let values_builder = Int32Builder::new();
7523                let mut builder = FixedSizeListBuilder::new(values_builder, 3);
7524
7525                //  [[0, 1, 2], null, [3, null, 5]
7526                builder.values().append_value(0);
7527                builder.values().append_value(1);
7528                builder.values().append_value(2);
7529                builder.append(true);
7530                builder.values().append_null();
7531                builder.values().append_null();
7532                builder.values().append_null();
7533                builder.append(false);
7534                builder.values().append_value(3);
7535                builder.values().append_null();
7536                builder.values().append_value(5);
7537                builder.append(true);
7538                Arc::new(builder.finish())
7539            },
7540            // map
7541            {
7542                let string_builder = StringBuilder::new();
7543                let int_builder = Int32Builder::with_capacity(4);
7544
7545                let mut builder = MapBuilder::new(None, string_builder, int_builder);
7546                // {"joe": 1}
7547                builder.keys().append_value("joe");
7548                builder.values().append_value(1);
7549                builder.append(true).unwrap();
7550                // {}
7551                builder.append(true).unwrap();
7552                // null
7553                builder.append(false).unwrap();
7554
7555                Arc::new(builder.finish())
7556            },
7557        ];
7558
7559        for arr in cases {
7560            round_trip_through_scalar(arr);
7561        }
7562    }
7563
7564    /// for each row in `arr`:
7565    /// 1. convert to a `ScalarValue`
7566    /// 2. Convert `ScalarValue` back to an `ArrayRef`
7567    /// 3. Compare the original array (sliced) and new array for equality
7568    fn round_trip_through_scalar(arr: ArrayRef) {
7569        for i in 0..arr.len() {
7570            // convert Scalar --> Array
7571            let scalar = ScalarValue::try_from_array(&arr, i).unwrap();
7572            let array = scalar.to_array_of_size(1).unwrap();
7573            assert_eq!(array.len(), 1);
7574            assert_eq!(array.data_type(), arr.data_type());
7575            assert_eq!(array.as_ref(), arr.slice(i, 1).as_ref());
7576        }
7577    }
7578
7579    #[test]
7580    fn roundtrip_run_array() {
7581        // Comparison logic in round_trip_through_scalar doesn't work for RunArrays
7582        // so we have a custom test for them
7583        // TODO: https://github.com/apache/arrow-rs/pull/9213 might fix this ^
7584        let run_ends = Int16Array::from(vec![2, 3]);
7585        let values = Int64Array::from(vec![Some(1), None]);
7586        let run_array = RunArray::try_new(&run_ends, &values).unwrap();
7587        let run_array = run_array.downcast::<Int64Array>().unwrap();
7588
7589        let expected_values = run_array.into_iter().collect::<Vec<_>>();
7590
7591        for i in 0..run_array.len() {
7592            let scalar = ScalarValue::try_from_array(&run_array, i).unwrap();
7593            let array = scalar.to_array_of_size(1).unwrap();
7594            assert_eq!(array.data_type(), run_array.data_type());
7595            let array = array.as_run::<Int16Type>();
7596            let array = array.downcast::<Int64Array>().unwrap();
7597            assert_eq!(
7598                array.into_iter().collect::<Vec<_>>(),
7599                expected_values[i..i + 1]
7600            );
7601        }
7602    }
7603
7604    #[test]
7605    fn test_scalar_union_sparse() {
7606        let field_a = Arc::new(Field::new("A", DataType::Int32, true));
7607        let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
7608        let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
7609        let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
7610
7611        let mut values_a = vec![None; 6];
7612        values_a[0] = Some(42);
7613        let mut values_b = vec![None; 6];
7614        values_b[1] = Some(true);
7615        let mut values_c = vec![None; 6];
7616        values_c[2] = Some("foo");
7617        let children: Vec<ArrayRef> = vec![
7618            Arc::new(Int32Array::from(values_a)),
7619            Arc::new(BooleanArray::from(values_b)),
7620            Arc::new(StringArray::from(values_c)),
7621        ];
7622
7623        let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
7624        let array: ArrayRef = Arc::new(
7625            UnionArray::try_new(fields.clone(), type_ids, None, children)
7626                .expect("UnionArray"),
7627        );
7628
7629        let expected = [
7630            (0, ScalarValue::from(42)),
7631            (1, ScalarValue::from(true)),
7632            (2, ScalarValue::from("foo")),
7633            (0, ScalarValue::Int32(None)),
7634            (1, ScalarValue::Boolean(None)),
7635            (2, ScalarValue::Utf8(None)),
7636        ];
7637
7638        for (i, (ti, value)) in expected.into_iter().enumerate() {
7639            let is_null = value.is_null();
7640            let value = Some((ti, Box::new(value)));
7641            let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Sparse);
7642            let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
7643
7644            assert_eq!(
7645                actual, expected,
7646                "[{i}] {actual} was not equal to {expected}"
7647            );
7648
7649            assert!(
7650                expected.eq_array(&array, i).expect("eq_array"),
7651                "[{i}] {expected}.eq_array was false"
7652            );
7653
7654            if is_null {
7655                assert!(actual.is_null(), "[{i}] {actual} was not null")
7656            }
7657        }
7658    }
7659
7660    #[test]
7661    fn test_scalar_union_dense() {
7662        let field_a = Arc::new(Field::new("A", DataType::Int32, true));
7663        let field_b = Arc::new(Field::new("B", DataType::Boolean, true));
7664        let field_c = Arc::new(Field::new("C", DataType::Utf8, true));
7665        let fields = UnionFields::from_iter([(0, field_a), (1, field_b), (2, field_c)]);
7666        let children: Vec<ArrayRef> = vec![
7667            Arc::new(Int32Array::from(vec![Some(42), None])),
7668            Arc::new(BooleanArray::from(vec![Some(true), None])),
7669            Arc::new(StringArray::from(vec![Some("foo"), None])),
7670        ];
7671
7672        let type_ids = ScalarBuffer::from(vec![0, 1, 2, 0, 1, 2]);
7673        let offsets = ScalarBuffer::from(vec![0, 0, 0, 1, 1, 1]);
7674        let array: ArrayRef = Arc::new(
7675            UnionArray::try_new(fields.clone(), type_ids, Some(offsets), children)
7676                .expect("UnionArray"),
7677        );
7678
7679        let expected = [
7680            (0, ScalarValue::from(42)),
7681            (1, ScalarValue::from(true)),
7682            (2, ScalarValue::from("foo")),
7683            (0, ScalarValue::Int32(None)),
7684            (1, ScalarValue::Boolean(None)),
7685            (2, ScalarValue::Utf8(None)),
7686        ];
7687
7688        for (i, (ti, value)) in expected.into_iter().enumerate() {
7689            let is_null = value.is_null();
7690            let value = Some((ti, Box::new(value)));
7691            let expected = ScalarValue::Union(value, fields.clone(), UnionMode::Dense);
7692            let actual = ScalarValue::try_from_array(&array, i).expect("try_from_array");
7693
7694            assert_eq!(
7695                actual, expected,
7696                "[{i}] {actual} was not equal to {expected}"
7697            );
7698
7699            assert!(
7700                expected.eq_array(&array, i).expect("eq_array"),
7701                "[{i}] {expected}.eq_array was false"
7702            );
7703
7704            if is_null {
7705                assert!(actual.is_null(), "[{i}] {actual} was not null")
7706            }
7707        }
7708    }
7709
7710    #[test]
7711    fn test_lists_in_struct() {
7712        let field_a = Arc::new(Field::new("A", DataType::Utf8, false));
7713        let field_primitive_list = Arc::new(Field::new(
7714            "primitive_list",
7715            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
7716            false,
7717        ));
7718
7719        // Define primitive list scalars
7720        let l0 =
7721            ScalarValue::List(Arc::new(
7722                ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
7723                    Some(1),
7724                    Some(2),
7725                    Some(3),
7726                ])]),
7727            ));
7728        let l1 =
7729            ScalarValue::List(Arc::new(
7730                ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
7731                    Some(4),
7732                    Some(5),
7733                ])]),
7734            ));
7735        let l2 = ScalarValue::List(Arc::new(ListArray::from_iter_primitive::<
7736            Int32Type,
7737            _,
7738            _,
7739        >(vec![Some(vec![Some(6)])])));
7740
7741        // Define struct scalars
7742        let s0 = ScalarValue::from(vec![
7743            ("A", ScalarValue::from("First")),
7744            ("primitive_list", l0),
7745        ]);
7746
7747        let s1 = ScalarValue::from(vec![
7748            ("A", ScalarValue::from("Second")),
7749            ("primitive_list", l1),
7750        ]);
7751
7752        let s2 = ScalarValue::from(vec![
7753            ("A", ScalarValue::from("Third")),
7754            ("primitive_list", l2),
7755        ]);
7756
7757        // iter_to_array for struct scalars
7758        let array =
7759            ScalarValue::iter_to_array(vec![s0.clone(), s1.clone(), s2.clone()]).unwrap();
7760
7761        let array = as_struct_array(&array).unwrap();
7762        let expected = StructArray::from(vec![
7763            (
7764                Arc::clone(&field_a),
7765                Arc::new(StringArray::from(vec!["First", "Second", "Third"])) as ArrayRef,
7766            ),
7767            (
7768                Arc::clone(&field_primitive_list),
7769                Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
7770                    Some(vec![Some(1), Some(2), Some(3)]),
7771                    Some(vec![Some(4), Some(5)]),
7772                    Some(vec![Some(6)]),
7773                ])),
7774            ),
7775        ]);
7776
7777        assert_eq!(array, &expected);
7778
7779        // Define list-of-structs scalars
7780
7781        let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap();
7782        let nl0 = SingleRowListArrayBuilder::new(nl0_array).build_list_scalar();
7783
7784        let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap();
7785        let nl1 = SingleRowListArrayBuilder::new(nl1_array).build_list_scalar();
7786
7787        let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap();
7788        let nl2 = SingleRowListArrayBuilder::new(nl2_array).build_list_scalar();
7789
7790        // iter_to_array for list-of-struct
7791        let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap();
7792        let array = array.as_list::<i32>();
7793
7794        // Construct expected array with array builders
7795        let field_a_builder = StringBuilder::with_capacity(4, 1024);
7796        let primitive_value_builder = Int32Array::builder(8);
7797        let field_primitive_list_builder = ListBuilder::new(primitive_value_builder);
7798
7799        let element_builder = StructBuilder::new(
7800            vec![field_a, field_primitive_list],
7801            vec![
7802                Box::new(field_a_builder),
7803                Box::new(field_primitive_list_builder),
7804            ],
7805        );
7806
7807        let mut list_builder = ListBuilder::new(element_builder);
7808
7809        list_builder
7810            .values()
7811            .field_builder::<StringBuilder>(0)
7812            .unwrap()
7813            .append_value("First");
7814        list_builder
7815            .values()
7816            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7817            .unwrap()
7818            .values()
7819            .append_value(1);
7820        list_builder
7821            .values()
7822            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7823            .unwrap()
7824            .values()
7825            .append_value(2);
7826        list_builder
7827            .values()
7828            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7829            .unwrap()
7830            .values()
7831            .append_value(3);
7832        list_builder
7833            .values()
7834            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7835            .unwrap()
7836            .append(true);
7837        list_builder.values().append(true);
7838
7839        list_builder
7840            .values()
7841            .field_builder::<StringBuilder>(0)
7842            .unwrap()
7843            .append_value("Second");
7844        list_builder
7845            .values()
7846            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7847            .unwrap()
7848            .values()
7849            .append_value(4);
7850        list_builder
7851            .values()
7852            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7853            .unwrap()
7854            .values()
7855            .append_value(5);
7856        list_builder
7857            .values()
7858            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7859            .unwrap()
7860            .append(true);
7861        list_builder.values().append(true);
7862        list_builder.append(true);
7863
7864        list_builder
7865            .values()
7866            .field_builder::<StringBuilder>(0)
7867            .unwrap()
7868            .append_value("Third");
7869        list_builder
7870            .values()
7871            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7872            .unwrap()
7873            .values()
7874            .append_value(6);
7875        list_builder
7876            .values()
7877            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7878            .unwrap()
7879            .append(true);
7880        list_builder.values().append(true);
7881        list_builder.append(true);
7882
7883        list_builder
7884            .values()
7885            .field_builder::<StringBuilder>(0)
7886            .unwrap()
7887            .append_value("Second");
7888        list_builder
7889            .values()
7890            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7891            .unwrap()
7892            .values()
7893            .append_value(4);
7894        list_builder
7895            .values()
7896            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7897            .unwrap()
7898            .values()
7899            .append_value(5);
7900        list_builder
7901            .values()
7902            .field_builder::<ListBuilder<PrimitiveBuilder<Int32Type>>>(1)
7903            .unwrap()
7904            .append(true);
7905        list_builder.values().append(true);
7906        list_builder.append(true);
7907
7908        let expected = list_builder.finish();
7909
7910        assert_eq!(array, &expected);
7911    }
7912
7913    fn build_2d_list(data: Vec<Option<i32>>) -> ListArray {
7914        let a1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(data)]);
7915        ListArray::new(
7916            Arc::new(Field::new_list_field(
7917                DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
7918                true,
7919            )),
7920            OffsetBuffer::<i32>::from_lengths([1]),
7921            Arc::new(a1),
7922            None,
7923        )
7924    }
7925
7926    #[test]
7927    fn test_nested_lists() {
7928        // Define inner list scalars
7929        let arr1 = build_2d_list(vec![Some(1), Some(2), Some(3)]);
7930        let arr2 = build_2d_list(vec![Some(4), Some(5)]);
7931        let arr3 = build_2d_list(vec![Some(6)]);
7932
7933        let array = ScalarValue::iter_to_array(vec![
7934            ScalarValue::List(Arc::new(arr1)),
7935            ScalarValue::List(Arc::new(arr2)),
7936            ScalarValue::List(Arc::new(arr3)),
7937        ])
7938        .unwrap();
7939        let array = array.as_list::<i32>();
7940
7941        // Construct expected array with array builders
7942        let inner_builder = Int32Array::builder(6);
7943        let middle_builder = ListBuilder::new(inner_builder);
7944        let mut outer_builder = ListBuilder::new(middle_builder);
7945
7946        outer_builder.values().values().append_value(1);
7947        outer_builder.values().values().append_value(2);
7948        outer_builder.values().values().append_value(3);
7949        outer_builder.values().append(true);
7950        outer_builder.append(true);
7951
7952        outer_builder.values().values().append_value(4);
7953        outer_builder.values().values().append_value(5);
7954        outer_builder.values().append(true);
7955        outer_builder.append(true);
7956
7957        outer_builder.values().values().append_value(6);
7958        outer_builder.values().append(true);
7959        outer_builder.append(true);
7960
7961        let expected = outer_builder.finish();
7962
7963        assert_eq!(array, &expected);
7964    }
7965
7966    #[test]
7967    fn scalar_timestamp_ns_utc_timezone() {
7968        let scalar = ScalarValue::TimestampNanosecond(
7969            Some(1599566400000000000),
7970            Some("UTC".into()),
7971        );
7972
7973        assert_eq!(
7974            scalar.data_type(),
7975            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
7976        );
7977
7978        let array = scalar.to_array().expect("Failed to convert to array");
7979        assert_eq!(array.len(), 1);
7980        assert_eq!(
7981            array.data_type(),
7982            &DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
7983        );
7984
7985        let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap();
7986        assert_eq!(
7987            new_scalar.data_type(),
7988            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
7989        );
7990    }
7991
7992    #[test]
7993    fn cast_round_trip() {
7994        check_scalar_cast(ScalarValue::Int8(Some(5)), DataType::Int16);
7995        check_scalar_cast(ScalarValue::Int8(None), DataType::Int16);
7996
7997        check_scalar_cast(ScalarValue::Float64(Some(5.5)), DataType::Int16);
7998
7999        check_scalar_cast(ScalarValue::Float64(None), DataType::Int16);
8000
8001        check_scalar_cast(
8002            ScalarValue::from("foo"),
8003            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
8004        );
8005
8006        check_scalar_cast(
8007            ScalarValue::Utf8(None),
8008            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
8009        );
8010
8011        check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View);
8012        check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View);
8013        check_scalar_cast(
8014            ScalarValue::from("larger than 12 bytes string"),
8015            DataType::Utf8View,
8016        );
8017        check_scalar_cast(
8018            {
8019                let element_field =
8020                    Arc::new(Field::new("element", DataType::Int32, true));
8021
8022                let mut builder =
8023                    ListBuilder::new(Int32Builder::new()).with_field(element_field);
8024                builder.append_value([Some(1)]);
8025                builder.append(true);
8026
8027                ScalarValue::List(Arc::new(builder.finish()))
8028            },
8029            DataType::List(Arc::new(Field::new("element", DataType::Int64, true))),
8030        );
8031        check_scalar_cast(
8032            {
8033                let element_field =
8034                    Arc::new(Field::new("element", DataType::Int32, true));
8035
8036                let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 1)
8037                    .with_field(element_field);
8038                builder.values().append_value(1);
8039                builder.append(true);
8040
8041                ScalarValue::FixedSizeList(Arc::new(builder.finish()))
8042            },
8043            DataType::FixedSizeList(
8044                Arc::new(Field::new("element", DataType::Int64, true)),
8045                1,
8046            ),
8047        );
8048        check_scalar_cast(
8049            {
8050                let element_field =
8051                    Arc::new(Field::new("element", DataType::Int32, true));
8052
8053                let mut builder =
8054                    LargeListBuilder::new(Int32Builder::new()).with_field(element_field);
8055                builder.append_value([Some(1)]);
8056                builder.append(true);
8057
8058                ScalarValue::LargeList(Arc::new(builder.finish()))
8059            },
8060            DataType::LargeList(Arc::new(Field::new("element", DataType::Int64, true))),
8061        );
8062    }
8063
8064    // mimics how casting work on scalar values by `casting` `scalar` to `desired_type`
8065    fn check_scalar_cast(scalar: ScalarValue, desired_type: DataType) {
8066        // convert from scalar --> Array to call cast
8067        let scalar_array = scalar.to_array().expect("Failed to convert to array");
8068        // cast the actual value
8069        let cast_array = kernels::cast::cast(&scalar_array, &desired_type).unwrap();
8070
8071        // turn it back to a scalar
8072        let cast_scalar = ScalarValue::try_from_array(&cast_array, 0).unwrap();
8073        assert_eq!(cast_scalar.data_type(), desired_type);
8074
8075        // Some time later the "cast" scalar is turned back into an array:
8076        let array = cast_scalar
8077            .to_array_of_size(10)
8078            .expect("Failed to convert to array of size");
8079
8080        // The datatype should be "Dictionary" but is actually Utf8!!!
8081        assert_eq!(array.data_type(), &desired_type)
8082    }
8083
8084    #[test]
8085    fn test_scalar_negative() -> Result<()> {
8086        // positive test
8087        let value = ScalarValue::Int32(Some(12));
8088        assert_eq!(ScalarValue::Int32(Some(-12)), value.arithmetic_negate()?);
8089        let value = ScalarValue::Int32(None);
8090        assert_eq!(ScalarValue::Int32(None), value.arithmetic_negate()?);
8091
8092        // negative test
8093        let value = ScalarValue::UInt8(Some(12));
8094        assert!(value.arithmetic_negate().is_err());
8095        let value = ScalarValue::Boolean(None);
8096        assert!(value.arithmetic_negate().is_err());
8097        Ok(())
8098    }
8099
8100    #[test]
8101    fn test_scalar_negative_overflows() -> Result<()> {
8102        macro_rules! test_overflow_on_value {
8103            ($($val:expr),* $(,)?) => {$(
8104                {
8105                    let value: ScalarValue = $val;
8106                    let err = value.arithmetic_negate().expect_err("Should receive overflow error on negating {value:?}");
8107                    let root_err = err.find_root();
8108                    match  root_err{
8109                        DataFusionError::ArrowError(err, _) if matches!(err.as_ref(), ArrowError::ArithmeticOverflow(_)) => {}
8110                        _ => return Err(err),
8111                    };
8112                }
8113            )*};
8114        }
8115        test_overflow_on_value!(
8116            // the integers
8117            i8::MIN.into(),
8118            i16::MIN.into(),
8119            i32::MIN.into(),
8120            i64::MIN.into(),
8121            // for decimals, only value needs to be tested
8122            ScalarValue::try_new_decimal128(i128::MIN, 10, 5)?,
8123            ScalarValue::Decimal256(Some(i256::MIN), 20, 5),
8124            // interval, check all possible values
8125            ScalarValue::IntervalYearMonth(Some(i32::MIN)),
8126            ScalarValue::new_interval_dt(i32::MIN, 999),
8127            ScalarValue::new_interval_dt(1, i32::MIN),
8128            ScalarValue::new_interval_mdn(i32::MIN, 15, 123_456),
8129            ScalarValue::new_interval_mdn(12, i32::MIN, 123_456),
8130            ScalarValue::new_interval_mdn(12, 15, i64::MIN),
8131            // tz doesn't matter when negating
8132            ScalarValue::TimestampSecond(Some(i64::MIN), None),
8133            ScalarValue::TimestampMillisecond(Some(i64::MIN), None),
8134            ScalarValue::TimestampMicrosecond(Some(i64::MIN), None),
8135            ScalarValue::TimestampNanosecond(Some(i64::MIN), None),
8136        );
8137
8138        let float_cases = [
8139            (
8140                ScalarValue::Float16(Some(f16::MIN)),
8141                ScalarValue::Float16(Some(f16::MAX)),
8142            ),
8143            (
8144                ScalarValue::Float16(Some(f16::MAX)),
8145                ScalarValue::Float16(Some(f16::MIN)),
8146            ),
8147            (f32::MIN.into(), f32::MAX.into()),
8148            (f32::MAX.into(), f32::MIN.into()),
8149            (f64::MIN.into(), f64::MAX.into()),
8150            (f64::MAX.into(), f64::MIN.into()),
8151        ];
8152        // skip float 16 because they aren't supported
8153        for (test, expected) in float_cases.into_iter().skip(2) {
8154            assert_eq!(test.arithmetic_negate()?, expected);
8155        }
8156        Ok(())
8157    }
8158
8159    #[test]
8160    fn f16_test_overflow() {
8161        // TODO: if negate supports f16, add these cases to `test_scalar_negative_overflows` test case
8162        let cases = [
8163            (
8164                ScalarValue::Float16(Some(f16::MIN)),
8165                ScalarValue::Float16(Some(f16::MAX)),
8166            ),
8167            (
8168                ScalarValue::Float16(Some(f16::MAX)),
8169                ScalarValue::Float16(Some(f16::MIN)),
8170            ),
8171        ];
8172
8173        for (test, expected) in cases {
8174            assert_eq!(test.arithmetic_negate().unwrap(), expected);
8175        }
8176    }
8177
8178    macro_rules! expect_operation_error {
8179        ($TEST_NAME:ident, $FUNCTION:ident, $EXPECTED_ERROR:expr) => {
8180            #[test]
8181            fn $TEST_NAME() {
8182                let lhs = ScalarValue::UInt64(Some(12));
8183                let rhs = ScalarValue::Int32(Some(-3));
8184                match lhs.$FUNCTION(&rhs) {
8185                    Ok(_result) => {
8186                        panic!(
8187                            "Expected binary operation error between lhs: '{:?}', rhs: {:?}",
8188                            lhs, rhs
8189                        );
8190                    }
8191                    Err(e) => {
8192                        let error_message = e.to_string();
8193                        assert!(
8194                            error_message.contains($EXPECTED_ERROR),
8195                            "Expected error '{}' not found in actual error '{}'",
8196                            $EXPECTED_ERROR,
8197                            error_message
8198                        );
8199                    }
8200                }
8201            }
8202        };
8203    }
8204
    // `add` with a UInt64 lhs and an Int32 rhs must fail with this message.
    expect_operation_error!(
        expect_add_error,
        add,
        "Invalid arithmetic operation: UInt64 + Int32"
    );
    // `sub` with a UInt64 lhs and an Int32 rhs must fail with this message.
    expect_operation_error!(
        expect_sub_error,
        sub,
        "Invalid arithmetic operation: UInt64 - Int32"
    );
8215
8216    macro_rules! decimal_op_test_cases {
8217    ($OPERATION:ident, [$([$L_VALUE:expr, $L_PRECISION:expr, $L_SCALE:expr, $R_VALUE:expr, $R_PRECISION:expr, $R_SCALE:expr, $O_VALUE:expr, $O_PRECISION:expr, $O_SCALE:expr]),+]) => {
8218            $(
8219
8220                let left = ScalarValue::Decimal128($L_VALUE, $L_PRECISION, $L_SCALE);
8221                let right = ScalarValue::Decimal128($R_VALUE, $R_PRECISION, $R_SCALE);
8222                let result = left.$OPERATION(&right).unwrap();
8223                assert_eq!(ScalarValue::Decimal128($O_VALUE, $O_PRECISION, $O_SCALE), result);
8224
8225            )+
8226        };
8227    }
8228
8229    #[test]
8230    fn decimal_operations() {
8231        decimal_op_test_cases!(
8232            add,
8233            [
8234                [Some(123), 10, 2, Some(124), 10, 2, Some(123 + 124), 11, 2],
8235                // test sum decimal with diff scale
8236                [
8237                    Some(123),
8238                    10,
8239                    3,
8240                    Some(124),
8241                    10,
8242                    2,
8243                    Some(123 + 124 * 10_i128.pow(1)),
8244                    12,
8245                    3
8246                ],
8247                // diff precision and scale for decimal data type
8248                [
8249                    Some(123),
8250                    10,
8251                    2,
8252                    Some(124),
8253                    11,
8254                    3,
8255                    Some(123 * 10_i128.pow(3 - 2) + 124),
8256                    12,
8257                    3
8258                ]
8259            ]
8260        );
8261    }
8262
8263    #[test]
8264    fn decimal_operations_with_nulls() {
8265        decimal_op_test_cases!(
8266            add,
8267            [
8268                // Case: (None, Some, 0)
8269                [None, 10, 2, Some(123), 10, 2, None, 11, 2],
8270                // Case: (Some, None, 0)
8271                [Some(123), 10, 2, None, 10, 2, None, 11, 2],
8272                // Case: (Some, None, _) + Side=False
8273                [Some(123), 8, 2, None, 10, 3, None, 11, 3],
8274                // Case: (None, Some, _) + Side=False
8275                [None, 8, 2, Some(123), 10, 3, None, 11, 3],
8276                // Case: (Some, None, _) + Side=True
8277                [Some(123), 8, 4, None, 10, 3, None, 12, 4],
8278                // Case: (None, Some, _) + Side=True
8279                [None, 10, 3, Some(123), 8, 4, None, 12, 4]
8280            ]
8281        );
8282    }
8283
8284    #[test]
8285    fn test_scalar_distance() {
8286        let cases = [
8287            // scalar (lhs), scalar (rhs), expected distance
8288            // ---------------------------------------------
8289            (ScalarValue::Int8(Some(1)), ScalarValue::Int8(Some(2)), 1),
8290            (ScalarValue::Int8(Some(2)), ScalarValue::Int8(Some(1)), 1),
8291            (
8292                ScalarValue::Int16(Some(-5)),
8293                ScalarValue::Int16(Some(5)),
8294                10,
8295            ),
8296            (
8297                ScalarValue::Int16(Some(5)),
8298                ScalarValue::Int16(Some(-5)),
8299                10,
8300            ),
8301            (ScalarValue::Int32(Some(0)), ScalarValue::Int32(Some(0)), 0),
8302            (
8303                ScalarValue::Int32(Some(-5)),
8304                ScalarValue::Int32(Some(-10)),
8305                5,
8306            ),
8307            (
8308                ScalarValue::Int64(Some(-10)),
8309                ScalarValue::Int64(Some(-5)),
8310                5,
8311            ),
8312            (ScalarValue::UInt8(Some(1)), ScalarValue::UInt8(Some(2)), 1),
8313            (ScalarValue::UInt8(Some(0)), ScalarValue::UInt8(Some(0)), 0),
8314            (
8315                ScalarValue::UInt16(Some(5)),
8316                ScalarValue::UInt16(Some(10)),
8317                5,
8318            ),
8319            (
8320                ScalarValue::UInt32(Some(10)),
8321                ScalarValue::UInt32(Some(5)),
8322                5,
8323            ),
8324            (
8325                ScalarValue::UInt64(Some(5)),
8326                ScalarValue::UInt64(Some(10)),
8327                5,
8328            ),
8329            (
8330                ScalarValue::Float16(Some(f16::from_f32(1.1))),
8331                ScalarValue::Float16(Some(f16::from_f32(1.9))),
8332                1,
8333            ),
8334            (
8335                ScalarValue::Float16(Some(f16::from_f32(-5.3))),
8336                ScalarValue::Float16(Some(f16::from_f32(-9.2))),
8337                4,
8338            ),
8339            (
8340                ScalarValue::Float16(Some(f16::from_f32(-5.3))),
8341                ScalarValue::Float16(Some(f16::from_f32(-9.7))),
8342                4,
8343            ),
8344            (
8345                ScalarValue::Float32(Some(1.0)),
8346                ScalarValue::Float32(Some(2.0)),
8347                1,
8348            ),
8349            (
8350                ScalarValue::Float32(Some(2.0)),
8351                ScalarValue::Float32(Some(1.0)),
8352                1,
8353            ),
8354            (
8355                ScalarValue::Float64(Some(0.0)),
8356                ScalarValue::Float64(Some(0.0)),
8357                0,
8358            ),
8359            (
8360                ScalarValue::Float64(Some(-5.0)),
8361                ScalarValue::Float64(Some(-10.0)),
8362                5,
8363            ),
8364            (
8365                ScalarValue::Float64(Some(-10.0)),
8366                ScalarValue::Float64(Some(-5.0)),
8367                5,
8368            ),
8369            // Floats are currently special cased to f64/f32 and the result is rounded
8370            // rather than ceiled/floored. In the future we might want to take a mode
8371            // which specified the rounding behavior.
8372            (
8373                ScalarValue::Float32(Some(1.2)),
8374                ScalarValue::Float32(Some(1.3)),
8375                0,
8376            ),
8377            (
8378                ScalarValue::Float32(Some(1.1)),
8379                ScalarValue::Float32(Some(1.9)),
8380                1,
8381            ),
8382            (
8383                ScalarValue::Float64(Some(-5.3)),
8384                ScalarValue::Float64(Some(-9.2)),
8385                4,
8386            ),
8387            (
8388                ScalarValue::Float64(Some(-5.3)),
8389                ScalarValue::Float64(Some(-9.7)),
8390                4,
8391            ),
8392            (
8393                ScalarValue::Float64(Some(-5.3)),
8394                ScalarValue::Float64(Some(-9.9)),
8395                5,
8396            ),
8397            (
8398                ScalarValue::Decimal128(Some(10), 1, 0),
8399                ScalarValue::Decimal128(Some(5), 1, 0),
8400                5,
8401            ),
8402            (
8403                ScalarValue::Decimal128(Some(5), 1, 0),
8404                ScalarValue::Decimal128(Some(10), 1, 0),
8405                5,
8406            ),
8407            (
8408                ScalarValue::Decimal256(Some(10.into()), 1, 0),
8409                ScalarValue::Decimal256(Some(5.into()), 1, 0),
8410                5,
8411            ),
8412            (
8413                ScalarValue::Decimal256(Some(5.into()), 1, 0),
8414                ScalarValue::Decimal256(Some(10.into()), 1, 0),
8415                5,
8416            ),
8417        ];
8418        for (lhs, rhs, expected) in cases.iter() {
8419            let distance = lhs.distance(rhs).unwrap();
8420            assert_eq!(distance, *expected);
8421        }
8422    }
8423
8424    #[test]
8425    fn test_distance_none() {
8426        let cases = [
8427            (
8428                ScalarValue::Decimal128(Some(i128::MAX), DECIMAL128_MAX_PRECISION, 0),
8429                ScalarValue::Decimal128(Some(-i128::MAX), DECIMAL128_MAX_PRECISION, 0),
8430            ),
8431            (
8432                ScalarValue::Decimal256(Some(i256::MAX), DECIMAL256_MAX_PRECISION, 0),
8433                ScalarValue::Decimal256(Some(-i256::MAX), DECIMAL256_MAX_PRECISION, 0),
8434            ),
8435        ];
8436        for (lhs, rhs) in cases.iter() {
8437            let distance = lhs.distance(rhs);
8438            assert!(distance.is_none(), "{lhs} vs {rhs}");
8439        }
8440    }
8441
8442    #[test]
8443    fn test_scalar_distance_invalid() {
8444        let cases = [
8445            // scalar (lhs), scalar (rhs)
8446            // --------------------------
8447            // Same type but with nulls
8448            (ScalarValue::Int8(None), ScalarValue::Int8(None)),
8449            (ScalarValue::Int8(None), ScalarValue::Int8(Some(1))),
8450            (ScalarValue::Int8(Some(1)), ScalarValue::Int8(None)),
8451            // Different type
8452            (ScalarValue::Int8(Some(1)), ScalarValue::Int16(Some(1))),
8453            (ScalarValue::Int8(Some(1)), ScalarValue::Float32(Some(1.0))),
8454            (
8455                ScalarValue::Float16(Some(f16::from_f32(1.0))),
8456                ScalarValue::Float32(Some(1.0)),
8457            ),
8458            (
8459                ScalarValue::Float16(Some(f16::from_f32(1.0))),
8460                ScalarValue::Int32(Some(1)),
8461            ),
8462            (
8463                ScalarValue::Float64(Some(1.1)),
8464                ScalarValue::Float32(Some(2.2)),
8465            ),
8466            (
8467                ScalarValue::UInt64(Some(777)),
8468                ScalarValue::Int32(Some(111)),
8469            ),
8470            // Different types with nulls
8471            (ScalarValue::Int8(None), ScalarValue::Int16(Some(1))),
8472            (ScalarValue::Int8(Some(1)), ScalarValue::Int16(None)),
8473            // Unsupported types
8474            (ScalarValue::from("foo"), ScalarValue::from("bar")),
8475            (
8476                ScalarValue::Boolean(Some(true)),
8477                ScalarValue::Boolean(Some(false)),
8478            ),
8479            (ScalarValue::Date32(Some(0)), ScalarValue::Date32(Some(1))),
8480            (ScalarValue::Date64(Some(0)), ScalarValue::Date64(Some(1))),
8481            (
8482                ScalarValue::Decimal128(Some(123), 5, 5),
8483                ScalarValue::Decimal128(Some(120), 5, 3),
8484            ),
8485            (
8486                ScalarValue::Decimal128(Some(123), 5, 5),
8487                ScalarValue::Decimal128(Some(120), 3, 5),
8488            ),
8489            (
8490                ScalarValue::Decimal256(Some(123.into()), 5, 5),
8491                ScalarValue::Decimal256(Some(120.into()), 3, 5),
8492            ),
8493            // Distance 2 * 2^50 is larger than usize
8494            (
8495                ScalarValue::Decimal256(
8496                    Some(i256::from_parts(0, 2_i64.pow(50).into())),
8497                    1,
8498                    0,
8499                ),
8500                ScalarValue::Decimal256(
8501                    Some(i256::from_parts(0, (-(2_i64).pow(50)).into())),
8502                    1,
8503                    0,
8504                ),
8505            ),
8506            // Distance overflow
8507            (
8508                ScalarValue::Decimal256(Some(i256::from_parts(0, i128::MAX)), 1, 0),
8509                ScalarValue::Decimal256(Some(i256::from_parts(0, -i128::MAX)), 1, 0),
8510            ),
8511        ];
8512        for (lhs, rhs) in cases {
8513            let distance = lhs.distance(&rhs);
8514            assert!(distance.is_none());
8515        }
8516    }
8517
8518    #[test]
8519    fn test_scalar_interval_negate() {
8520        let cases = [
8521            (
8522                ScalarValue::new_interval_ym(1, 12),
8523                ScalarValue::new_interval_ym(-1, -12),
8524            ),
8525            (
8526                ScalarValue::new_interval_dt(1, 999),
8527                ScalarValue::new_interval_dt(-1, -999),
8528            ),
8529            (
8530                ScalarValue::new_interval_mdn(12, 15, 123_456),
8531                ScalarValue::new_interval_mdn(-12, -15, -123_456),
8532            ),
8533        ];
8534        for (expr, expected) in cases.iter() {
8535            let result = expr.arithmetic_negate().unwrap();
8536            assert_eq!(*expected, result, "-expr:{expr:?}");
8537        }
8538    }
8539
8540    #[test]
8541    fn test_scalar_interval_add() {
8542        let cases = [
8543            (
8544                ScalarValue::new_interval_ym(1, 12),
8545                ScalarValue::new_interval_ym(1, 12),
8546                ScalarValue::new_interval_ym(2, 24),
8547            ),
8548            (
8549                ScalarValue::new_interval_dt(1, 999),
8550                ScalarValue::new_interval_dt(1, 999),
8551                ScalarValue::new_interval_dt(2, 1998),
8552            ),
8553            (
8554                ScalarValue::new_interval_mdn(12, 15, 123_456),
8555                ScalarValue::new_interval_mdn(12, 15, 123_456),
8556                ScalarValue::new_interval_mdn(24, 30, 246_912),
8557            ),
8558        ];
8559        for (lhs, rhs, expected) in cases.iter() {
8560            let result = lhs.add(rhs).unwrap();
8561            let result_commute = rhs.add(lhs).unwrap();
8562            assert_eq!(*expected, result, "lhs:{lhs:?} + rhs:{rhs:?}");
8563            assert_eq!(*expected, result_commute, "lhs:{rhs:?} + rhs:{lhs:?}");
8564        }
8565    }
8566
8567    #[test]
8568    fn test_scalar_interval_sub() {
8569        let cases = [
8570            (
8571                ScalarValue::new_interval_ym(1, 12),
8572                ScalarValue::new_interval_ym(1, 12),
8573                ScalarValue::new_interval_ym(0, 0),
8574            ),
8575            (
8576                ScalarValue::new_interval_dt(1, 999),
8577                ScalarValue::new_interval_dt(1, 999),
8578                ScalarValue::new_interval_dt(0, 0),
8579            ),
8580            (
8581                ScalarValue::new_interval_mdn(12, 15, 123_456),
8582                ScalarValue::new_interval_mdn(12, 15, 123_456),
8583                ScalarValue::new_interval_mdn(0, 0, 0),
8584            ),
8585        ];
8586        for (lhs, rhs, expected) in cases.iter() {
8587            let result = lhs.sub(rhs).unwrap();
8588            assert_eq!(*expected, result, "lhs:{lhs:?} - rhs:{rhs:?}");
8589        }
8590    }
8591
8592    #[test]
8593    fn timestamp_op_random_tests() {
8594        // timestamp1 + (or -) interval = timestamp2
8595        // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ?
8596        let sample_size = 1000;
8597        let timestamps1 = get_random_timestamps(sample_size);
8598        let intervals = get_random_intervals(sample_size);
8599        // ts(sec) + interval(ns) = ts(sec); however,
8600        // ts(sec) - ts(sec) cannot be = interval(ns). Therefore,
8601        // timestamps are more precise than intervals in tests.
8602        for (idx, ts1) in timestamps1.iter().enumerate() {
8603            if idx % 2 == 0 {
8604                let timestamp2 = ts1.add(intervals[idx].clone()).unwrap();
8605                let back = timestamp2.sub(intervals[idx].clone()).unwrap();
8606                assert_eq!(ts1, &back);
8607            } else {
8608                let timestamp2 = ts1.sub(intervals[idx].clone()).unwrap();
8609                let back = timestamp2.add(intervals[idx].clone()).unwrap();
8610                assert_eq!(ts1, &back);
8611            };
8612        }
8613    }
8614
8615    #[test]
8616    fn test_struct_nulls() {
8617        let fields_b = Fields::from(vec![
8618            Field::new("ba", DataType::UInt64, true),
8619            Field::new("bb", DataType::UInt64, true),
8620        ]);
8621        let fields = Fields::from(vec![
8622            Field::new("a", DataType::UInt64, true),
8623            Field::new("b", DataType::Struct(fields_b.clone()), true),
8624        ]);
8625
8626        let struct_value = vec![
8627            (
8628                Arc::clone(&fields[0]),
8629                Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
8630            ),
8631            (
8632                Arc::clone(&fields[1]),
8633                Arc::new(StructArray::from(vec![
8634                    (
8635                        Arc::clone(&fields_b[0]),
8636                        Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
8637                    ),
8638                    (
8639                        Arc::clone(&fields_b[1]),
8640                        Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
8641                    ),
8642                ])) as ArrayRef,
8643            ),
8644        ];
8645
8646        let struct_value_with_nulls = vec![
8647            (
8648                Arc::clone(&fields[0]),
8649                Arc::new(UInt64Array::from(vec![Some(1)])) as ArrayRef,
8650            ),
8651            (
8652                Arc::clone(&fields[1]),
8653                Arc::new(StructArray::from((
8654                    vec![
8655                        (
8656                            Arc::clone(&fields_b[0]),
8657                            Arc::new(UInt64Array::from(vec![Some(2)])) as ArrayRef,
8658                        ),
8659                        (
8660                            Arc::clone(&fields_b[1]),
8661                            Arc::new(UInt64Array::from(vec![Some(3)])) as ArrayRef,
8662                        ),
8663                    ],
8664                    Buffer::from(&[0]),
8665                ))) as ArrayRef,
8666            ),
8667        ];
8668
8669        let scalars = vec![
8670            // all null
8671            ScalarValue::Struct(Arc::new(StructArray::from((
8672                struct_value.clone(),
8673                Buffer::from(&[0]),
8674            )))),
8675            // field 1 valid, field 2 null
8676            ScalarValue::Struct(Arc::new(StructArray::from((
8677                struct_value_with_nulls.clone(),
8678                Buffer::from(&[1]),
8679            )))),
8680            // all valid
8681            ScalarValue::Struct(Arc::new(StructArray::from((
8682                struct_value.clone(),
8683                Buffer::from(&[1]),
8684            )))),
8685        ];
8686
8687        let check_array = |array| {
8688            let is_null = is_null(&array).unwrap();
8689            assert_eq!(is_null, BooleanArray::from(vec![true, false, false]));
8690
8691            let formatted = pretty_format_columns("col", &[array]).unwrap().to_string();
8692            let formatted = formatted.split('\n').collect::<Vec<_>>();
8693            let expected = vec![
8694                "+---------------------------+",
8695                "| col                       |",
8696                "+---------------------------+",
8697                "|                           |",
8698                "| {a: 1, b: }               |",
8699                "| {a: 1, b: {ba: 2, bb: 3}} |",
8700                "+---------------------------+",
8701            ];
8702            assert_eq!(
8703                formatted, expected,
8704                "Actual:\n{formatted:#?}\n\nExpected:\n{expected:#?}"
8705            );
8706        };
8707
8708        // test `ScalarValue::iter_to_array`
8709        let array = ScalarValue::iter_to_array(scalars.clone()).unwrap();
8710        check_array(array);
8711
8712        // test `ScalarValue::to_array` / `ScalarValue::to_array_of_size`
8713        let arrays = scalars
8714            .iter()
8715            .map(ScalarValue::to_array)
8716            .collect::<Result<Vec<_>>>()
8717            .expect("Failed to convert to array");
8718        let arrays = arrays.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
8719        let array = arrow::compute::concat(&arrays).unwrap();
8720        check_array(array);
8721    }
8722
8723    #[test]
8724    fn test_struct_display() {
8725        let field_a = Field::new("a", DataType::Int32, true);
8726        let field_b = Field::new("b", DataType::Utf8, true);
8727
8728        let s = ScalarStructBuilder::new()
8729            .with_scalar(field_a, ScalarValue::from(1i32))
8730            .with_scalar(field_b, ScalarValue::Utf8(None))
8731            .build()
8732            .unwrap();
8733
8734        assert_eq!(s.to_string(), "{a:1,b:}");
8735        assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:})"#);
8736
8737        let ScalarValue::Struct(arr) = s else {
8738            panic!("Expected struct");
8739        };
8740
8741        //verify compared to arrow display
8742        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
8743        assert_snapshot!(batches_to_string(&[batch]), @r"
8744        +-------------+
8745        | s           |
8746        +-------------+
8747        | {a: 1, b: } |
8748        +-------------+
8749        ");
8750    }
8751
8752    #[test]
8753    fn test_null_bug() {
8754        let field_a = Field::new("a", DataType::Int32, true);
8755        let field_b = Field::new("b", DataType::Int32, true);
8756        let fields = Fields::from(vec![field_a, field_b]);
8757
8758        let array_a = Arc::new(Int32Array::from_iter_values([1]));
8759        let array_b = Arc::new(Int32Array::from_iter_values([2]));
8760        let arrays: Vec<ArrayRef> = vec![array_a, array_b];
8761
8762        let mut not_nulls = NullBufferBuilder::new(1);
8763
8764        not_nulls.append_non_null();
8765
8766        let ar = StructArray::new(fields, arrays, not_nulls.finish());
8767        let s = ScalarValue::Struct(Arc::new(ar));
8768
8769        assert_eq!(s.to_string(), "{a:1,b:2}");
8770        assert_eq!(format!("{s:?}"), r#"Struct({a:1,b:2})"#);
8771
8772        let ScalarValue::Struct(arr) = s else {
8773            panic!("Expected struct");
8774        };
8775
8776        //verify compared to arrow display
8777        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
8778        assert_snapshot!(batches_to_string(&[batch]), @r"
8779        +--------------+
8780        | s            |
8781        +--------------+
8782        | {a: 1, b: 2} |
8783        +--------------+
8784        ");
8785    }
8786
8787    #[test]
8788    fn test_display_date64_large_values() {
8789        assert_eq!(
8790            format!("{}", ScalarValue::Date64(Some(790179464505))),
8791            "1995-01-15"
8792        );
8793        // This used to panic, see https://github.com/apache/arrow-rs/issues/7728
8794        assert_eq!(
8795            format!("{}", ScalarValue::Date64(Some(-790179464505600000))),
8796            ""
8797        );
8798    }
8799
8800    #[test]
8801    fn test_struct_display_null() {
8802        let fields = vec![Field::new("a", DataType::Int32, false)];
8803        let s = ScalarStructBuilder::new_null(fields);
8804        assert_eq!(s.to_string(), "NULL");
8805
8806        let ScalarValue::Struct(arr) = s else {
8807            panic!("Expected struct");
8808        };
8809
8810        //verify compared to arrow display
8811        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
8812
8813        assert_snapshot!(batches_to_string(&[batch]), @r"
8814        +---+
8815        | s |
8816        +---+
8817        |   |
8818        +---+
8819        ");
8820    }
8821
8822    #[test]
8823    fn test_map_display_and_debug() {
8824        let string_builder = StringBuilder::new();
8825        let int_builder = Int32Builder::with_capacity(4);
8826        let mut builder = MapBuilder::new(None, string_builder, int_builder);
8827        builder.keys().append_value("joe");
8828        builder.values().append_value(1);
8829        builder.append(true).unwrap();
8830
8831        builder.keys().append_value("blogs");
8832        builder.values().append_value(2);
8833        builder.keys().append_value("foo");
8834        builder.values().append_value(4);
8835        builder.append(true).unwrap();
8836        builder.append(true).unwrap();
8837        builder.append(false).unwrap();
8838
8839        let map_value = ScalarValue::Map(Arc::new(builder.finish()));
8840
8841        assert_eq!(map_value.to_string(), "[{joe:1},{blogs:2,foo:4},{},NULL]");
8842        assert_eq!(
8843            format!("{map_value:?}"),
8844            r#"Map([{"joe":"1"},{"blogs":"2","foo":"4"},{},NULL])"#
8845        );
8846
8847        let ScalarValue::Map(arr) = map_value else {
8848            panic!("Expected map");
8849        };
8850
8851        //verify compared to arrow display
8852        let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap();
8853        assert_snapshot!(batches_to_string(&[batch]), @r"
8854        +--------------------+
8855        | m                  |
8856        +--------------------+
8857        | {joe: 1}           |
8858        | {blogs: 2, foo: 4} |
8859        | {}                 |
8860        |                    |
8861        +--------------------+
8862        ");
8863    }
8864
8865    #[test]
8866    fn test_binary_display() {
8867        let no_binary_value = ScalarValue::Binary(None);
8868        assert_eq!(format!("{no_binary_value}"), "NULL");
8869        let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
8870        assert_eq!(format!("{single_binary_value}"), "2A");
8871        let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
8872        assert_eq!(format!("{small_binary_value}"), "010203");
8873        let large_binary_value =
8874            ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
8875        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
8876
8877        let no_binary_value = ScalarValue::BinaryView(None);
8878        assert_eq!(format!("{no_binary_value}"), "NULL");
8879        let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
8880        assert_eq!(format!("{small_binary_value}"), "010203");
8881        let large_binary_value =
8882            ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
8883        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
8884
8885        let no_binary_value = ScalarValue::LargeBinary(None);
8886        assert_eq!(format!("{no_binary_value}"), "NULL");
8887        let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
8888        assert_eq!(format!("{small_binary_value}"), "010203");
8889        let large_binary_value =
8890            ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
8891        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
8892
8893        let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
8894        assert_eq!(format!("{no_binary_value}"), "NULL");
8895        let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
8896        assert_eq!(format!("{small_binary_value}"), "010203");
8897        let large_binary_value = ScalarValue::FixedSizeBinary(
8898            11,
8899            Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
8900        );
8901        assert_eq!(format!("{large_binary_value}"), "0102030405060708090A...");
8902    }
8903
8904    #[test]
8905    fn test_binary_debug() {
8906        let no_binary_value = ScalarValue::Binary(None);
8907        assert_eq!(format!("{no_binary_value:?}"), "Binary(NULL)");
8908        let single_binary_value = ScalarValue::Binary(Some(vec![42u8]));
8909        assert_eq!(format!("{single_binary_value:?}"), "Binary(\"42\")");
8910        let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3]));
8911        assert_eq!(format!("{small_binary_value:?}"), "Binary(\"1,2,3\")");
8912        let large_binary_value =
8913            ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
8914        assert_eq!(
8915            format!("{large_binary_value:?}"),
8916            "Binary(\"1,2,3,4,5,6,7,8,9,10,11\")"
8917        );
8918
8919        let no_binary_value = ScalarValue::BinaryView(None);
8920        assert_eq!(format!("{no_binary_value:?}"), "BinaryView(NULL)");
8921        let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3]));
8922        assert_eq!(format!("{small_binary_value:?}"), "BinaryView(\"1,2,3\")");
8923        let large_binary_value =
8924            ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
8925        assert_eq!(
8926            format!("{large_binary_value:?}"),
8927            "BinaryView(\"1,2,3,4,5,6,7,8,9,10,11\")"
8928        );
8929
8930        let no_binary_value = ScalarValue::LargeBinary(None);
8931        assert_eq!(format!("{no_binary_value:?}"), "LargeBinary(NULL)");
8932        let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3]));
8933        assert_eq!(format!("{small_binary_value:?}"), "LargeBinary(\"1,2,3\")");
8934        let large_binary_value =
8935            ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]));
8936        assert_eq!(
8937            format!("{large_binary_value:?}"),
8938            "LargeBinary(\"1,2,3,4,5,6,7,8,9,10,11\")"
8939        );
8940
8941        let no_binary_value = ScalarValue::FixedSizeBinary(3, None);
8942        assert_eq!(format!("{no_binary_value:?}"), "FixedSizeBinary(3, NULL)");
8943        let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3]));
8944        assert_eq!(
8945            format!("{small_binary_value:?}"),
8946            "FixedSizeBinary(3, \"1,2,3\")"
8947        );
8948        let large_binary_value = ScalarValue::FixedSizeBinary(
8949            11,
8950            Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
8951        );
8952        assert_eq!(
8953            format!("{large_binary_value:?}"),
8954            "FixedSizeBinary(11, \"1,2,3,4,5,6,7,8,9,10,11\")"
8955        );
8956    }
8957
8958    #[test]
8959    fn test_build_timestamp_millisecond_list() {
8960        let values = vec![ScalarValue::TimestampMillisecond(Some(1), None)];
8961        let arr = ScalarValue::new_list_nullable(
8962            &values,
8963            &DataType::Timestamp(TimeUnit::Millisecond, None),
8964        );
8965        assert_eq!(1, arr.len());
8966    }
8967
8968    #[test]
8969    fn test_newlist_timestamp_zone() {
8970        let s: &'static str = "UTC";
8971        let values = vec![ScalarValue::TimestampMillisecond(Some(1), Some(s.into()))];
8972        let arr = ScalarValue::new_list_nullable(
8973            &values,
8974            &DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
8975        );
8976        assert_eq!(1, arr.len());
8977        assert_eq!(
8978            arr.data_type(),
8979            &DataType::List(Arc::new(Field::new_list_field(
8980                DataType::Timestamp(TimeUnit::Millisecond, Some(s.into())),
8981                true,
8982            )))
8983        );
8984    }
8985
8986    fn get_random_timestamps(sample_size: u64) -> Vec<ScalarValue> {
8987        let vector_size = sample_size;
8988        let mut timestamp = vec![];
8989        let mut rng = rand::rng();
8990        for i in 0..vector_size {
8991            let year = rng.random_range(1995..=2050);
8992            let month = rng.random_range(1..=12);
8993            let day = rng.random_range(1..=28); // to exclude invalid dates
8994            let hour = rng.random_range(0..=23);
8995            let minute = rng.random_range(0..=59);
8996            let second = rng.random_range(0..=59);
8997            if i % 4 == 0 {
8998                timestamp.push(ScalarValue::TimestampSecond(
8999                    Some(
9000                        NaiveDate::from_ymd_opt(year, month, day)
9001                            .unwrap()
9002                            .and_hms_opt(hour, minute, second)
9003                            .unwrap()
9004                            .and_utc()
9005                            .timestamp(),
9006                    ),
9007                    None,
9008                ))
9009            } else if i % 4 == 1 {
9010                let millisec = rng.random_range(0..=999);
9011                timestamp.push(ScalarValue::TimestampMillisecond(
9012                    Some(
9013                        NaiveDate::from_ymd_opt(year, month, day)
9014                            .unwrap()
9015                            .and_hms_milli_opt(hour, minute, second, millisec)
9016                            .unwrap()
9017                            .and_utc()
9018                            .timestamp_millis(),
9019                    ),
9020                    None,
9021                ))
9022            } else if i % 4 == 2 {
9023                let microsec = rng.random_range(0..=999_999);
9024                timestamp.push(ScalarValue::TimestampMicrosecond(
9025                    Some(
9026                        NaiveDate::from_ymd_opt(year, month, day)
9027                            .unwrap()
9028                            .and_hms_micro_opt(hour, minute, second, microsec)
9029                            .unwrap()
9030                            .and_utc()
9031                            .timestamp_micros(),
9032                    ),
9033                    None,
9034                ))
9035            } else if i % 4 == 3 {
9036                let nanosec = rng.random_range(0..=999_999_999);
9037                timestamp.push(ScalarValue::TimestampNanosecond(
9038                    Some(
9039                        NaiveDate::from_ymd_opt(year, month, day)
9040                            .unwrap()
9041                            .and_hms_nano_opt(hour, minute, second, nanosec)
9042                            .unwrap()
9043                            .and_utc()
9044                            .timestamp_nanos_opt()
9045                            .unwrap(),
9046                    ),
9047                    None,
9048                ))
9049            }
9050        }
9051        timestamp
9052    }
9053
9054    fn get_random_intervals(sample_size: u64) -> Vec<ScalarValue> {
9055        const MILLISECS_IN_ONE_DAY: i64 = 86_400_000;
9056        const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000;
9057
9058        let vector_size = sample_size;
9059        let mut intervals = vec![];
9060        let mut rng = rand::rng();
9061        const SECS_IN_ONE_DAY: i32 = 86_400;
9062        const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000;
9063        for i in 0..vector_size {
9064            if i % 4 == 0 {
9065                let days = rng.random_range(0..5000);
9066                // to not break second precision
9067                let millis = rng.random_range(0..SECS_IN_ONE_DAY) * 1000;
9068                intervals.push(ScalarValue::new_interval_dt(days, millis));
9069            } else if i % 4 == 1 {
9070                let days = rng.random_range(0..5000);
9071                let millisec = rng.random_range(0..(MILLISECS_IN_ONE_DAY as i32));
9072                intervals.push(ScalarValue::new_interval_dt(days, millisec));
9073            } else if i % 4 == 2 {
9074                let days = rng.random_range(0..5000);
9075                // to not break microsec precision
9076                let nanosec = rng.random_range(0..MICROSECS_IN_ONE_DAY) * 1000;
9077                intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
9078            } else {
9079                let days = rng.random_range(0..5000);
9080                let nanosec = rng.random_range(0..NANOSECS_IN_ONE_DAY);
9081                intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec));
9082            }
9083        }
9084        intervals
9085    }
9086
9087    fn union_fields() -> UnionFields {
9088        [
9089            (0, Arc::new(Field::new("A", DataType::Int32, true))),
9090            (1, Arc::new(Field::new("B", DataType::Float64, true))),
9091        ]
9092        .into_iter()
9093        .collect()
9094    }
9095
9096    #[test]
9097    fn sparse_scalar_union_is_null() {
9098        let sparse_scalar = ScalarValue::Union(
9099            Some((0_i8, Box::new(ScalarValue::Int32(None)))),
9100            union_fields(),
9101            UnionMode::Sparse,
9102        );
9103        assert!(sparse_scalar.is_null());
9104    }
9105
9106    #[test]
9107    fn dense_scalar_union_is_null() {
9108        let dense_scalar = ScalarValue::Union(
9109            Some((0_i8, Box::new(ScalarValue::Int32(None)))),
9110            union_fields(),
9111            UnionMode::Dense,
9112        );
9113        assert!(dense_scalar.is_null());
9114    }
9115
9116    #[test]
9117    fn cast_date_to_timestamp_overflow_returns_error() {
9118        let scalar = ScalarValue::Date32(Some(i32::MAX));
9119        let err = scalar
9120            .cast_to(&DataType::Timestamp(TimeUnit::Nanosecond, None))
9121            .expect_err("expected cast to fail");
9122        assert!(
9123            err.to_string()
9124                .contains("converted value exceeds the representable i64 range"),
9125            "unexpected error: {err}"
9126        );
9127    }
9128
9129    #[test]
9130    fn null_dictionary_scalar_produces_null_dictionary_array() {
9131        let dictionary_scalar = ScalarValue::Dictionary(
9132            Box::new(DataType::Int32),
9133            Box::new(ScalarValue::Null),
9134        );
9135        assert!(dictionary_scalar.is_null());
9136        let dictionary_array = dictionary_scalar.to_array().unwrap();
9137        assert!(dictionary_array.is_null(0));
9138    }
9139
9140    #[test]
9141    fn test_scalar_value_try_new_null() {
9142        let scalars = vec![
9143            ScalarValue::try_new_null(&DataType::Boolean).unwrap(),
9144            ScalarValue::try_new_null(&DataType::Int8).unwrap(),
9145            ScalarValue::try_new_null(&DataType::Int16).unwrap(),
9146            ScalarValue::try_new_null(&DataType::Int32).unwrap(),
9147            ScalarValue::try_new_null(&DataType::Int64).unwrap(),
9148            ScalarValue::try_new_null(&DataType::UInt8).unwrap(),
9149            ScalarValue::try_new_null(&DataType::UInt16).unwrap(),
9150            ScalarValue::try_new_null(&DataType::UInt32).unwrap(),
9151            ScalarValue::try_new_null(&DataType::UInt64).unwrap(),
9152            ScalarValue::try_new_null(&DataType::Float16).unwrap(),
9153            ScalarValue::try_new_null(&DataType::Float32).unwrap(),
9154            ScalarValue::try_new_null(&DataType::Float64).unwrap(),
9155            ScalarValue::try_new_null(&DataType::Decimal128(42, 42)).unwrap(),
9156            ScalarValue::try_new_null(&DataType::Decimal256(42, 42)).unwrap(),
9157            ScalarValue::try_new_null(&DataType::Utf8).unwrap(),
9158            ScalarValue::try_new_null(&DataType::LargeUtf8).unwrap(),
9159            ScalarValue::try_new_null(&DataType::Utf8View).unwrap(),
9160            ScalarValue::try_new_null(&DataType::Binary).unwrap(),
9161            ScalarValue::try_new_null(&DataType::BinaryView).unwrap(),
9162            ScalarValue::try_new_null(&DataType::FixedSizeBinary(42)).unwrap(),
9163            ScalarValue::try_new_null(&DataType::LargeBinary).unwrap(),
9164            ScalarValue::try_new_null(&DataType::Date32).unwrap(),
9165            ScalarValue::try_new_null(&DataType::Date64).unwrap(),
9166            ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Second)).unwrap(),
9167            ScalarValue::try_new_null(&DataType::Time32(TimeUnit::Millisecond)).unwrap(),
9168            ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Microsecond)).unwrap(),
9169            ScalarValue::try_new_null(&DataType::Time64(TimeUnit::Nanosecond)).unwrap(),
9170            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Second, None))
9171                .unwrap(),
9172            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Millisecond, None))
9173                .unwrap(),
9174            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Microsecond, None))
9175                .unwrap(),
9176            ScalarValue::try_new_null(&DataType::Timestamp(TimeUnit::Nanosecond, None))
9177                .unwrap(),
9178            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::YearMonth))
9179                .unwrap(),
9180            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::DayTime))
9181                .unwrap(),
9182            ScalarValue::try_new_null(&DataType::Interval(IntervalUnit::MonthDayNano))
9183                .unwrap(),
9184            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Second)).unwrap(),
9185            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Microsecond))
9186                .unwrap(),
9187            ScalarValue::try_new_null(&DataType::Duration(TimeUnit::Nanosecond)).unwrap(),
9188            ScalarValue::try_new_null(&DataType::Null).unwrap(),
9189        ];
9190        assert!(scalars.iter().all(|s| s.is_null()));
9191
9192        let field_ref = Arc::new(Field::new("foo", DataType::Int32, true));
9193        let map_field_ref = Arc::new(Field::new(
9194            "foo",
9195            DataType::Struct(Fields::from(vec![
9196                Field::new("bar", DataType::Utf8, true),
9197                Field::new("baz", DataType::Int32, true),
9198            ])),
9199            true,
9200        ));
9201        let scalars = [
9202            ScalarValue::try_new_null(&DataType::List(Arc::clone(&field_ref))).unwrap(),
9203            ScalarValue::try_new_null(&DataType::LargeList(Arc::clone(&field_ref)))
9204                .unwrap(),
9205            ScalarValue::try_new_null(&DataType::FixedSizeList(
9206                Arc::clone(&field_ref),
9207                42,
9208            ))
9209            .unwrap(),
9210            ScalarValue::try_new_null(&DataType::Struct(
9211                vec![Arc::clone(&field_ref)].into(),
9212            ))
9213            .unwrap(),
9214            ScalarValue::try_new_null(&DataType::Map(map_field_ref, false)).unwrap(),
9215            ScalarValue::try_new_null(&DataType::Union(
9216                UnionFields::try_new(vec![42], vec![field_ref]).unwrap(),
9217                UnionMode::Dense,
9218            ))
9219            .unwrap(),
9220        ];
9221        assert!(scalars.iter().all(|s| s.is_null()));
9222    }
9223
9224    // `err.to_string()` depends on backtrace being present (may have backtrace appended)
9225    // `err.strip_backtrace()` also depends on backtrace being present (may have "This was likely caused by ..." stripped)
9226    fn assert_starts_with(actual: impl AsRef<str>, expected_prefix: impl AsRef<str>) {
9227        let actual = actual.as_ref();
9228        let expected_prefix = expected_prefix.as_ref();
9229        assert!(
9230            actual.starts_with(expected_prefix),
9231            "Expected '{actual}' to start with '{expected_prefix}'"
9232        );
9233    }
9234
9235    #[test]
9236    fn test_new_default() {
9237        // Test numeric types
9238        assert_eq!(
9239            ScalarValue::new_default(&DataType::Int32).unwrap(),
9240            ScalarValue::Int32(Some(0))
9241        );
9242        assert_eq!(
9243            ScalarValue::new_default(&DataType::Float64).unwrap(),
9244            ScalarValue::Float64(Some(0.0))
9245        );
9246        assert_eq!(
9247            ScalarValue::new_default(&DataType::Boolean).unwrap(),
9248            ScalarValue::Boolean(Some(false))
9249        );
9250
9251        // Test string types
9252        assert_eq!(
9253            ScalarValue::new_default(&DataType::Utf8).unwrap(),
9254            ScalarValue::Utf8(Some("".to_string()))
9255        );
9256        assert_eq!(
9257            ScalarValue::new_default(&DataType::LargeUtf8).unwrap(),
9258            ScalarValue::LargeUtf8(Some("".to_string()))
9259        );
9260
9261        // Test binary types
9262        assert_eq!(
9263            ScalarValue::new_default(&DataType::Binary).unwrap(),
9264            ScalarValue::Binary(Some(vec![]))
9265        );
9266
9267        // Test fixed size binary
9268        assert_eq!(
9269            ScalarValue::new_default(&DataType::FixedSizeBinary(5)).unwrap(),
9270            ScalarValue::FixedSizeBinary(5, Some(vec![0, 0, 0, 0, 0]))
9271        );
9272
9273        // Test temporal types
9274        assert_eq!(
9275            ScalarValue::new_default(&DataType::Date32).unwrap(),
9276            ScalarValue::Date32(Some(0))
9277        );
9278        assert_eq!(
9279            ScalarValue::new_default(&DataType::Time32(TimeUnit::Second)).unwrap(),
9280            ScalarValue::Time32Second(Some(0))
9281        );
9282
9283        // Test decimal types
9284        assert_eq!(
9285            ScalarValue::new_default(&DataType::Decimal128(10, 2)).unwrap(),
9286            ScalarValue::Decimal128(Some(0), 10, 2)
9287        );
9288
9289        // Test list type
9290        let list_field = Field::new_list_field(DataType::Int32, true);
9291        let list_result =
9292            ScalarValue::new_default(&DataType::List(Arc::new(list_field.clone())))
9293                .unwrap();
9294        match list_result {
9295            ScalarValue::List(arr) => {
9296                assert_eq!(arr.len(), 1);
9297                assert_eq!(arr.value_length(0), 0); // empty list
9298            }
9299            _ => panic!("Expected List"),
9300        }
9301
9302        // Test struct type
9303        let struct_fields = Fields::from(vec![
9304            Field::new("a", DataType::Int32, false),
9305            Field::new("b", DataType::Utf8, false),
9306        ]);
9307        let struct_result =
9308            ScalarValue::new_default(&DataType::Struct(struct_fields.clone())).unwrap();
9309        match struct_result {
9310            ScalarValue::Struct(arr) => {
9311                assert_eq!(arr.len(), 1);
9312                assert_eq!(arr.column(0).as_primitive::<Int32Type>().value(0), 0);
9313                assert_eq!(arr.column(1).as_string::<i32>().value(0), "");
9314            }
9315            _ => panic!("Expected Struct"),
9316        }
9317
9318        // Test union type
9319        let union_fields = UnionFields::try_new(
9320            vec![0, 1],
9321            vec![
9322                Field::new("i32", DataType::Int32, false),
9323                Field::new("f64", DataType::Float64, false),
9324            ],
9325        )
9326        .unwrap();
9327        let union_result = ScalarValue::new_default(&DataType::Union(
9328            union_fields.clone(),
9329            UnionMode::Sparse,
9330        ))
9331        .unwrap();
9332        match union_result {
9333            ScalarValue::Union(Some((type_id, value)), _, _) => {
9334                assert_eq!(type_id, 0);
9335                assert_eq!(*value, ScalarValue::Int32(Some(0)));
9336            }
9337            _ => panic!("Expected Union"),
9338        }
9339    }
9340
9341    #[test]
9342    fn test_scalar_min() {
9343        // Test integer types
9344        assert_eq!(
9345            ScalarValue::min(&DataType::Int8),
9346            Some(ScalarValue::Int8(Some(i8::MIN)))
9347        );
9348        assert_eq!(
9349            ScalarValue::min(&DataType::Int32),
9350            Some(ScalarValue::Int32(Some(i32::MIN)))
9351        );
9352        assert_eq!(
9353            ScalarValue::min(&DataType::UInt8),
9354            Some(ScalarValue::UInt8(Some(0)))
9355        );
9356        assert_eq!(
9357            ScalarValue::min(&DataType::UInt64),
9358            Some(ScalarValue::UInt64(Some(0)))
9359        );
9360
9361        // Test float types
9362        assert_eq!(
9363            ScalarValue::min(&DataType::Float32),
9364            Some(ScalarValue::Float32(Some(f32::NEG_INFINITY)))
9365        );
9366        assert_eq!(
9367            ScalarValue::min(&DataType::Float64),
9368            Some(ScalarValue::Float64(Some(f64::NEG_INFINITY)))
9369        );
9370
9371        // Test decimal types
9372        let decimal_min = ScalarValue::min(&DataType::Decimal128(5, 2)).unwrap();
9373        match decimal_min {
9374            ScalarValue::Decimal128(Some(val), 5, 2) => {
9375                assert_eq!(val, -99999); // -999.99 with scale 2
9376            }
9377            _ => panic!("Expected Decimal128"),
9378        }
9379
9380        // Test temporal types
9381        assert_eq!(
9382            ScalarValue::min(&DataType::Date32),
9383            Some(ScalarValue::Date32(Some(i32::MIN)))
9384        );
9385        assert_eq!(
9386            ScalarValue::min(&DataType::Time32(TimeUnit::Second)),
9387            Some(ScalarValue::Time32Second(Some(0)))
9388        );
9389        assert_eq!(
9390            ScalarValue::min(&DataType::Timestamp(TimeUnit::Nanosecond, None)),
9391            Some(ScalarValue::TimestampNanosecond(Some(i64::MIN), None))
9392        );
9393
9394        // Test duration types
9395        assert_eq!(
9396            ScalarValue::min(&DataType::Duration(TimeUnit::Second)),
9397            Some(ScalarValue::DurationSecond(Some(i64::MIN)))
9398        );
9399
9400        // Test unsupported types
9401        assert_eq!(ScalarValue::min(&DataType::Utf8), None);
9402        assert_eq!(ScalarValue::min(&DataType::Binary), None);
9403        assert_eq!(
9404            ScalarValue::min(&DataType::List(Arc::new(Field::new(
9405                "item",
9406                DataType::Int32,
9407                true
9408            )))),
9409            None
9410        );
9411    }
9412
9413    #[test]
9414    fn test_scalar_max() {
9415        // Test integer types
9416        assert_eq!(
9417            ScalarValue::max(&DataType::Int8),
9418            Some(ScalarValue::Int8(Some(i8::MAX)))
9419        );
9420        assert_eq!(
9421            ScalarValue::max(&DataType::Int32),
9422            Some(ScalarValue::Int32(Some(i32::MAX)))
9423        );
9424        assert_eq!(
9425            ScalarValue::max(&DataType::UInt8),
9426            Some(ScalarValue::UInt8(Some(u8::MAX)))
9427        );
9428        assert_eq!(
9429            ScalarValue::max(&DataType::UInt64),
9430            Some(ScalarValue::UInt64(Some(u64::MAX)))
9431        );
9432
9433        // Test float types
9434        assert_eq!(
9435            ScalarValue::max(&DataType::Float32),
9436            Some(ScalarValue::Float32(Some(f32::INFINITY)))
9437        );
9438        assert_eq!(
9439            ScalarValue::max(&DataType::Float64),
9440            Some(ScalarValue::Float64(Some(f64::INFINITY)))
9441        );
9442
9443        // Test decimal types
9444        let decimal_max = ScalarValue::max(&DataType::Decimal128(5, 2)).unwrap();
9445        match decimal_max {
9446            ScalarValue::Decimal128(Some(val), 5, 2) => {
9447                assert_eq!(val, 99999); // 999.99 with scale 2
9448            }
9449            _ => panic!("Expected Decimal128"),
9450        }
9451
9452        // Test temporal types
9453        assert_eq!(
9454            ScalarValue::max(&DataType::Date32),
9455            Some(ScalarValue::Date32(Some(i32::MAX)))
9456        );
9457        assert_eq!(
9458            ScalarValue::max(&DataType::Time32(TimeUnit::Second)),
9459            Some(ScalarValue::Time32Second(Some(86_399))) // 23:59:59
9460        );
9461        assert_eq!(
9462            ScalarValue::max(&DataType::Time64(TimeUnit::Microsecond)),
9463            Some(ScalarValue::Time64Microsecond(Some(86_399_999_999))) // 23:59:59.999999
9464        );
9465        assert_eq!(
9466            ScalarValue::max(&DataType::Timestamp(TimeUnit::Nanosecond, None)),
9467            Some(ScalarValue::TimestampNanosecond(Some(i64::MAX), None))
9468        );
9469
9470        // Test duration types
9471        assert_eq!(
9472            ScalarValue::max(&DataType::Duration(TimeUnit::Millisecond)),
9473            Some(ScalarValue::DurationMillisecond(Some(i64::MAX)))
9474        );
9475
9476        // Test unsupported types
9477        assert_eq!(ScalarValue::max(&DataType::Utf8), None);
9478        assert_eq!(ScalarValue::max(&DataType::Binary), None);
9479        assert_eq!(
9480            ScalarValue::max(&DataType::Struct(Fields::from(vec![Field::new(
9481                "field",
9482                DataType::Int32,
9483                true
9484            )]))),
9485            None
9486        );
9487    }
9488
9489    #[test]
9490    fn test_min_max_float16() {
9491        // Test Float16 min and max
9492        let min_f16 = ScalarValue::min(&DataType::Float16).unwrap();
9493        match min_f16 {
9494            ScalarValue::Float16(Some(val)) => {
9495                assert_eq!(val, f16::NEG_INFINITY);
9496            }
9497            _ => panic!("Expected Float16"),
9498        }
9499
9500        let max_f16 = ScalarValue::max(&DataType::Float16).unwrap();
9501        match max_f16 {
9502            ScalarValue::Float16(Some(val)) => {
9503                assert_eq!(val, f16::INFINITY);
9504            }
9505            _ => panic!("Expected Float16"),
9506        }
9507    }
9508
9509    #[test]
9510    fn test_new_default_interval() {
9511        // Test all interval types
9512        assert_eq!(
9513            ScalarValue::new_default(&DataType::Interval(IntervalUnit::YearMonth))
9514                .unwrap(),
9515            ScalarValue::IntervalYearMonth(Some(0))
9516        );
9517        assert_eq!(
9518            ScalarValue::new_default(&DataType::Interval(IntervalUnit::DayTime)).unwrap(),
9519            ScalarValue::IntervalDayTime(Some(IntervalDayTime::ZERO))
9520        );
9521        assert_eq!(
9522            ScalarValue::new_default(&DataType::Interval(IntervalUnit::MonthDayNano))
9523                .unwrap(),
9524            ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::ZERO))
9525        );
9526    }
9527
9528    #[test]
9529    fn test_min_max_with_timezone() {
9530        let tz = Some(Arc::from("UTC"));
9531
9532        // Test timestamp with timezone
9533        let min_ts =
9534            ScalarValue::min(&DataType::Timestamp(TimeUnit::Second, tz.clone())).unwrap();
9535        match min_ts {
9536            ScalarValue::TimestampSecond(Some(val), Some(tz_str)) => {
9537                assert_eq!(val, i64::MIN);
9538                assert_eq!(tz_str.as_ref(), "UTC");
9539            }
9540            _ => panic!("Expected TimestampSecond with timezone"),
9541        }
9542
9543        let max_ts =
9544            ScalarValue::max(&DataType::Timestamp(TimeUnit::Millisecond, tz.clone()))
9545                .unwrap();
9546        match max_ts {
9547            ScalarValue::TimestampMillisecond(Some(val), Some(tz_str)) => {
9548                assert_eq!(val, i64::MAX);
9549                assert_eq!(tz_str.as_ref(), "UTC");
9550            }
9551            _ => panic!("Expected TimestampMillisecond with timezone"),
9552        }
9553    }
9554
9555    #[test]
9556    fn test_views_minimize_memory() {
9557        let value = "this string is longer than 12 bytes".to_string();
9558
9559        let scalar = ScalarValue::Utf8View(Some(value.clone()));
9560        let array = scalar.to_array_of_size(10).unwrap();
9561        let array = array.as_string_view();
9562        let buffers = array.data_buffers();
9563        assert_eq!(1, buffers.len());
9564        // Ensure we only have a single copy of the value string
9565        assert_eq!(value.len(), buffers[0].len());
9566
9567        // Same but for BinaryView
9568        let scalar = ScalarValue::BinaryView(Some(value.bytes().collect()));
9569        let array = scalar.to_array_of_size(10).unwrap();
9570        let array = array.as_binary_view();
9571        let buffers = array.data_buffers();
9572        assert_eq!(1, buffers.len());
9573        assert_eq!(value.len(), buffers[0].len());
9574    }
9575
9576    #[test]
9577    fn test_to_array_of_size_run_end_encoded() {
9578        fn run_test<R: RunEndIndexType>() {
9579            let value = Box::new(ScalarValue::Float32(Some(1.0)));
9580            let size = 5;
9581            let scalar = ScalarValue::RunEndEncoded(
9582                Field::new("run_ends", R::DATA_TYPE, false).into(),
9583                Field::new("values", DataType::Float32, true).into(),
9584                value.clone(),
9585            );
9586            let array = scalar.to_array_of_size(size).unwrap();
9587            let array = array.as_run::<R>();
9588            let array = array.downcast::<Float32Array>().unwrap();
9589            assert_eq!(vec![Some(1.0); size], array.into_iter().collect::<Vec<_>>());
9590            assert_eq!(1, array.values().len());
9591        }
9592
9593        run_test::<Int16Type>();
9594        run_test::<Int32Type>();
9595        run_test::<Int64Type>();
9596
9597        let scalar = ScalarValue::RunEndEncoded(
9598            Field::new("run_ends", DataType::Int16, false).into(),
9599            Field::new("values", DataType::Float32, true).into(),
9600            Box::new(ScalarValue::Float32(Some(1.0))),
9601        );
9602        let err = scalar.to_array_of_size(i16::MAX as usize + 10).unwrap_err();
9603        assert_eq!(
9604            "Execution error: Cannot construct RunArray of size 32777: Overflows run-ends type Int16",
9605            err.to_string()
9606        )
9607    }
9608
9609    #[test]
9610    fn test_eq_array_run_end_encoded() {
9611        let run_ends = Int16Array::from(vec![1, 3]);
9612        let values = Float32Array::from(vec![None, Some(1.0)]);
9613        let run_array =
9614            Arc::new(RunArray::try_new(&run_ends, &values).unwrap()) as ArrayRef;
9615
9616        let scalar = ScalarValue::RunEndEncoded(
9617            Field::new("run_ends", DataType::Int16, false).into(),
9618            Field::new("values", DataType::Float32, true).into(),
9619            Box::new(ScalarValue::Float32(None)),
9620        );
9621        assert!(scalar.eq_array(&run_array, 0).unwrap());
9622
9623        let scalar = ScalarValue::RunEndEncoded(
9624            Field::new("run_ends", DataType::Int16, false).into(),
9625            Field::new("values", DataType::Float32, true).into(),
9626            Box::new(ScalarValue::Float32(Some(1.0))),
9627        );
9628        assert!(scalar.eq_array(&run_array, 1).unwrap());
9629        assert!(scalar.eq_array(&run_array, 2).unwrap());
9630
9631        // value types must match
9632        let scalar = ScalarValue::RunEndEncoded(
9633            Field::new("run_ends", DataType::Int16, false).into(),
9634            Field::new("values", DataType::Float64, true).into(),
9635            Box::new(ScalarValue::Float64(Some(1.0))),
9636        );
9637        let err = scalar.eq_array(&run_array, 1).unwrap_err();
9638        let expected = "Internal error: could not cast array of type Float32 to arrow_array::array::primitive_array::PrimitiveArray<arrow_array::types::Float64Type>";
9639        assert!(err.to_string().starts_with(expected));
9640
9641        // run ends type must match
9642        let scalar = ScalarValue::RunEndEncoded(
9643            Field::new("run_ends", DataType::Int32, false).into(),
9644            Field::new("values", DataType::Float32, true).into(),
9645            Box::new(ScalarValue::Float32(None)),
9646        );
9647        let err = scalar.eq_array(&run_array, 0).unwrap_err();
9648        let expected = "Internal error: could not cast array of type RunEndEncoded(\"run_ends\": non-null Int16, \"values\": Float32) to arrow_array::array::run_array::RunArray<arrow_array::types::Int32Type>";
9649        assert!(err.to_string().starts_with(expected));
9650    }
9651
9652    #[test]
9653    fn test_iter_to_array_run_end_encoded() {
9654        let run_ends_field = Arc::new(Field::new("run_ends", DataType::Int16, false));
9655        let values_field = Arc::new(Field::new("values", DataType::Int64, true));
9656        let scalars = vec![
9657            ScalarValue::RunEndEncoded(
9658                Arc::clone(&run_ends_field),
9659                Arc::clone(&values_field),
9660                Box::new(ScalarValue::Int64(Some(1))),
9661            ),
9662            ScalarValue::RunEndEncoded(
9663                Arc::clone(&run_ends_field),
9664                Arc::clone(&values_field),
9665                Box::new(ScalarValue::Int64(Some(1))),
9666            ),
9667            ScalarValue::RunEndEncoded(
9668                Arc::clone(&run_ends_field),
9669                Arc::clone(&values_field),
9670                Box::new(ScalarValue::Int64(None)),
9671            ),
9672            ScalarValue::RunEndEncoded(
9673                Arc::clone(&run_ends_field),
9674                Arc::clone(&values_field),
9675                Box::new(ScalarValue::Int64(Some(2))),
9676            ),
9677            ScalarValue::RunEndEncoded(
9678                Arc::clone(&run_ends_field),
9679                Arc::clone(&values_field),
9680                Box::new(ScalarValue::Int64(Some(2))),
9681            ),
9682            ScalarValue::RunEndEncoded(
9683                Arc::clone(&run_ends_field),
9684                Arc::clone(&values_field),
9685                Box::new(ScalarValue::Int64(Some(2))),
9686            ),
9687        ];
9688
9689        let run_array = ScalarValue::iter_to_array(scalars).unwrap();
9690        let expected = RunArray::try_new(
9691            &Int16Array::from(vec![2, 3, 6]),
9692            &Int64Array::from(vec![Some(1), None, Some(2)]),
9693        )
9694        .unwrap();
9695        assert_eq!(&expected as &dyn Array, run_array.as_ref());
9696
9697        // inconsistent run-ends type
9698        let scalars = vec![
9699            ScalarValue::RunEndEncoded(
9700                Arc::clone(&run_ends_field),
9701                Arc::clone(&values_field),
9702                Box::new(ScalarValue::Int64(Some(1))),
9703            ),
9704            ScalarValue::RunEndEncoded(
9705                Field::new("run_ends", DataType::Int32, false).into(),
9706                Arc::clone(&values_field),
9707                Box::new(ScalarValue::Int64(Some(1))),
9708            ),
9709        ];
9710        let err = ScalarValue::iter_to_array(scalars).unwrap_err();
9711        let expected = "Execution error: Expected RunEndEncoded scalar with run-ends field Field { \"run_ends\": Int16 } but got: RunEndEncoded(Field { name: \"run_ends\", data_type: Int32 }, Field { name: \"values\", data_type: Int64, nullable: true }, Int64(1))";
9712        assert!(err.to_string().starts_with(expected));
9713
9714        // inconsistent value type
9715        let scalars = vec![
9716            ScalarValue::RunEndEncoded(
9717                Arc::clone(&run_ends_field),
9718                Arc::clone(&values_field),
9719                Box::new(ScalarValue::Int64(Some(1))),
9720            ),
9721            ScalarValue::RunEndEncoded(
9722                Arc::clone(&run_ends_field),
9723                Field::new("values", DataType::Int32, true).into(),
9724                Box::new(ScalarValue::Int32(Some(1))),
9725            ),
9726        ];
9727        let err = ScalarValue::iter_to_array(scalars).unwrap_err();
9728        let expected = "Execution error: Expected RunEndEncoded scalar with run-ends field Field { \"run_ends\": Int16 } but got: RunEndEncoded(Field { name: \"run_ends\", data_type: Int16 }, Field { name: \"values\", data_type: Int32, nullable: true }, Int32(1))";
9729        assert!(err.to_string().starts_with(expected));
9730
9731        // inconsistent scalars type
9732        let scalars = vec![
9733            ScalarValue::RunEndEncoded(
9734                Arc::clone(&run_ends_field),
9735                Arc::clone(&values_field),
9736                Box::new(ScalarValue::Int64(Some(1))),
9737            ),
9738            ScalarValue::Int64(Some(1)),
9739        ];
9740        let err = ScalarValue::iter_to_array(scalars).unwrap_err();
9741        let expected = "Execution error: Expected RunEndEncoded scalar with run-ends field Field { \"run_ends\": Int16 } but got: Int64(1)";
9742        assert!(err.to_string().starts_with(expected));
9743    }
9744
9745    #[test]
9746    fn test_convert_array_to_scalar_vec() {
9747        // 1: Regular ListArray
9748        let list = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
9749            Some(vec![Some(1), Some(2)]),
9750            None,
9751            Some(vec![Some(3), None, Some(4)]),
9752        ]);
9753        let converted = ScalarValue::convert_array_to_scalar_vec(&list).unwrap();
9754        assert_eq!(
9755            converted,
9756            vec![
9757                Some(vec![
9758                    ScalarValue::Int64(Some(1)),
9759                    ScalarValue::Int64(Some(2))
9760                ]),
9761                None,
9762                Some(vec![
9763                    ScalarValue::Int64(Some(3)),
9764                    ScalarValue::Int64(None),
9765                    ScalarValue::Int64(Some(4))
9766                ]),
9767            ]
9768        );
9769
9770        // 2: Regular LargeListArray
9771        let large_list = LargeListArray::from_iter_primitive::<Int64Type, _, _>(vec![
9772            Some(vec![Some(1), Some(2)]),
9773            None,
9774            Some(vec![Some(3), None, Some(4)]),
9775        ]);
9776        let converted = ScalarValue::convert_array_to_scalar_vec(&large_list).unwrap();
9777        assert_eq!(
9778            converted,
9779            vec![
9780                Some(vec![
9781                    ScalarValue::Int64(Some(1)),
9782                    ScalarValue::Int64(Some(2))
9783                ]),
9784                None,
9785                Some(vec![
9786                    ScalarValue::Int64(Some(3)),
9787                    ScalarValue::Int64(None),
9788                    ScalarValue::Int64(Some(4))
9789                ]),
9790            ]
9791        );
9792
9793        // 3: Funky (null slot has non-zero list offsets)
9794        // Offsets + Values looks like this: [[1, 2], [3, 4], [5]]
9795        // But with NullBuffer it's like this: [[1, 2], NULL, [5]]
9796        let funky = ListArray::new(
9797            Field::new_list_field(DataType::Int64, true).into(),
9798            OffsetBuffer::new(vec![0, 2, 4, 5].into()),
9799            Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5, 6])),
9800            Some(NullBuffer::from(vec![true, false, true])),
9801        );
9802        let converted = ScalarValue::convert_array_to_scalar_vec(&funky).unwrap();
9803        assert_eq!(
9804            converted,
9805            vec![
9806                Some(vec![
9807                    ScalarValue::Int64(Some(1)),
9808                    ScalarValue::Int64(Some(2))
9809                ]),
9810                None,
9811                Some(vec![ScalarValue::Int64(Some(5))]),
9812            ]
9813        );
9814
9815        // 4: Offsets + Values looks like this: [[1, 2], [], [5]]
9816        // But with NullBuffer it's like this: [[1, 2], NULL, [5]]
9817        // The converted result is: [[1, 2], None, [5]]
9818        let array4 = ListArray::new(
9819            Field::new_list_field(DataType::Int64, true).into(),
9820            OffsetBuffer::new(vec![0, 2, 2, 5].into()),
9821            Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5, 6])),
9822            Some(NullBuffer::from(vec![true, false, true])),
9823        );
9824        let converted = ScalarValue::convert_array_to_scalar_vec(&array4).unwrap();
9825        assert_eq!(
9826            converted,
9827            vec![
9828                Some(vec![
9829                    ScalarValue::Int64(Some(1)),
9830                    ScalarValue::Int64(Some(2))
9831                ]),
9832                None,
9833                Some(vec![
9834                    ScalarValue::Int64(Some(3)),
9835                    ScalarValue::Int64(Some(4)),
9836                    ScalarValue::Int64(Some(5)),
9837                ]),
9838            ]
9839        );
9840
9841        // 5: Offsets + Values looks like this: [[1, 2], [], [5]]
9842        // Same as 4, but the middle array is not null, so after conversion it's empty.
9843        let array5 = ListArray::new(
9844            Field::new_list_field(DataType::Int64, true).into(),
9845            OffsetBuffer::new(vec![0, 2, 2, 5].into()),
9846            Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5, 6])),
9847            Some(NullBuffer::from(vec![true, true, true])),
9848        );
9849        let converted = ScalarValue::convert_array_to_scalar_vec(&array5).unwrap();
9850        assert_eq!(
9851            converted,
9852            vec![
9853                Some(vec![
9854                    ScalarValue::Int64(Some(1)),
9855                    ScalarValue::Int64(Some(2))
9856                ]),
9857                Some(vec![]),
9858                Some(vec![
9859                    ScalarValue::Int64(Some(3)),
9860                    ScalarValue::Int64(Some(4)),
9861                    ScalarValue::Int64(Some(5)),
9862                ]),
9863            ]
9864        );
9865    }
9866}